badger-2.2007.2/.deepsource.toml

version = 1
test_patterns = [
'integration/testgc/**',
'**/*_test.go'
]
exclude_patterns = [
]
[[analyzers]]
name = 'go'
enabled = true
[analyzers.meta]
import_path = 'github.com/dgraph-io/badger'
badger-2.2007.2/.github/CODEOWNERS

# CODEOWNERS info: https://help.github.com/en/articles/about-code-owners
# Owners are automatically requested for review for PRs that change code
# that they own.
* @manishrjain @ashish-goswami @jarifibrahim
badger-2.2007.2/.github/ISSUE_TEMPLATE
### What version of Go are you using (`go version`)?
$ go version
### What operating system are you using?
### What version of Badger are you using?
### Does this issue reproduce with the latest master?
### Steps to Reproduce the issue
### What Badger options were set?
### What did you do?
### What did you expect to see?
### What did you see instead?
badger-2.2007.2/.github/stale.yml

# Number of days of inactivity before an issue becomes stale
daysUntilStale: 30
# Number of days of inactivity before a stale issue is closed
daysUntilClose: 7
# Issues with these labels will never be considered stale
exemptLabels:
  - skip/stale
  - status/accepted
# Label to use when marking an issue as stale
staleLabel: status/stale
# Comment to post when marking an issue as stale. Set to `false` to disable
markComment: >
  This issue has been automatically marked as stale because it has not had
  recent activity. It will be closed if no further activity occurs. Thank you
  for your contributions.
# Comment to post when closing a stale issue. Set to `false` to disable
closeComment: >
  This issue was marked as stale and no activity has occurred since then,
  therefore it will now be closed. Please, reopen if the issue is still
  relevant.
badger-2.2007.2/.gitignore

p/
badger-test*/
badger-2.2007.2/.golangci.yml

run:
  tests: false
linters-settings:
  lll:
    line-length: 100
linters:
  disable-all: true
  enable:
    - errcheck
    - ineffassign
    - gas
    - gofmt
    - golint
    - gosimple
    - govet
    - lll
    - varcheck
    - unused
issues:
  exclude-rules:
    - linters:
        - gosec
      text: "G404: "
badger-2.2007.2/.travis.yml

language: go
go:
- "1.12"
- "1.13"
- tip
os:
- osx
env:
jobs:
- GOARCH=386
- GOARCH=amd64
global:
- secure: CRkV2+/jlO0gXzzS50XGxfMS117FNwiVjxNY/LeWq06RKD+dDCPxTJl3JCNe3l0cYEPAglV2uMMYukDiTqJ7e+HI4nh4N4mv6lwx39N8dAvJe1x5ITS2T4qk4kTjuQb1Q1vw/ZOxoQqmvNKj2uRmBdJ/HHmysbRJ1OzCWML3OXdUwJf0AYlJzTjpMfkOKr7sTtE4rwyyQtd4tKH1fGdurgI9ZuFd9qvYxK2qcJhsQ6CNqMXt+7FkVkN1rIPmofjjBTNryzUr4COFXuWH95aDAif19DeBW4lbNgo1+FpDsrgmqtuhl6NAuptI8q/imow2KXBYJ8JPXsxW8DVFj0IIp0RCd3GjaEnwBEbxAyiIHLfW7AudyTS/dJOvZffPqXnuJ8xj3OPIdNe4xY0hWl8Ju2HhKfLOAHq7VadHZWd3IHLil70EiL4/JLD1rNbMImUZisFaA8pyrcIvYYebjOnk4TscwKFLedClRSX1XsMjWWd0oykQtrdkHM2IxknnBpaLu7mFnfE07f6dkG0nlpyu4SCLey7hr5FdcEmljA0nIxTSYDg6035fQkBEAbe7hlESOekkVNT9IZPwG+lmt3vU4ofi6NqNbJecOuSB+h36IiZ9s4YQtxYNnLgW14zjuFGGyT5smc3IjBT7qngDjKIgyrSVoRkY/8udy9qbUgvBeW8=
jobs:
allow_failures:
- go: tip
exclude:
# Exclude builds for 386 architecture on go 1.12 and tip
# Since we don't want it to run for 32 bit
- go: "1.12"
env: GOARCH=386
- go: tip
env: GOARCH=386
include:
# Define one extra linux build, which we use to run cross
# compiled 32 bit tests
- os: linux
arch: arm64
go: "1.14"
env: go_32=yes
notifications:
email: false
slack:
secure: X7uBLWYbuUhf8QFE16CoS5z7WvFR8EN9j6cEectMW6mKZ3vwXGwVXRIPsgUq/606DsQdCCx34MR8MRWYGlu6TBolbSe9y0EP0i46yipPz22YtuT7umcVUbGEyx8MZKgG0v1u/zA0O4aCsOBpGAA3gxz8h3JlEHDt+hv6U8xRsSllVLzLSNb5lwxDtcfEDxVVqP47GMEgjLPM28Pyt5qwjk7o5a4YSVzkfdxBXxd3gWzFUWzJ5E3cTacli50dK4GVfiLcQY2aQYoYO7AAvDnvP+TPfjDkBlUEE4MUz5CDIN51Xb+WW33sX7g+r3Bj7V5IRcF973RiYkpEh+3eoiPnyWyxhDZBYilty3b+Hysp6d4Ov/3I3ll7Bcny5+cYjakjkMH3l9w3gs6Y82GlpSLSJshKWS8vPRsxFe0Pstj6QSJXTd9EBaFr+l1ScXjJv/Sya9j8N9FfTuOTESWuaL1auX4Y7zEEVHlA8SCNOO8K0eTfxGZnC/YcIHsR8rePEAcFxfOYQppkyLF/XvAtnb/LMUuu0g4y2qNdme6Oelvyar1tFEMRtbl4mRCdu/krXBFtkrsfUaVY6WTPdvXAGotsFJ0wuA53zGVhlcd3+xAlSlR3c1QX95HIMeivJKb5L4nTjP+xnrmQNtnVk+tG4LSH2ltuwcZSSczModtcBmRefrk=
script: >-
if [ $TRAVIS_OS_NAME = "linux" ] && [ $go_32 ]; then
uname -a
GOOS=linux GOARCH=arm go test -v ./...
# Another round of tests after turning off mmap.
GOOS=linux GOARCH=arm go test -v -vlog_mmap=false github.com/dgraph-io/badger
else
go test -v ./...
# Another round of tests after turning off mmap.
go test -v -vlog_mmap=false github.com/dgraph-io/badger
fi
badger-2.2007.2/CHANGELOG.md

# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
## [2.2007.2] - 2020-08-31
### Fixed
- Compaction: Use separate compactors for L0, L1 (#1466)
- Rework Block and Index cache (#1473)
- Add IsClosed method (#1478)
- Cleanup: Avoid truncating in vlog.Open on error (#1465)
- Cleanup: Do not close cache before compactions (#1464)
### New APIs
- Badger.DB
  - BlockCacheMetrics (#1473)
  - IndexCacheMetrics (#1473)
- Badger.Option
  - WithBlockCacheSize (#1473)
  - WithIndexCacheSize (#1473)
### Removed APIs [Breaking Changes]
- Badger.DB
  - DataCacheMetrics (#1473)
  - BfCacheMetrics (#1473)
- Badger.Option
  - WithMaxCacheSize (#1473)
  - WithMaxBfCacheSize (#1473)
  - WithKeepBlockIndicesInCache (#1473)
  - WithKeepBlocksInCache (#1473)
## [2.2007.1] - 2020-08-19
### Fixed
- Remove vlog file if bootstrap, syncDir or mmap fails (#1434)
- levels: Compaction incorrectly drops some delete markers (#1422)
- Replay: Update head for LSM entries also (#1456)
## [2.2007.0] - 2020-08-10
### Fixed
- Add a limit to the size of the batches sent over a stream. (#1412)
- Fix Sequence generates duplicate values (#1281)
- Fix race condition in DoesNotHave (#1287)
- Fail fast if cgo is disabled and compression is ZSTD (#1284)
- Proto: make badger/v2 compatible with v1 (#1293)
- Proto: Rename dgraph.badger.v2.pb to badgerpb2 (#1314)
- Handle duplicates in ManagedWriteBatch (#1315)
- Ensure `bitValuePointer` flag is cleared for LSM entry values written to LSM (#1313)
- DropPrefix: Return error on blocked writes (#1329)
- Confirm `badgerMove` entry required before rewrite (#1302)
- Drop move keys when its key prefix is dropped (#1331)
- Iterator: Always add key to txn.reads (#1328)
- Restore: Account for value size as well (#1358)
- Compaction: Expired keys and delete markers are never purged (#1354)
- GC: Consider size of value while rewriting (#1357)
- Force KeepL0InMemory to be true when InMemory is true (#1375)
- Rework DB.DropPrefix (#1381)
- Update head while replaying value log (#1372)
- Avoid panic on multiple closer.Signal calls (#1401)
- Return error if the vlog writes exceeds more than 4GB (#1400)
### Performance
- Clean up transaction oracle as we go (#1275)
- Use cache for storing block offsets (#1336)
### Features
- Support disabling conflict detection (#1344)
- Add leveled logging (#1249)
- Support entry version in Write batch (#1310)
- Add Write method to batch write (#1321)
- Support multiple iterators in read-write transactions (#1286)
### New APIs
- Badger.DB
  - NewManagedWriteBatch (#1310)
  - DropPrefix (#1381)
- Badger.Option
  - WithDetectConflicts (#1344)
  - WithKeepBlockIndicesInCache (#1336)
  - WithKeepBlocksInCache (#1336)
- Badger.WriteBatch
  - DeleteAt (#1310)
  - SetEntryAt (#1310)
  - Write (#1321)
### Changes to Default Options
- DefaultOptions: Set KeepL0InMemory to false (#1345)
- Increase default valueThreshold from 32B to 1KB (#1346)
### Deprecated
- Badger.Option
  - WithEventLogging (#1203)
### Reverts
This section lists the changes that were reverted because of non-reproducible crashes.
- Compress/Encrypt Blocks in the background (#1227)
## [2.0.3] - 2020-03-24
### Fixed
- Add support for watching nil prefix in subscribe API (#1246)
### Performance
- Compress/Encrypt Blocks in the background (#1227)
- Disable cache by default (#1257)
### Features
- Add BypassDirLock option (#1243)
- Add separate cache for bloomfilters (#1260)
### New APIs
- badger.DB
  - BfCacheMetrics (#1260)
  - DataCacheMetrics (#1260)
- badger.Options
  - WithBypassLockGuard (#1243)
  - WithLoadBloomsOnOpen (#1260)
  - WithMaxBfCacheSize (#1260)
## [2.0.2] - 2020-03-02
### Fixed
- Cast sz to uint32 to fix compilation on 32 bit. (#1175)
- Fix checkOverlap in compaction. (#1166)
- Avoid sync in inmemory mode. (#1190)
- Support disabling the cache completely. (#1185)
- Add support for caching bloomfilters. (#1204)
- Fix int overflow for 32bit. (#1216)
- Remove the 'this entry should've caught' log from value.go. (#1170)
- Rework concurrency semantics of valueLog.maxFid. (#1187)
### Performance
- Use fastRand instead of locked-rand in skiplist. (#1173)
- Improve write stalling on level 0 and 1. (#1186)
- Disable compression and set ZSTD Compression Level to 1. (#1191)
## [2.0.1] - 2020-01-02
### New APIs
- badger.Options
  - WithInMemory (f5b6321)
  - WithZSTDCompressionLevel (3eb4e72)
- Badger.TableInfo
  - EstimatedSz (f46f8ea)
### Features
- Introduce in-memory mode in badger. (#1113)
### Fixed
- Limit manifest's change set size. (#1119)
- Cast idx to uint32 to fix compilation on i386. (#1118)
- Fix request increment ref bug. (#1121)
- Fix windows dataloss issue. (#1134)
- Fix VerifyValueChecksum checks. (#1138)
- Fix encryption in stream writer. (#1146)
- Fix segmentation fault in vlog.Read. (header.Decode) (#1150)
- Fix merge iterator duplicates issue. (#1157)
### Performance
- Set level 15 as default compression level in Zstd. (#1111)
- Optimize createTable in stream_writer.go. (#1132)
## [2.0.0] - 2019-11-12
### New APIs
- badger.DB
  - NewWriteBatchAt (7f43769)
  - CacheMetrics (b9056f1)
- badger.Options
  - WithMaxCacheSize (b9056f1)
  - WithEventLogging (75c6a44)
  - WithBlockSize (1439463)
  - WithBloomFalsePositive (1439463)
  - WithKeepL0InMemory (ee70ff2)
  - WithVerifyValueChecksum (ee70ff2)
  - WithCompression (5f3b061)
  - WithEncryptionKey (a425b0e)
  - WithEncryptionKeyRotationDuration (a425b0e)
  - WithChecksumVerificationMode (7b4083d)
### Features
- Data cache to speed up lookups and iterations. (#1066)
- Data compression. (#1013)
- Data encryption-at-rest. (#1042)
### Fixed
- Fix deadlock when flushing discard stats. (#976)
- Set move key's expiresAt for keys with TTL. (#1006)
- Fix unsafe usage in Decode. (#1097)
- Fix race condition on db.orc.nextTxnTs. (#1101)
- Fix level 0 GC dataloss bug. (#1090)
- Fix deadlock in discard stats. (#1070)
- Support checksum verification for values read from vlog. (#1052)
- Store entire L0 in memory. (#963)
- Fix table.Smallest/Biggest and iterator Prefix bug. (#997)
- Use standard proto functions for Marshal/Unmarshal and Size. (#994)
- Fix boundaries on GC batch size. (#987)
- VlogSize to store correct directory name to expvar.Map. (#956)
- Fix transaction too big issue in restore. (#957)
- Fix race condition in updateDiscardStats. (#973)
- Cast results of len to uint32 to fix compilation in i386 arch. (#961)
- Making the stream writer APIs goroutine-safe. (#959)
- Fix prefix bug in key iterator and allow all versions. (#950)
- Drop discard stats if we can't unmarshal it. (#936)
- Fix race condition in flushDiscardStats function. (#921)
- Ensure rewrite in vlog is within transactional limits. (#911)
- Fix discard stats moved by GC bug. (#929)
- Fix busy-wait loop in Watermark. (#920)
### Performance
- Introduce fast merge iterator. (#1080)
- Binary search based table picker. (#983)
- Flush vlog buffer if it grows beyond threshold. (#1067)
- Introduce StreamDone in Stream Writer. (#1061)
- Performance Improvements to block iterator. (#977)
- Prevent unnecessary safecopy in iterator parseKV. (#971)
- Use pointers instead of binary encoding. (#965)
- Reuse block iterator inside table iterator. (#972)
- [breaking/format] Remove vlen from entry header. (#945)
- Replace FarmHash with AESHash for Oracle conflicts. (#952)
- [breaking/format] Optimize Bloom filters. (#940)
- [breaking/format] Use varint for header encoding (without header length). (#935)
- Change file picking strategy in compaction. (#894)
- [breaking/format] Block level changes. (#880)
- [breaking/format] Add key-offset index to the end of SST table. (#881)
## [1.6.0] - 2019-07-01
This is a release including almost 200 commits, so expect many changes - some of them
not backward compatible.
Regarding backward compatibility in Badger versions, you might be interested in reading
[VERSIONING.md](VERSIONING.md).
_Note_: The hashes in parentheses correspond to the commits that impacted the given feature.
### New APIs
- badger.DB
  - DropPrefix (291295e)
  - Flatten (7e41bba)
  - KeySplits (4751ef1)
  - MaxBatchCount (b65e2a3)
  - MaxBatchSize (b65e2a3)
  - PrintKeyValueHistogram (fd59907)
  - Subscribe (26128a7)
  - Sync (851e462)
- badger.DefaultOptions() and badger.LSMOnlyOptions() (91ce687)
- badger.Options.WithX methods
- badger.Entry (e9447c9)
  - NewEntry
  - WithMeta
  - WithDiscard
  - WithTTL
- badger.Item
  - KeySize (fd59907)
  - ValueSize (5242a99)
- badger.IteratorOptions
  - PickTable (7d46029, 49a49e3)
  - Prefix (7d46029)
- badger.Logger (fbb2778)
- badger.Options
  - CompactL0OnClose (7e41bba)
  - Logger (3f66663)
  - LogRotatesToFlush (2237832)
- badger.Stream (14cbd89, 3258067)
- badger.StreamWriter (7116e16)
- badger.TableInfo.KeyCount (fd59907)
- badger.TableManifest (2017987)
- badger.Tx.NewKeyIterator (49a49e3)
- badger.WriteBatch (6daccf9, 7e78e80)
### Modified APIs
#### Breaking changes:
- badger.DefaultOptions and badger.LSMOnlyOptions are now functions rather than variables (91ce687)
- badger.Item.Value now receives a function that returns an error (439fd46)
- badger.Txn.Commit doesn't receive any params now (6daccf9)
- badger.DB.Tables now receives a boolean (76b5341)
#### Not breaking changes:
- badger.LSMOptions changed values (799c33f)
- badger.DB.NewIterator now allows multiple iterators per RO txn (41d9656)
- badger.Options.TableLoadingMode's new default is options.MemoryMap (6b97bac)
### Removed APIs
- badger.ManagedDB (d22c0e8)
- badger.Options.DoNotCompact (7e41bba)
- badger.Txn.SetWithX (e9447c9)
### Tools:
- badger bank disect (13db058)
- badger bank test (13db058) --mmap (03870e3)
- badger fill (7e41bba)
- badger flatten (7e41bba)
- badger info --histogram (fd59907) --history --lookup --show-keys --show-meta --with-prefix (09e9b63) --show-internal (fb2eed9)
- badger benchmark read (239041e)
- badger benchmark write (6d3b67d)
## [1.5.5] - 2019-06-20
* Introduce support for Go Modules
## [1.5.3] - 2018-07-11
Bug Fixes:
* Fix a panic caused by item.vptr not copying over vs.Value when looking
for a move key.
## [1.5.2] - 2018-06-19
Bug Fixes:
* Fix the way move key gets generated.
* If a transaction has an unclosed iterator, or multiple iterators running simultaneously,
throw a panic. Every iterator must be properly closed. At any point in time,
only one iterator per transaction can be running. This is to avoid bugs in a
transaction data structure which is thread unsafe.
* *Warning: This change might cause panics in user code. Fix is to properly
close your iterators, and only have one running at a time per transaction.*
## [1.5.1] - 2018-06-04
Bug Fixes:
* Fix for infinite yieldItemValue recursion. #503
* Fix recursive addition of `badgerMove` prefix. https://github.com/dgraph-io/badger/commit/2e3a32f0ccac3066fb4206b28deb39c210c5266f
* Use file size based window size for sampling, instead of fixing it to 10MB. #501
Cleanup:
* Clarify comments and documentation.
* Move badger tool one directory level up.
## [1.5.0] - 2018-05-08
* Introduce `NumVersionsToKeep` option. This option is used to discard many
versions of the same key, which saves space.
* Add a new `SetWithDiscard` method, which would indicate that all the older
versions of the key are now invalid. Those versions would be discarded during
compactions.
* Value log GC moves are now bound to another keyspace to ensure latest versions
of data are always at the top in LSM tree.
* Introduce `ValueLogMaxEntries` to restrict the number of key-value pairs per
value log file. This helps bound the time it takes to garbage collect one
file.
## [1.4.0] - 2018-05-04
* Make mmap-ing of value log optional.
* Run GC multiple times, based on recorded discard statistics.
* Add MergeOperator.
* Force compact L0 on close (#439).
* Add truncate option to warn about data loss (#452).
* Discard key versions during compaction (#464).
* Introduce new `LSMOnlyOptions`, to make Badger act like a typical LSM based DB.
Bug fix:
* (Temporary) Check max version across all tables in Get (removed in next
release).
* Update commit and read ts while loading from backup.
* Ensure all transaction entries are part of the same value log file.
* On commit, run unlock callbacks before doing writes (#413).
* Wait for goroutines to finish before closing iterators (#421).
## [1.3.0] - 2017-12-12
* Add `DB.NextSequence()` method to generate monotonically increasing integer
sequences.
* Add `DB.Size()` method to return the size of LSM and value log files.
* Tweaked mmap code to make Windows 32-bit builds work.
* Tweaked build tags on some files to make iOS builds work.
* Fix `DB.PurgeOlderVersions()` to not violate some constraints.
## [1.2.0] - 2017-11-30
* Expose a `Txn.SetEntry()` method to allow setting the key-value pair
and all the metadata at the same time.
## [1.1.1] - 2017-11-28
* Fix bug where txn.Get was returning a key deleted in the same transaction.
* Fix race condition while decrementing reference in oracle.
* Update doneCommit in the callback for CommitAsync.
* Iterator sees writes of current txn.
## [1.1.0] - 2017-11-13
* Create Badger directory if it does not exist when `badger.Open` is called.
* Added `Item.ValueCopy()` to avoid deadlocks in long-running iterations
* Fixed 64-bit alignment issues to make Badger run on Arm v7
## [1.0.1] - 2017-11-06
* Fix a uint16 overflow when resizing key slice
[Unreleased]: https://github.com/dgraph-io/badger/compare/v2.2007.2...HEAD
[2.2007.2]: https://github.com/dgraph-io/badger/compare/v2.2007.1...v2.2007.2
[2.2007.1]: https://github.com/dgraph-io/badger/compare/v2.2007.0...v2.2007.1
[2.2007.0]: https://github.com/dgraph-io/badger/compare/v2.0.3...v2.2007.0
[2.0.3]: https://github.com/dgraph-io/badger/compare/v2.0.2...v2.0.3
[2.0.2]: https://github.com/dgraph-io/badger/compare/v2.0.1...v2.0.2
[2.0.1]: https://github.com/dgraph-io/badger/compare/v2.0.0...v2.0.1
[2.0.0]: https://github.com/dgraph-io/badger/compare/v1.6.0...v2.0.0
[1.6.0]: https://github.com/dgraph-io/badger/compare/v1.5.5...v1.6.0
[1.5.5]: https://github.com/dgraph-io/badger/compare/v1.5.3...v1.5.5
[1.5.3]: https://github.com/dgraph-io/badger/compare/v1.5.2...v1.5.3
[1.5.2]: https://github.com/dgraph-io/badger/compare/v1.5.1...v1.5.2
[1.5.1]: https://github.com/dgraph-io/badger/compare/v1.5.0...v1.5.1
[1.5.0]: https://github.com/dgraph-io/badger/compare/v1.4.0...v1.5.0
[1.4.0]: https://github.com/dgraph-io/badger/compare/v1.3.0...v1.4.0
[1.3.0]: https://github.com/dgraph-io/badger/compare/v1.2.0...v1.3.0
[1.2.0]: https://github.com/dgraph-io/badger/compare/v1.1.1...v1.2.0
[1.1.1]: https://github.com/dgraph-io/badger/compare/v1.1.0...v1.1.1
[1.1.0]: https://github.com/dgraph-io/badger/compare/v1.0.1...v1.1.0
[1.0.1]: https://github.com/dgraph-io/badger/compare/v1.0.0...v1.0.1
badger-2.2007.2/CODE_OF_CONDUCT.md

# Code of Conduct
Our Code of Conduct can be found here:
https://dgraph.io/conduct
badger-2.2007.2/CONTRIBUTING.md

# Contribution Guide
* [Before you get started](#before-you-get-started)
  * [Code of Conduct](#code-of-conduct)
* [Your First Contribution](#your-first-contribution)
  * [Find a good first topic](#find-a-good-first-topic)
* [Setting up your development environment](#setting-up-your-development-environment)
  * [Fork the project](#fork-the-project)
  * [Clone the project](#clone-the-project)
  * [New branch for a new code](#new-branch-for-a-new-code)
  * [Test](#test)
  * [Commit and push](#commit-and-push)
  * [Create a Pull Request](#create-a-pull-request)
  * [Sign the CLA](#sign-the-cla)
  * [Get a code review](#get-a-code-review)
## Before you get started
### Code of Conduct
Please make sure to read and observe our [Code of Conduct](./CODE_OF_CONDUCT.md).
## Your First Contribution
### Find a good first topic
You can start by finding an existing issue with the
[good first issue](https://github.com/dgraph-io/badger/labels/good%20first%20issue) or [help wanted](https://github.com/dgraph-io/badger/labels/help%20wanted) labels. These issues are well suited for new contributors.
## Setting up your development environment
Badger uses [`Go Modules`](https://github.com/golang/go/wiki/Modules)
to manage dependencies. The version of Go should be **1.12** or above.
### Fork the project
- Visit https://github.com/dgraph-io/badger
- Click the `Fork` button (top right) to create a fork of the repository
### Clone the project
```sh
$ git clone https://github.com/$GITHUB_USER/badger
$ cd badger
$ git remote add upstream git@github.com:dgraph-io/badger.git
# Never push to the upstream master
git remote set-url --push upstream no_push
```
### New branch for a new code
Get your local master up to date:
```sh
$ git fetch upstream
$ git checkout master
$ git rebase upstream/master
```
Create a new branch from the master:
```sh
$ git checkout -b my_new_feature
```
And now you can finally add your changes to the project.
### Test
Build and run all tests:
```sh
$ ./test.sh
```
### Commit and push
Commit your changes:
```sh
$ git commit
```
When the changes are ready to review:
```sh
$ git push origin my_new_feature
```
### Create a Pull Request
Just open `https://github.com/$GITHUB_USER/badger/pull/new/my_new_feature` and
fill in the PR description.
### Sign the CLA
Click the **Sign in with Github to agree** button to sign the CLA. [An example](https://cla-assistant.io/dgraph-io/badger?pullRequest=1377).
### Get a code review
Once your pull request (PR) is opened, it will be assigned to one or more
reviewers. Those reviewers will do a code review.
To address review comments, you should commit the changes to the same branch of
the PR on your fork.
badger-2.2007.2/LICENSE

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
badger-2.2007.2/README.md

# BadgerDB [GoDoc](https://godoc.org/github.com/dgraph-io/badger) [Go Report Card](https://goreportcard.com/report/github.com/dgraph-io/badger) [Sourcegraph](https://sourcegraph.com/github.com/dgraph-io/badger?badge) [Build Status](https://teamcity.dgraph.io/viewLog.html?buildTypeId=Badger_UnitTests&buildId=lastFinished&guest=1) [Coverage Status](https://coveralls.io/github/dgraph-io/badger?branch=master)

BadgerDB is an embeddable, persistent and fast key-value (KV) database written
in pure Go. It is the underlying database for [Dgraph](https://dgraph.io), a
fast, distributed graph database. It's meant to be a performant alternative to
non-Go-based key-value stores like RocksDB.
## Project Status [March 24, 2020]
Badger is stable and is being used to serve data sets worth hundreds of
terabytes. Badger supports concurrent ACID transactions with serializable
snapshot isolation (SSI) guarantees. A Jepsen-style bank test runs nightly for
8h with the `--race` flag to ensure that transactional guarantees are maintained.
Badger has also been tested to work with filesystem level anomalies, to ensure
persistence and consistency. Badger is being used by a number of projects which
include Dgraph, Jaeger Tracing, UsenetExpress, and many more.
The list of projects using Badger can be found [here](#projects-using-badger).
Badger v1.0 was released in Nov 2017, and the latest version that is data-compatible
with v1.0 is v1.6.0.
Badger v2.0 was released in Nov 2019 with a new storage format that is not
compatible with v1.x. Badger v2.0 supports compression, encryption, and uses a cache to speed up lookups.
The [Changelog] is kept fairly up-to-date.
For more details on our version naming schema please read [Choosing a version](#choosing-a-version).
[Changelog]:https://github.com/dgraph-io/badger/blob/master/CHANGELOG.md
## Table of Contents
* [Getting Started](#getting-started)
  + [Installing](#installing)
    - [Choosing a version](#choosing-a-version)
  + [Opening a database](#opening-a-database)
  + [Transactions](#transactions)
    - [Read-only transactions](#read-only-transactions)
    - [Read-write transactions](#read-write-transactions)
    - [Managing transactions manually](#managing-transactions-manually)
  + [Using key/value pairs](#using-keyvalue-pairs)
  + [Monotonically increasing integers](#monotonically-increasing-integers)
* [Merge Operations](#merge-operations)
  + [Setting Time To Live(TTL) and User Metadata on Keys](#setting-time-to-livettl-and-user-metadata-on-keys)
  + [Iterating over keys](#iterating-over-keys)
    - [Prefix scans](#prefix-scans)
    - [Key-only iteration](#key-only-iteration)
  + [Stream](#stream)
  + [Garbage Collection](#garbage-collection)
  + [Database backup](#database-backup)
  + [Memory usage](#memory-usage)
  + [Statistics](#statistics)
* [Resources](#resources)
  + [Blog Posts](#blog-posts)
* [Contact](#contact)
* [Design](#design)
  + [Comparisons](#comparisons)
  + [Benchmarks](#benchmarks)
* [Projects Using Badger](#projects-using-badger)
* [Contributing](#contributing)
* [Frequently Asked Questions](#frequently-asked-questions)
## Getting Started
### Installing
To start using Badger, install Go 1.12 or above and run `go get`:
```sh
$ go get github.com/dgraph-io/badger/v2
```
This will retrieve the library and install the `badger` command line
utility into your `$GOBIN` path.
##### Note: Badger does not directly use CGO, but it relies on https://github.com/DataDog/zstd for compression, which requires gcc/cgo. If you wish to use badger without gcc/cgo, you can run `CGO_ENABLED=0 go get github.com/dgraph-io/badger/...`, which will download badger without support for the ZSTD compression algorithm.
#### Choosing a version
BadgerDB is a pretty special package: the most important changes we can make
to it are not to its API but to how data is stored on disk.
This is why we follow a version naming schema that differs from Semantic Versioning.
- New major versions are released when the data format on disk changes in an incompatible way.
- New minor versions are released whenever the API changes but data compatibility is maintained.
Note that API changes can be backward-incompatible, unlike in Semantic Versioning.
- New patch versions are released when there are no changes to either the data format or the API.
Following these rules:
- v1.5.0 and v1.6.0 can be used on top of the same files without any concerns, as their major
version is the same, therefore the data format on disk is compatible.
- v1.6.0 and v2.0.0 are data incompatible as their major version implies, so files created with
v1.6.0 will need to be converted into the new format before they can be used by v2.0.0.
For a longer explanation on the reasons behind using a new versioning naming schema, you can read
[VERSIONING.md](VERSIONING.md).
### Opening a database
The top-level object in Badger is a `DB`. It represents multiple files on disk
in specific directories, which contain the data for a single database.
To open your database, use the `badger.Open()` function, with the appropriate
options. The `Dir` and `ValueDir` options are mandatory and must be
specified by the client. They can be set to the same value to simplify things.
```go
package main
import (
"log"
badger "github.com/dgraph-io/badger/v2"
)
func main() {
// Open the Badger database located in the /tmp/badger directory.
// It will be created if it doesn't exist.
db, err := badger.Open(badger.DefaultOptions("/tmp/badger"))
if err != nil {
log.Fatal(err)
}
defer db.Close()
// Your code here…
}
```
Please note that Badger obtains a lock on the directories so multiple processes
cannot open the same database at the same time.
#### In-Memory Mode/Diskless Mode
By default, Badger ensures all the data is persisted to the disk. It also supports a pure
in-memory mode. When Badger is running in in-memory mode, all the data is stored in the memory.
Reads and writes are much faster in in-memory mode, but all the data stored in Badger will be lost
in case of a crash or close. To open badger in in-memory mode, set the `InMemory` option.
```go
opt := badger.DefaultOptions("").WithInMemory(true)
```
### Transactions
#### Read-only transactions
To start a read-only transaction, you can use the `DB.View()` method:
```go
err := db.View(func(txn *badger.Txn) error {
// Your code here…
return nil
})
```
You cannot perform any writes or deletes within this transaction. Badger
ensures that you get a consistent view of the database within this closure. Any
writes that happen elsewhere after the transaction has started will not be
seen by calls made within the closure.
#### Read-write transactions
To start a read-write transaction, you can use the `DB.Update()` method:
```go
err := db.Update(func(txn *badger.Txn) error {
// Your code here…
return nil
})
```
All database operations are allowed inside a read-write transaction.
Always check the returned error value. If you return an error
within your closure it will be passed through.
An `ErrConflict` error will be reported in case of a conflict. Depending on the state
of your application, you have the option to retry the operation if you receive
this error.
An `ErrTxnTooBig` will be reported in case the number of pending writes/deletes in
the transaction exceeds a certain limit. In that case, it is best to commit the
transaction and start a new transaction immediately. Here is an example (we are
not checking for errors in some places for simplicity):
```go
updates := make(map[string]string)
txn := db.NewTransaction(true)
for k,v := range updates {
if err := txn.Set([]byte(k),[]byte(v)); err == badger.ErrTxnTooBig {
_ = txn.Commit()
txn = db.NewTransaction(true)
_ = txn.Set([]byte(k),[]byte(v))
}
}
_ = txn.Commit()
```
#### Managing transactions manually
The `DB.View()` and `DB.Update()` methods are wrappers around the
`DB.NewTransaction()` and `Txn.Commit()` methods (or `Txn.Discard()` in case of
read-only transactions). These helper methods will start the transaction,
execute a function, and then safely discard your transaction if an error is
returned. This is the recommended way to use Badger transactions.
However, sometimes you may want to manually create and commit your
transactions. You can use the `DB.NewTransaction()` function directly, which
takes in a boolean argument to specify whether a read-write transaction is
required. For read-write transactions, it is necessary to call `Txn.Commit()`
to ensure the transaction is committed. For read-only transactions, calling
`Txn.Discard()` is sufficient. `Txn.Commit()` also calls `Txn.Discard()`
internally to cleanup the transaction, so just calling `Txn.Commit()` is
sufficient for read-write transaction. However, if your code doesn’t call
`Txn.Commit()` for some reason (for e.g it returns prematurely with an error),
then please make sure you call `Txn.Discard()` in a `defer` block. Refer to the
code below.
```go
// Start a writable transaction.
txn := db.NewTransaction(true)
defer txn.Discard()
// Use the transaction...
err := txn.Set([]byte("answer"), []byte("42"))
if err != nil {
return err
}
// Commit the transaction and check for error.
if err := txn.Commit(); err != nil {
return err
}
```
The first argument to `DB.NewTransaction()` is a boolean stating if the transaction
should be writable.
Badger allows an optional callback to the `Txn.Commit()` method. Normally, the
callback can be set to `nil`, and the method will return after all the writes
have succeeded. However, if this callback is provided, the `Txn.Commit()`
method returns as soon as it has checked for any conflicts. The actual writing
to the disk happens asynchronously, and the callback is invoked once the
writing has finished, or an error has occurred. This can improve the throughput
of the application in some cases. But it also means that a transaction is not
durable until the callback has been invoked with a `nil` error value.
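As an illustration (not part of the original text), the asynchronous-commit pattern described above might look roughly like the sketch below. The callback-taking variant is exposed as `Txn.CommitWith` in the v2 API; treat the exact method name as an assumption and check the godoc for your version.
```go
// Start a writable transaction and queue a write.
txn := db.NewTransaction(true)
defer txn.Discard()

if err := txn.Set([]byte("answer"), []byte("42")); err != nil {
  return err
}

// CommitWith returns once conflicts have been checked; the callback runs
// when the write has actually finished (or failed). The transaction is not
// durable until the callback is invoked with a nil error.
done := make(chan error, 1)
txn.CommitWith(func(err error) {
  done <- err
})

// ... other work can proceed here ...

if err := <-done; err != nil {
  return err
}
```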
### Using key/value pairs
To save a key/value pair, use the `Txn.Set()` method:
```go
err := db.Update(func(txn *badger.Txn) error {
err := txn.Set([]byte("answer"), []byte("42"))
return err
})
```
Key/Value pair can also be saved by first creating `Entry`, then setting this
`Entry` using `Txn.SetEntry()`. `Entry` also exposes methods to set properties
on it.
```go
err := db.Update(func(txn *badger.Txn) error {
e := badger.NewEntry([]byte("answer"), []byte("42"))
err := txn.SetEntry(e)
return err
})
```
This will set the value of the `"answer"` key to `"42"`. To retrieve this
value, we can use the `Txn.Get()` method:
```go
err := db.View(func(txn *badger.Txn) error {
item, err := txn.Get([]byte("answer"))
handle(err)
var valNot, valCopy []byte
err = item.Value(func(val []byte) error {
// This func with val would only be called if item.Value encounters no error.
// Accessing val here is valid.
fmt.Printf("The answer is: %s\n", val)
// Copying or parsing val is valid.
valCopy = append([]byte{}, val...)
// Assigning val slice to another variable is NOT OK.
valNot = val // Do not do this.
return nil
})
handle(err)
// DO NOT access val here. It is the most common cause of bugs.
fmt.Printf("NEVER do this. %s\n", valNot)
// You must copy it to use it outside item.Value(...).
fmt.Printf("The answer is: %s\n", valCopy)
// Alternatively, you could also use item.ValueCopy().
valCopy, err = item.ValueCopy(nil)
handle(err)
fmt.Printf("The answer is: %s\n", valCopy)
return nil
})
```
`Txn.Get()` returns `ErrKeyNotFound` if the value is not found.
Please note that values returned from `Get()` are only valid while the
transaction is open. If you need to use a value outside of the transaction
then you must use `copy()` to copy it to another byte slice.
Use the `Txn.Delete()` method to delete a key.
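For completeness, here is a small illustrative sketch (not from the original text) that deletes a key and then shows how `ErrKeyNotFound` distinguishes a missing key from a real error:
```go
err := db.Update(func(txn *badger.Txn) error {
  return txn.Delete([]byte("answer"))
})
handle(err)

err = db.View(func(txn *badger.Txn) error {
  _, err := txn.Get([]byte("answer"))
  if err == badger.ErrKeyNotFound {
    fmt.Println("the key has been deleted")
    return nil
  }
  return err
})
handle(err)
```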
### Monotonically increasing integers
To get unique monotonically increasing integers with strong durability, you can
use the `DB.GetSequence` method. This method returns a `Sequence` object, which
is thread-safe and can be used concurrently via various goroutines.
Badger would lease a range of integers to hand out from memory, with the
bandwidth provided to `DB.GetSequence`. The frequency at which disk writes are
done is determined by this lease bandwidth and the frequency of `Next`
invocations. Setting the bandwidth too low would cause more disk writes; setting it
too high would result in wasted integers if Badger is closed or crashes.
To avoid wasted integers, call `Release` before closing Badger.
```go
seq, err := db.GetSequence(key, 1000)
defer seq.Release()
for {
num, err := seq.Next()
}
```
### Merge Operations
Badger provides support for ordered merge operations. You can define a func
of type `MergeFunc` which takes in an existing value, and a value to be
_merged_ with it. It returns a new value which is the result of the _merge_
operation. All values are specified as byte slices. For example, here is a merge
function (`add`) which appends a `[]byte` value to an existing `[]byte` value.
```Go
// Merge function to append one byte slice to another
func add(originalValue, newValue []byte) []byte {
return append(originalValue, newValue...)
}
```
This function can then be passed to the `DB.GetMergeOperator()` method, along
with a key, and a duration value. The duration specifies how often the merge
function is run on values that have been added using the `MergeOperator.Add()`
method.
The `MergeOperator.Get()` method can be used to retrieve the cumulative value of the key
associated with the merge operation.
```Go
key := []byte("merge")
m := db.GetMergeOperator(key, add, 200*time.Millisecond)
defer m.Stop()
m.Add([]byte("A"))
m.Add([]byte("B"))
m.Add([]byte("C"))
res, _ := m.Get() // res should have value ABC encoded
```
Example: Merge operator which increments a counter
```Go
func uint64ToBytes(i uint64) []byte {
var buf [8]byte
binary.BigEndian.PutUint64(buf[:], i)
return buf[:]
}
func bytesToUint64(b []byte) uint64 {
return binary.BigEndian.Uint64(b)
}
// Merge function to add two uint64 numbers
func add(existing, new []byte) []byte {
return uint64ToBytes(bytesToUint64(existing) + bytesToUint64(new))
}
```
It can be used as
```Go
key := []byte("merge")
m := db.GetMergeOperator(key, add, 200*time.Millisecond)
defer m.Stop()
m.Add(uint64ToBytes(1))
m.Add(uint64ToBytes(2))
m.Add(uint64ToBytes(3))
res, _ := m.Get() // res should have value 6 encoded
```
### Setting Time To Live(TTL) and User Metadata on Keys
Badger allows setting an optional Time to Live (TTL) value on keys. Once the TTL has
elapsed, the key will no longer be retrievable and will be eligible for garbage
collection. A TTL can be set as a `time.Duration` value using the `Entry.WithTTL()`
and `Txn.SetEntry()` API methods.
```go
err := db.Update(func(txn *badger.Txn) error {
e := badger.NewEntry([]byte("answer"), []byte("42")).WithTTL(time.Hour)
err := txn.SetEntry(e)
return err
})
```
An optional user metadata value can be set on each key. A user metadata value
is represented by a single byte. It can be used to set certain bits along
with the key to aid in interpreting or decoding the key-value pair. User
metadata can be set using `Entry.WithMeta()` and `Txn.SetEntry()` API methods.
```go
err := db.Update(func(txn *badger.Txn) error {
e := badger.NewEntry([]byte("answer"), []byte("42")).WithMeta(byte(1))
err := txn.SetEntry(e)
return err
})
```
`Entry` APIs can be used to add user metadata and a TTL for the same key. This `Entry`
then can be set using `Txn.SetEntry()`.
```go
err := db.Update(func(txn *badger.Txn) error {
e := badger.NewEntry([]byte("answer"), []byte("42")).WithMeta(byte(1)).WithTTL(time.Hour)
err := txn.SetEntry(e)
return err
})
```
### Iterating over keys
To iterate over keys, we can use an `Iterator`, which can be obtained using the
`Txn.NewIterator()` method. Iteration happens in byte-wise lexicographical sorting
order.
```go
err := db.View(func(txn *badger.Txn) error {
opts := badger.DefaultIteratorOptions
opts.PrefetchSize = 10
it := txn.NewIterator(opts)
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
k := item.Key()
err := item.Value(func(v []byte) error {
fmt.Printf("key=%s, value=%s\n", k, v)
return nil
})
if err != nil {
return err
}
}
return nil
})
```
The iterator allows you to move to a specific point in the list of keys and move
forward or backward through the keys one at a time.
By default, Badger prefetches the values of the next 100 items. You can adjust
that with the `IteratorOptions.PrefetchSize` field. However, setting it to
a value higher than `GOMAXPROCS` (which we recommend to be 128 or higher)
shouldn’t give any additional benefits. You can also turn off the fetching of
values altogether. See section below on key-only iteration.
#### Prefix scans
To iterate over a key prefix, you can combine `Seek()` and `ValidForPrefix()`:
```go
db.View(func(txn *badger.Txn) error {
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
prefix := []byte("1234")
for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
item := it.Item()
k := item.Key()
err := item.Value(func(v []byte) error {
fmt.Printf("key=%s, value=%s\n", k, v)
return nil
})
if err != nil {
return err
}
}
return nil
})
```
#### Key-only iteration
Badger supports a unique mode of iteration called _key-only_ iteration. It is
several orders of magnitude faster than regular iteration, because it involves
access to the LSM-tree only, which is usually resident entirely in RAM. To
enable key-only iteration, you need to set the `IteratorOptions.PrefetchValues`
field to `false`. This can also be used to do sparse reads for selected keys
during an iteration, by calling `item.Value()` only when required.
```go
err := db.View(func(txn *badger.Txn) error {
opts := badger.DefaultIteratorOptions
opts.PrefetchValues = false
it := txn.NewIterator(opts)
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
k := item.Key()
fmt.Printf("key=%s\n", k)
}
return nil
})
```
### Stream
Badger provides a Stream framework, which concurrently iterates over all or a
portion of the DB, converting data into custom key-values, and streams it out
serially to be sent over the network, written to disk, or even written back to
Badger. This is a much faster way to iterate over Badger than using a single
Iterator. Stream supports Badger in both managed and normal mode.
Stream uses the natural boundaries created by SSTables within the LSM tree, to
quickly generate key ranges. Each goroutine then picks a range and runs an
iterator to iterate over it. Each iterator iterates over all versions of values
and is created from the same transaction, thus working over a snapshot of the
DB. Every time a new key is encountered, it calls `ChooseKey(item)`, followed
by `KeyToList(key, itr)`. This allows a user to select or reject that key, and
if selected, convert the value versions into custom key-values. The goroutine
batches up 4MB worth of key-values, before sending it over to a channel.
Another goroutine further batches up data from this channel using *smart
batching* algorithm and calls `Send` serially.
This framework is designed for high throughput key-value iteration, spreading
the work of iteration across many goroutines. `DB.Backup` uses this framework to
provide full and incremental backups quickly. Dgraph is a heavy user of this
framework. In fact, this framework was developed and used within Dgraph, before
getting ported over to Badger.
```go
stream := db.NewStream()
// db.NewStreamAt(readTs) for managed mode.
// -- Optional settings
stream.NumGo = 16 // Set number of goroutines to use for iteration.
stream.Prefix = []byte("some-prefix") // Leave nil for iteration over the whole DB.
stream.LogPrefix = "Badger.Streaming" // For identifying stream logs. Outputs to Logger.
// ChooseKey is called concurrently for every key. If left nil, assumes true by default.
stream.ChooseKey = func(item *badger.Item) bool {
return bytes.HasSuffix(item.Key(), []byte("er"))
}
// KeyToList is called concurrently for chosen keys. This can be used to convert
// Badger data into custom key-values. If nil, uses stream.ToList, a default
// implementation, which picks all valid key-values.
stream.KeyToList = nil
// -- End of optional settings.
// Send is called serially, while Stream.Orchestrate is running.
stream.Send = func(list *pb.KVList) error {
return proto.MarshalText(w, list) // Write to w.
}
// Run the stream
if err := stream.Orchestrate(context.Background()); err != nil {
return err
}
// Done.
```
### Garbage Collection
Badger values need to be garbage collected, because of two reasons:
* Badger keeps values separately from the LSM tree. This means that the compaction operations
that clean up the LSM tree do not touch the values at all. Values need to be cleaned up
separately.
* Concurrent read/write transactions could leave behind multiple values for a single key, because they
are stored with different versions. These could accumulate, and take up unneeded space beyond the
time these older versions are needed.
Badger relies on the client to perform garbage collection at a time of their choosing. It provides
the following method, which can be invoked at an appropriate time:
* `DB.RunValueLogGC()`: This method is designed to do garbage collection while
Badger is online. Along with randomly picking a file, it uses statistics generated by the
LSM-tree compactions to pick files that are likely to lead to maximum space
reclamation. It is recommended to be called during periods of low activity in
your system, or periodically. One call would only result in the removal of at most
one log file. As an optimization, you could also immediately re-run it whenever
it returns a nil error (indicating a successful value log GC), as shown below.
```go
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
for range ticker.C {
again:
err := db.RunValueLogGC(0.7)
if err == nil {
goto again
}
}
```
* `DB.PurgeOlderVersions()`: This method is **DEPRECATED** since v1.5.0. Now, Badger's LSM tree automatically discards older/invalid versions of keys.
**Note: The RunValueLogGC method would not garbage collect the latest value log.**
### Database backup
There are two public API methods `DB.Backup()` and `DB.Load()` which can be
used to do online backups and restores. Badger v0.9 provides a CLI tool
`badger`, which can do offline backup/restore. Make sure you have `$GOPATH/bin`
in your PATH to use this tool.
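Before turning to the CLI, here is a rough sketch of the programmatic route. The parameter meanings (`since` timestamp for `Backup`, `maxPendingWrites` for `Load`) follow the v2 godoc and should be verified against your version; `restoredDB` is a hypothetical second handle opened over an empty directory.
```go
// Full backup: a since value of 0 asks for everything. The returned timestamp
// can be stored and passed to a later call to take an incremental backup.
f, err := os.Create("badger.bak")
if err != nil {
  return err
}
defer f.Close()

since, err := db.Backup(f, 0)
if err != nil {
  return err
}
_ = since // keep for the next incremental backup

// Restore into another (typically empty) database from the same file.
in, err := os.Open("badger.bak")
if err != nil {
  return err
}
defer in.Close()

if err := restoredDB.Load(in, 256); err != nil { // 256 = max pending writes
  return err
}
```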
The command below will create a version-agnostic backup of the database, to a
file `badger.bak` in the current working directory
```
badger backup --dir
```
To restore `badger.bak` in the current working directory to a new database:
```
badger restore --dir
```
See `badger --help` for more details.
If you have a Badger database that was created using v0.8 (or below), you can
use the `badger_backup` tool provided in v0.8.1, and then restore it using the
command above to upgrade your database to work with the latest version.
```
badger_backup --dir --backup-file badger.bak
```
We recommend all users to use the `Backup` and `Restore` APIs and tools. However,
Badger is also rsync-friendly because all files are immutable, barring the
latest value log, which is append-only. So rsync can be used as a rudimentary way
to perform a backup. In the following script, we repeat rsync to ensure that the
LSM tree remains consistent with the MANIFEST file while doing a full backup.
```
#!/bin/bash
set -o history
set -o histexpand
# Makes a complete copy of a Badger database directory.
# Repeat rsync if the MANIFEST and SSTables are updated.
rsync -avz --delete db/ dst
while !! | grep -q "(MANIFEST\|\.sst)$"; do :; done
```
### Memory usage
Badger's memory usage can be managed by tweaking several options available in
the `Options` struct that is passed in when opening the database using
`DB.Open`.
- `Options.ValueLogLoadingMode` can be set to `options.FileIO` (instead of the
default `options.MemoryMap`) to avoid memory-mapping log files. This can be
useful in environments with low RAM.
- Number of memtables (`Options.NumMemtables`)
- If you modify `Options.NumMemtables`, also adjust `Options.NumLevelZeroTables` and
`Options.NumLevelZeroTablesStall` accordingly.
- Number of concurrent compactions (`Options.NumCompactors`)
- Mode in which LSM tree is loaded (`Options.TableLoadingMode`)
- Size of table (`Options.MaxTableSize`)
- Size of value log file (`Options.ValueLogFileSize`)
If you want to decrease the memory usage of a Badger instance, tweak these
options (ideally one at a time) until you achieve the desired memory usage, as
sketched below.
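As an illustration, a low-memory configuration could be sketched roughly as follows; the
specific values here are assumptions for the example, not recommendations. The `options`
package is `github.com/dgraph-io/badger/v2/options`.
```go
opts := badger.DefaultOptions("/tmp/badger").
	WithValueLogLoadingMode(options.FileIO). // don't memory-map value log files
	WithTableLoadingMode(options.FileIO).    // don't memory-map LSM tables
	WithNumMemtables(2).
	WithNumLevelZeroTables(2).
	WithNumLevelZeroTablesStall(4).
	WithNumCompactors(2).
	WithMaxTableSize(16 << 20).     // 16 MB tables
	WithValueLogFileSize(256 << 20) // 256 MB value log files

db, err := badger.Open(opts)
if err != nil {
	return err
}
defer db.Close()
```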
### Statistics
Badger records metrics using the [expvar] package, which is included in the Go
standard library. All the metrics are documented in the [y/metrics.go][metrics] file.
The `expvar` package adds a handler to the default HTTP server (which has to be
started explicitly) and serves the metrics at the `/debug/vars` endpoint.
These metrics can then be collected by a system like [Prometheus], to get
better visibility into what Badger is doing.
[expvar]: https://golang.org/pkg/expvar/
[metrics]: https://github.com/dgraph-io/badger/blob/master/y/metrics.go
[Prometheus]: https://prometheus.io/
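For example, since the handler is registered on the default mux, simply starting the default
HTTP server in your application exposes the metrics; the address below is just an assumption:
```go
import (
	"log"
	"net/http"
)

func init() {
	go func() {
		// Badger's expvar metrics become visible at
		// http://localhost:8080/debug/vars once this server is running.
		log.Println(http.ListenAndServe("localhost:8080", nil))
	}()
}
```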
## Resources
### Blog Posts
1. [Introducing Badger: A fast key-value store written natively in
Go](https://open.dgraph.io/post/badger/)
2. [Make Badger crash resilient with ALICE](https://blog.dgraph.io/post/alice/)
3. [Badger vs LMDB vs BoltDB: Benchmarking key-value databases in Go](https://blog.dgraph.io/post/badger-lmdb-boltdb/)
4. [Concurrent ACID Transactions in Badger](https://blog.dgraph.io/post/badger-txn/)
## Design
Badger was written with these design goals in mind:
- Write a key-value database in pure Go.
- Use latest research to build the fastest KV database for data sets spanning terabytes.
- Optimize for SSDs.
Badger’s design is based on a paper titled _[WiscKey: Separating Keys from
Values in SSD-conscious Storage][wisckey]_.
[wisckey]: https://www.usenix.org/system/files/conference/fast16/fast16-papers-lu.pdf
### Comparisons
| Feature | Badger | RocksDB | BoltDB |
| ------- | ------ | ------- | ------ |
| Design | LSM tree with value log | LSM tree only | B+ tree |
| High Read throughput | Yes | No | Yes |
| High Write throughput | Yes | Yes | No |
| Designed for SSDs | Yes (with latest research <sup>1</sup>) | Not specifically <sup>2</sup> | No |
| Embeddable | Yes | Yes | Yes |
| Sorted KV access | Yes | Yes | Yes |
| Pure Go (no Cgo) | Yes | No | Yes |
| Transactions | Yes, ACID, concurrent with SSI<sup>3</sup> | Yes (but non-ACID) | Yes, ACID |
| Snapshots | Yes | Yes | Yes |
| TTL support | Yes | Yes | No |
| 3D access (key-value-version) | Yes<sup>4</sup> | No | No |
<sup>1</sup> The [WISCKEY paper][wisckey] (on which Badger is based) saw big
wins with separating values from keys, significantly reducing the write
amplification compared to a typical LSM tree.
<sup>2</sup> RocksDB is an SSD optimized version of LevelDB, which was designed specifically for rotating disks.
As such RocksDB's design isn't aimed at SSDs.
<sup>3</sup> SSI: Serializable Snapshot Isolation. For more details, see the blog post [Concurrent ACID Transactions in Badger](https://blog.dgraph.io/post/badger-txn/)
<sup>4</sup> Badger provides direct access to value versions via its Iterator API.
Users can also specify how many versions to keep per key via Options.
### Benchmarks
We have run comprehensive benchmarks against RocksDB, Bolt and LMDB. The
benchmarking code and the detailed logs for the benchmarks can be found in the
[badger-bench] repo. More explanation, including graphs, can be found in the blog
posts (linked above).
[badger-bench]: https://github.com/dgraph-io/badger-bench
## Projects Using Badger
Below is a list of known projects that use Badger:
* [Dgraph](https://github.com/dgraph-io/dgraph) - Distributed graph database.
* [Jaeger](https://github.com/jaegertracing/jaeger) - Distributed tracing platform.
* [go-ipfs](https://github.com/ipfs/go-ipfs) - Go client for the InterPlanetary File System (IPFS), a new hypermedia distribution protocol.
* [Riot](https://github.com/go-ego/riot) - An open-source, distributed search engine.
* [emitter](https://github.com/emitter-io/emitter) - Scalable, low latency, distributed pub/sub broker with message storage, uses MQTT, gossip and badger.
* [OctoSQL](https://github.com/cube2222/octosql) - Query tool that allows you to join, analyse and transform data from multiple databases using SQL.
* [Dkron](https://dkron.io/) - Distributed, fault tolerant job scheduling system.
* [Sandglass](https://github.com/celrenheit/sandglass) - distributed, horizontally scalable, persistent, time sorted message queue.
* [TalariaDB](https://github.com/grab/talaria) - Grab's Distributed, low latency time-series database.
* [Sloop](https://github.com/salesforce/sloop) - Salesforce's Kubernetes History Visualization Project.
* [Immudb](https://github.com/codenotary/immudb) - Lightweight, high-speed immutable database for systems and applications.
* [Usenet Express](https://usenetexpress.com/) - Serving over 300TB of data with Badger.
* [gorush](https://github.com/appleboy/gorush) - A push notification server written in Go.
* [0-stor](https://github.com/zero-os/0-stor) - Single device object store.
* [Dispatch Protocol](https://github.com/dispatchlabs/disgo) - Blockchain protocol for distributed application data analytics.
* [GarageMQ](https://github.com/valinurovam/garagemq) - AMQP server written in Go.
* [RedixDB](https://alash3al.github.io/redix/) - A real-time persistent key-value store with the same redis protocol.
* [BBVA](https://github.com/BBVA/raft-badger) - Raft backend implementation using BadgerDB for Hashicorp raft.
* [Fantom](https://github.com/Fantom-foundation/go-lachesis) - aBFT Consensus platform for distributed applications.
* [decred](https://github.com/decred/dcrdata) - An open, progressive, and self-funding cryptocurrency with a system of community-based governance integrated into its blockchain.
* [OpenNetSys](https://github.com/opennetsys/c3-go) - Create useful dApps in any software language.
* [HoneyTrap](https://github.com/honeytrap/honeytrap) - An extensible and opensource system for running, monitoring and managing honeypots.
* [Insolar](https://github.com/insolar/insolar) - Enterprise-ready blockchain platform.
* [IoTeX](https://github.com/iotexproject/iotex-core) - The next generation of the decentralized network for IoT powered by scalability- and privacy-centric blockchains.
* [go-sessions](https://github.com/kataras/go-sessions) - The sessions manager for Go net/http and fasthttp.
* [Babble](https://github.com/mosaicnetworks/babble) - BFT Consensus platform for distributed applications.
* [Tormenta](https://github.com/jpincas/tormenta) - Embedded object-persistence layer / simple JSON database for Go projects.
* [BadgerHold](https://github.com/timshannon/badgerhold) - An embeddable NoSQL store for querying Go types built on Badger
* [Goblero](https://github.com/didil/goblero) - Pure Go embedded persistent job queue backed by BadgerDB
* [Surfline](https://www.surfline.com) - Serving global wave and weather forecast data with Badger.
* [Cete](https://github.com/mosuka/cete) - Simple and highly available distributed key-value store built on Badger. Makes it easy bringing up a cluster of Badger with Raft consensus algorithm by hashicorp/raft.
* [Volument](https://volument.com/) - A new take on website analytics backed by Badger.
* [KVdb](https://kvdb.io/) - Hosted key-value store and serverless platform built on top of Badger.
If you are using Badger in a project please send a pull request to add it to the list.
## Contributing
If you're interested in contributing to Badger see [CONTRIBUTING.md](./CONTRIBUTING.md).
## Frequently Asked Questions
### My writes are getting stuck. Why?
**Update: With the new `Value(func(v []byte))` API, this deadlock can no longer
happen.**
The following is true for users on Badger v1.x.
This can happen if a long-running iteration is done with `Prefetch` set to false, but
an `Item::Value` call is made internally in the loop. That causes Badger to
acquire read locks over the value log files, to avoid value log GC removing the
file from underneath. As a side effect, this also blocks a new value log GC
file from being created when the value log file boundary is hit.
Please see Github issues [#293](https://github.com/dgraph-io/badger/issues/293)
and [#315](https://github.com/dgraph-io/badger/issues/315).
There are multiple workarounds during iteration:
1. Use `Item::ValueCopy` instead of `Item::Value` when retrieving a value.
1. Set `Prefetch` to true. Badger would then copy over the value and release the
file lock immediately.
1. When `Prefetch` is false, don't call `Item::Value` and do a pure key-only
iteration (see the sketch after this list). This might be useful if you just want to delete a lot of keys.
1. Do the writes in a separate transaction after the reads.
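For instance, the pure key-only iteration mentioned above could be sketched like this:
```go
err := db.View(func(txn *badger.Txn) error {
	opts := badger.DefaultIteratorOptions
	opts.PrefetchValues = false // key-only iteration, no value log access
	it := txn.NewIterator(opts)
	defer it.Close()
	for it.Rewind(); it.Valid(); it.Next() {
		k := it.Item().KeyCopy(nil)
		// Collect keys here, e.g. to delete them later in a separate transaction.
		_ = k
	}
	return nil
})
```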
### My writes are really slow. Why?
Are you creating a new transaction for every single key update, and waiting for
it to `Commit` fully before creating a new one? This will lead to very low
throughput.
We have created the `WriteBatch` API, which provides a way to batch up
many updates into a single transaction and `Commit` that transaction using
callbacks to avoid blocking. This amortizes the cost of a transaction really
well, and provides the most efficient way to do bulk writes.
```go
wb := db.NewWriteBatch()
defer wb.Cancel()
for i := 0; i < N; i++ {
err := wb.Set(key(i), value(i)) // Will create txns as needed.
handle(err)
}
handle(wb.Flush()) // Wait for all txns to finish.
```
Note that the `WriteBatch` API does not allow any reads. For read-modify-write
workloads, you should be using the `Transaction` API.
### I don't see any disk writes. Why?
If you're using Badger with `SyncWrites=false`, then your writes might not be written to the value
log and won't get synced to disk immediately. Writes to the LSM tree are done in memory first, before
they get compacted to disk. The compaction would only happen once `MaxTableSize` has been reached. So,
if you're doing a few writes and then checking, you might not see anything on disk. Once you `Close`
the database, you'll see these writes on disk.
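If you want every write synced to disk as it happens, you can open the DB with synchronous
writes enabled, at the cost of write throughput. A minimal sketch:
```go
db, err := badger.Open(badger.DefaultOptions("/tmp/badger").WithSyncWrites(true))
if err != nil {
	return err
}
defer db.Close()
```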
### Reverse iteration doesn't give me the right results.
Just like forward iteration goes to the first key which is equal to or greater than the SEEK key, reverse iteration goes to the first key which is equal to or less than the SEEK key. Therefore, the SEEK key would not be part of the results. You can typically add a `0xff` byte as a suffix to the SEEK key to include it in the results. See the following issues: [#436](https://github.com/dgraph-io/badger/issues/436) and [#347](https://github.com/dgraph-io/badger/issues/347).
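As a hedged sketch, reverse iteration that includes the seek key itself (by appending the
`0xff` suffix) could look like this; `seekKey` is a hypothetical starting key:
```go
err := db.View(func(txn *badger.Txn) error {
	opts := badger.DefaultIteratorOptions
	opts.Reverse = true
	it := txn.NewIterator(opts)
	defer it.Close()

	seekKey := []byte("some-key")
	// Append 0xff so that seekKey itself is included in the results.
	seek := append(append([]byte{}, seekKey...), 0xff)
	for it.Seek(seek); it.Valid(); it.Next() {
		item := it.Item()
		// Keys arrive in descending order, starting at seekKey (inclusive).
		_ = item
	}
	return nil
})
```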
### Which instances should I use for Badger?
We recommend using instances which provide local SSD storage, without any limit
on the maximum IOPS. In AWS, these are storage optimized instances like i3. They
provide local SSDs which clock 100K IOPS over 4KB blocks easily.
### I'm getting a closed channel error. Why?
```
panic: close of closed channel
panic: send on closed channel
```
If you're seeing panics like the above, it is because you're operating on a closed DB. This can happen if you call `Close()` before sending a write, or if you call it multiple times. You should ensure that you only call `Close()` once, and that all your read/write operations finish before closing.
### Are there any Go specific settings that I should use?
We *highly* recommend setting a high number for `GOMAXPROCS`, which allows Go to
observe the full IOPS throughput provided by modern SSDs. In Dgraph, we have set
it to 128. For more details, [see this
thread](https://groups.google.com/d/topic/golang-nuts/jPb_h3TvlKE/discussion).
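You can set this either through the environment (`GOMAXPROCS=128 ./your-binary`) or
programmatically at startup; the value 128 below simply mirrors what we use in Dgraph, as
mentioned above:
```go
import "runtime"

func init() {
	// A high GOMAXPROCS lets Go issue enough concurrent I/O to use the SSD's full IOPS.
	runtime.GOMAXPROCS(128)
}
```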
### Are there any Linux specific settings that I should use?
We recommend setting `max file descriptors` to a high number depending upon the expected size of
your data. On Linux and Mac, you can check the file descriptor limit with `ulimit -n -H` for the
hard limit and `ulimit -n -S` for the soft limit. A soft limit of `65535` is a good lower bound.
You can adjust the limit as needed.
### I see "manifest has unsupported version: X (we support Y)" error.
This error means you have a badger directory which was created by an older version of badger and
you're trying to open it with a newer version of badger. The underlying data format can change
across badger versions, and users will have to migrate their data directory.
Badger data can be migrated from version X of badger to version Y of badger by following the steps
listed below.
Assume you were on badger v1.6.0 and you wish to migrate to v2.0.0.
1. Install badger version v1.6.0
- `cd $GOPATH/src/github.com/dgraph-io/badger`
- `git checkout v1.6.0`
- `cd badger && go install`
This should install the old badger binary in your $GOBIN.
2. Create Backup
- `badger backup --dir path/to/badger/directory -f badger.backup`
3. Install badger version v2.0.0
- `cd $GOPATH/src/github.com/dgraph-io/badger`
- `git checkout v2.0.0`
- `cd badger && go install`
This should install the new badger binary in your $GOBIN.
4. Restore from the backup
- `badger restore --dir path/to/new/badger/directory -f badger.backup`
This will create a new directory at `path/to/new/badger/directory` and add the badger data in the
newer format to it.
NOTE - The above steps shouldn't cause any data loss but please ensure the new data is valid before
deleting the old badger directory.
### Why do I need gcc to build badger? Does badger need CGO?
Badger does not directly use CGO, but it relies on the https://github.com/DataDog/zstd library for
zstd compression, and that library requires `gcc/cgo`. You can build badger without cgo by running
`CGO_ENABLED=0 go build`. This will build badger without support for the ZSTD compression algorithm.
## Contact
- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions, feature requests and discussions.
- Please use [Github issue tracker](https://github.com/dgraph-io/badger/issues) for filing bugs or feature requests.
- Join the community on [Slack](http://slack.dgraph.io).
- Follow us on Twitter [@dgraphlabs](https://twitter.com/dgraphlabs).
badger-2.2007.2/VERSIONING.md 0000664 0000000 0000000 00000004620 13721731165 0015205 0 ustar 00root root 0000000 0000000 # Serialization Versioning: Semantic Versioning for databases
Semantic Versioning, commonly known as SemVer, is a great idea that has been very widely adopted as
a way to decide how to name software versions. The whole concept is very well summarized on
semver.org with the following lines:
> Given a version number MAJOR.MINOR.PATCH, increment the:
>
> 1. MAJOR version when you make incompatible API changes,
> 2. MINOR version when you add functionality in a backwards-compatible manner, and
> 3. PATCH version when you make backwards-compatible bug fixes.
>
> Additional labels for pre-release and build metadata are available as extensions to the
> MAJOR.MINOR.PATCH format.
Unfortunately, API changes are not the most important changes for libraries that serialize data for
later consumption. For these libraries, such as BadgerDB, changes to the API are much easier to
handle than changes to the data format used to store data on disk.
## Serialization Version specification
Serialization Versioning, like Semantic Versioning, uses 3 numbers and also calls them
MAJOR.MINOR.PATCH, but the semantics of the numbers are slightly modified:
Given a version number MAJOR.MINOR.PATCH, increment the:
- MAJOR version when you make changes that require a transformation of the dataset before it can be
used again.
- MINOR version when old datasets are still readable but the API might have changed in
backwards-compatible or incompatible ways.
- PATCH version when you make backwards-compatible bug fixes.
Additional labels for pre-release and build metadata are available as extensions to the
MAJOR.MINOR.PATCH format.
Following this naming strategy, migration from v1.x to v2.x requires a migration strategy for your
existing dataset, and as such has to be carefully planned. Migrations in between different minor
versions (e.g. v1.5.x and v1.6.x) might break your build, as the API *might* have changed, but once
your code compiles there's no need for any data migration. Lastly, changes in between two different
patch versions should never break your build or dataset.
For more background on our decision to adopt Serialization Versioning, read the blog post
[Semantic Versioning, Go Modules, and Databases][blog] and the original proposal on
[this comment on Dgraph's Discuss forum][discuss].
[blog]: https://blog.dgraph.io/post/serialization-versioning/
[discuss]: https://discuss.dgraph.io/t/go-modules-on-badger-and-dgraph/4662/7 badger-2.2007.2/appveyor.yml 0000664 0000000 0000000 00000002376 13721731165 0015576 0 ustar 00root root 0000000 0000000 # version format
version: "{build}"
# Operating system (build VM template)
os: Windows Server 2012 R2
# Platform.
platform: x64
clone_folder: c:\gopath\src\github.com\dgraph-io\badger
# Environment variables
environment:
GOVERSION: 1.12
GOPATH: c:\gopath
GO111MODULE: on
# scripts that run after cloning repository
install:
- set PATH=%GOPATH%\bin;c:\go\bin;c:\msys64\mingw64\bin;%PATH%
- go version
- go env
- python --version
- gcc --version
# To run your custom scripts instead of automatic MSBuild
build_script:
# We need to disable firewall - https://github.com/appveyor/ci/issues/1579#issuecomment-309830648
- ps: Disable-NetFirewallRule -DisplayName 'File and Printer Sharing (SMB-Out)'
- cd c:\gopath\src\github.com\dgraph-io\badger
- git branch
- go get -t ./...
# To run your custom scripts instead of automatic tests
test_script:
# Unit tests
- ps: Add-AppveyorTest "Unit Tests" -Outcome Running
- go test -v github.com/dgraph-io/badger/...
- go test -v -vlog_mmap=false github.com/dgraph-io/badger/...
- ps: Update-AppveyorTest "Unit Tests" -Outcome Passed
notifications:
- provider: Email
to:
- pawan@dgraph.io
on_build_failure: true
on_build_status_changed: true
# to disable deployment
deploy: off
badger-2.2007.2/backup.go 0000664 0000000 0000000 00000016311 13721731165 0014774 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bufio"
"bytes"
"context"
"encoding/binary"
"io"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/y"
"github.com/golang/protobuf/proto"
)
// flushThreshold determines when a buffer will be flushed. When performing a
// backup/restore, the entries will be batched up until the total size of batch
// is more than flushThreshold or entry size (without the value size) is more
// than the maxBatchSize.
const flushThreshold = 100 << 20
// Backup is a wrapper function over Stream.Backup to generate full and incremental backups of the
// DB. For more control over how many goroutines are used to generate the backup, or if you wish to
// backup only a certain range of keys, use Stream.Backup directly.
func (db *DB) Backup(w io.Writer, since uint64) (uint64, error) {
stream := db.NewStream()
stream.LogPrefix = "DB.Backup"
return stream.Backup(w, since)
}
// Backup dumps a protobuf-encoded list of all entries in the database into the
// given writer, that are newer than the specified version. It returns a
// timestamp indicating when the entries were dumped which can be passed into a
// later invocation to generate an incremental dump, of entries that have been
// added/modified since the last invocation of Stream.Backup().
//
// This can be used to backup the data in a database at a given point in time.
func (stream *Stream) Backup(w io.Writer, since uint64) (uint64, error) {
stream.KeyToList = func(key []byte, itr *Iterator) (*pb.KVList, error) {
list := &pb.KVList{}
for ; itr.Valid(); itr.Next() {
item := itr.Item()
if !bytes.Equal(item.Key(), key) {
return list, nil
}
if item.Version() < since {
// Ignore versions less than given timestamp, or skip older
// versions of the given key.
return list, nil
}
var valCopy []byte
if !item.IsDeletedOrExpired() {
// No need to copy value, if item is deleted or expired.
var err error
valCopy, err = item.ValueCopy(nil)
if err != nil {
stream.db.opt.Errorf("Key [%x, %d]. Error while fetching value [%v]\n",
item.Key(), item.Version(), err)
return nil, err
}
}
// clear txn bits
meta := item.meta &^ (bitTxn | bitFinTxn)
kv := &pb.KV{
Key: item.KeyCopy(nil),
Value: valCopy,
UserMeta: []byte{item.UserMeta()},
Version: item.Version(),
ExpiresAt: item.ExpiresAt(),
Meta: []byte{meta},
}
list.Kv = append(list.Kv, kv)
switch {
case item.DiscardEarlierVersions():
// If we need to discard earlier versions of this item, add a delete
// marker just below the current version.
list.Kv = append(list.Kv, &pb.KV{
Key: item.KeyCopy(nil),
Version: item.Version() - 1,
Meta: []byte{bitDelete},
})
return list, nil
case item.IsDeletedOrExpired():
return list, nil
}
}
return list, nil
}
var maxVersion uint64
stream.Send = func(list *pb.KVList) error {
for _, kv := range list.Kv {
if maxVersion < kv.Version {
maxVersion = kv.Version
}
}
return writeTo(list, w)
}
if err := stream.Orchestrate(context.Background()); err != nil {
return 0, err
}
return maxVersion, nil
}
func writeTo(list *pb.KVList, w io.Writer) error {
if err := binary.Write(w, binary.LittleEndian, uint64(proto.Size(list))); err != nil {
return err
}
buf, err := proto.Marshal(list)
if err != nil {
return err
}
_, err = w.Write(buf)
return err
}
// KVLoader is used to write KVList objects in to badger. It can be used to restore a backup.
type KVLoader struct {
db *DB
throttle *y.Throttle
entries []*Entry
entriesSize int64
totalSize int64
}
// NewKVLoader returns a new instance of KVLoader.
func (db *DB) NewKVLoader(maxPendingWrites int) *KVLoader {
return &KVLoader{
db: db,
throttle: y.NewThrottle(maxPendingWrites),
entries: make([]*Entry, 0, db.opt.maxBatchCount),
}
}
// Set writes the key-value pair to the database.
func (l *KVLoader) Set(kv *pb.KV) error {
var userMeta, meta byte
if len(kv.UserMeta) > 0 {
userMeta = kv.UserMeta[0]
}
if len(kv.Meta) > 0 {
meta = kv.Meta[0]
}
e := &Entry{
Key: y.KeyWithTs(kv.Key, kv.Version),
Value: kv.Value,
UserMeta: userMeta,
ExpiresAt: kv.ExpiresAt,
meta: meta,
}
estimatedSize := int64(e.estimateSize(l.db.opt.ValueThreshold))
// Flush entries if inserting the next entry would overflow the transactional limits.
if int64(len(l.entries))+1 >= l.db.opt.maxBatchCount ||
l.entriesSize+estimatedSize >= l.db.opt.maxBatchSize ||
l.totalSize >= flushThreshold {
if err := l.send(); err != nil {
return err
}
}
l.entries = append(l.entries, e)
l.entriesSize += estimatedSize
l.totalSize += estimatedSize + int64(len(e.Value))
return nil
}
func (l *KVLoader) send() error {
if err := l.throttle.Do(); err != nil {
return err
}
if err := l.db.batchSetAsync(l.entries, func(err error) {
l.throttle.Done(err)
}); err != nil {
return err
}
l.entries = make([]*Entry, 0, l.db.opt.maxBatchCount)
l.entriesSize = 0
l.totalSize = 0
return nil
}
// Finish is meant to be called after all the key-value pairs have been loaded.
func (l *KVLoader) Finish() error {
if len(l.entries) > 0 {
if err := l.send(); err != nil {
return err
}
}
return l.throttle.Finish()
}
// Load reads a protobuf-encoded list of all entries from a reader and writes
// them to the database. This can be used to restore the database from a backup
// made by calling DB.Backup(). If more complex logic is needed to restore a badger
// backup, the KVLoader interface should be used instead.
//
// DB.Load() should be called on a database that is not running any other
// concurrent transactions while it is running.
func (db *DB) Load(r io.Reader, maxPendingWrites int) error {
br := bufio.NewReaderSize(r, 16<<10)
unmarshalBuf := make([]byte, 1<<10)
ldr := db.NewKVLoader(maxPendingWrites)
for {
var sz uint64
err := binary.Read(br, binary.LittleEndian, &sz)
if err == io.EOF {
break
} else if err != nil {
return err
}
if cap(unmarshalBuf) < int(sz) {
unmarshalBuf = make([]byte, sz)
}
if _, err = io.ReadFull(br, unmarshalBuf[:sz]); err != nil {
return err
}
list := &pb.KVList{}
if err := proto.Unmarshal(unmarshalBuf[:sz], list); err != nil {
return err
}
for _, kv := range list.Kv {
if err := ldr.Set(kv); err != nil {
return err
}
// Update nextTxnTs, memtable stores this
// timestamp in badger head when flushed.
if kv.Version >= db.orc.nextTxnTs {
db.orc.nextTxnTs = kv.Version + 1
}
}
}
if err := ldr.Finish(); err != nil {
return err
}
db.orc.txnMark.Done(db.orc.nextTxnTs - 1)
return nil
}
badger-2.2007.2/backup_test.go 0000664 0000000 0000000 00000032243 13721731165 0016035 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"reflect"
"strconv"
"testing"
"time"
"github.com/dgraph-io/badger/v2/pb"
"github.com/stretchr/testify/require"
)
func TestBackupRestore1(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
db, err := Open(getTestOptions(dir))
require.NoError(t, err)
// Write some stuff
entries := []struct {
key []byte
val []byte
userMeta byte
version uint64
}{
{key: []byte("answer1"), val: []byte("42"), version: 1},
{key: []byte("answer2"), val: []byte("43"), userMeta: 1, version: 2},
}
err = db.Update(func(txn *Txn) error {
e := entries[0]
err := txn.SetEntry(NewEntry(e.key, e.val).WithMeta(e.userMeta))
if err != nil {
return err
}
return nil
})
require.NoError(t, err)
err = db.Update(func(txn *Txn) error {
e := entries[1]
err := txn.SetEntry(NewEntry(e.key, e.val).WithMeta(e.userMeta))
if err != nil {
return err
}
return nil
})
require.NoError(t, err)
// Use different directory.
dir, err = ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
bak, err := ioutil.TempFile(dir, "badgerbak")
require.NoError(t, err)
_, err = db.Backup(bak, 0)
require.NoError(t, err)
require.NoError(t, bak.Close())
require.NoError(t, db.Close())
db, err = Open(getTestOptions(dir))
require.NoError(t, err)
defer db.Close()
bak, err = os.Open(bak.Name())
require.NoError(t, err)
defer bak.Close()
require.NoError(t, db.Load(bak, 16))
err = db.View(func(txn *Txn) error {
opts := DefaultIteratorOptions
opts.AllVersions = true
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
val, err := item.ValueCopy(nil)
if err != nil {
return err
}
require.Equal(t, entries[count].key, item.Key())
require.Equal(t, entries[count].val, val)
require.Equal(t, entries[count].version, item.Version())
require.Equal(t, entries[count].userMeta, item.UserMeta())
count++
}
require.Equal(t, count, 2)
return nil
})
require.NoError(t, err)
require.Equal(t, db.orc.nextTs(), uint64(3))
}
func TestBackupRestore2(t *testing.T) {
tmpdir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(tmpdir)
s1Path := filepath.Join(tmpdir, "test1")
s2Path := filepath.Join(tmpdir, "test2")
s3Path := filepath.Join(tmpdir, "test3")
db1, err := Open(getTestOptions(s1Path))
require.NoError(t, err)
defer db1.Close()
key1 := []byte("key1")
key2 := []byte("key2")
rawValue := []byte("NotLongValue")
N := byte(251)
err = db1.Update(func(tx *Txn) error {
if err := tx.SetEntry(NewEntry(key1, rawValue)); err != nil {
return err
}
return tx.SetEntry(NewEntry(key2, rawValue))
})
require.NoError(t, err)
for i := byte(1); i < N; i++ {
err = db1.Update(func(tx *Txn) error {
if err := tx.SetEntry(NewEntry(append(key1, i), rawValue)); err != nil {
return err
}
return tx.SetEntry(NewEntry(append(key2, i), rawValue))
})
require.NoError(t, err)
}
var backup bytes.Buffer
_, err = db1.Backup(&backup, 0)
require.NoError(t, err)
fmt.Println("backup1 length:", backup.Len())
db2, err := Open(getTestOptions(s2Path))
require.NoError(t, err)
defer db2.Close()
err = db2.Load(&backup, 16)
require.NoError(t, err)
// Check nextTs is correctly set.
require.Equal(t, db1.orc.nextTs(), db2.orc.nextTs())
for i := byte(1); i < N; i++ {
err = db2.View(func(tx *Txn) error {
k := append(key1, i)
item, err := tx.Get(k)
if err != nil {
if err == ErrKeyNotFound {
return fmt.Errorf("Key %q has been not found, but was set\n", k)
}
return err
}
v, err := item.ValueCopy(nil)
if err != nil {
return err
}
if !reflect.DeepEqual(v, rawValue) {
return fmt.Errorf("Values not match, got %v, expected %v", v, rawValue)
}
return nil
})
require.NoError(t, err)
}
for i := byte(1); i < N; i++ {
err = db2.Update(func(tx *Txn) error {
if err := tx.SetEntry(NewEntry(append(key1, i), rawValue)); err != nil {
return err
}
return tx.SetEntry(NewEntry(append(key2, i), rawValue))
})
require.NoError(t, err)
}
backup.Reset()
_, err = db2.Backup(&backup, 0)
require.NoError(t, err)
fmt.Println("backup2 length:", backup.Len())
db3, err := Open(getTestOptions(s3Path))
require.NoError(t, err)
defer db3.Close()
err = db3.Load(&backup, 16)
require.NoError(t, err)
// Check nextTs is correctly set.
require.Equal(t, db2.orc.nextTs(), db3.orc.nextTs())
for i := byte(1); i < N; i++ {
err = db3.View(func(tx *Txn) error {
k := append(key1, i)
item, err := tx.Get(k)
if err != nil {
if err == ErrKeyNotFound {
return fmt.Errorf("Key %q has been not found, but was set\n", k)
}
return err
}
v, err := item.ValueCopy(nil)
if err != nil {
return err
}
if !reflect.DeepEqual(v, rawValue) {
return fmt.Errorf("Values not match, got %v, expected %v", v, rawValue)
}
return nil
})
require.NoError(t, err)
}
}
var randSrc = rand.NewSource(time.Now().UnixNano())
func createEntries(n int) []*pb.KV {
entries := make([]*pb.KV, n)
for i := 0; i < n; i++ {
entries[i] = &pb.KV{
Key: []byte(fmt.Sprint("key", i)),
Value: []byte{1},
UserMeta: []byte{0},
Meta: []byte{0},
}
}
return entries
}
func populateEntries(db *DB, entries []*pb.KV) error {
return db.Update(func(txn *Txn) error {
var err error
for i, e := range entries {
if err = txn.SetEntry(NewEntry(e.Key, e.Value)); err != nil {
return err
}
entries[i].Version = 1
}
return nil
})
}
func TestBackup(t *testing.T) {
test := func(t *testing.T, db *DB) {
var bb bytes.Buffer
N := 1000
entries := createEntries(N)
require.NoError(t, populateEntries(db, entries))
_, err := db.Backup(&bb, 0)
require.NoError(t, err)
err = db.View(func(txn *Txn) error {
opts := DefaultIteratorOptions
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
idx, err := strconv.Atoi(string(item.Key())[3:])
if err != nil {
return err
}
if idx > N || !bytes.Equal(entries[idx].Key, item.Key()) {
return fmt.Errorf("%s: %s", string(item.Key()), ErrKeyNotFound)
}
count++
}
if N != count {
return fmt.Errorf("wrong number of items: %d expected, %d actual", N, count)
}
return nil
})
require.NoError(t, err)
}
t.Run("disk mode", func(t *testing.T) {
tmpdir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(tmpdir)
opt := DefaultOptions(filepath.Join(tmpdir, "backup0"))
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
test(t, db)
})
})
t.Run("InMemory mode", func(t *testing.T) {
opt := DefaultOptions("")
opt.InMemory = true
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
test(t, db)
})
})
}
func TestBackupRestore3(t *testing.T) {
var bb bytes.Buffer
tmpdir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(tmpdir)
N := 1000
entries := createEntries(N)
var db1NextTs uint64
// backup
{
db1, err := Open(DefaultOptions(filepath.Join(tmpdir, "backup1")))
require.NoError(t, err)
defer db1.Close()
require.NoError(t, populateEntries(db1, entries))
_, err = db1.Backup(&bb, 0)
require.NoError(t, err)
db1NextTs = db1.orc.nextTs()
require.NoError(t, db1.Close())
}
require.True(t, len(entries) == N)
require.True(t, bb.Len() > 0)
// restore
db2, err := Open(DefaultOptions(filepath.Join(tmpdir, "restore1")))
require.NoError(t, err)
defer db2.Close()
require.NotEqual(t, db1NextTs, db2.orc.nextTs())
require.NoError(t, db2.Load(&bb, 16))
require.Equal(t, db1NextTs, db2.orc.nextTs())
// verify
err = db2.View(func(txn *Txn) error {
opts := DefaultIteratorOptions
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
idx, err := strconv.Atoi(string(item.Key())[3:])
if err != nil {
return err
}
if idx > N || !bytes.Equal(entries[idx].Key, item.Key()) {
return fmt.Errorf("%s: %s", string(item.Key()), ErrKeyNotFound)
}
count++
}
if N != count {
return fmt.Errorf("wrong number of items: %d expected, %d actual", N, count)
}
return nil
})
require.NoError(t, err)
}
func TestBackupLoadIncremental(t *testing.T) {
tmpdir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(tmpdir)
N := 100
entries := createEntries(N)
updates := make(map[int]byte)
var bb bytes.Buffer
var db1NextTs uint64
// backup
{
db1, err := Open(DefaultOptions(filepath.Join(tmpdir, "backup2")))
require.NoError(t, err)
defer db1.Close()
require.NoError(t, populateEntries(db1, entries))
since, err := db1.Backup(&bb, 0)
require.NoError(t, err)
ints := rand.New(randSrc).Perm(N)
// pick 10 items to mark as deleted.
err = db1.Update(func(txn *Txn) error {
for _, i := range ints[:10] {
if err := txn.Delete(entries[i].Key); err != nil {
return err
}
updates[i] = bitDelete
}
return nil
})
require.NoError(t, err)
since, err = db1.Backup(&bb, since)
require.NoError(t, err)
// pick 5 items to mark as expired.
err = db1.Update(func(txn *Txn) error {
for _, i := range (ints)[10:15] {
entry := NewEntry(entries[i].Key, entries[i].Value).WithTTL(-time.Hour)
if err := txn.SetEntry(entry); err != nil {
return err
}
updates[i] = bitDelete // expired
}
return nil
})
require.NoError(t, err)
since, err = db1.Backup(&bb, since)
require.NoError(t, err)
// pick 5 items to mark as discard.
err = db1.Update(func(txn *Txn) error {
for _, i := range ints[15:20] {
entry := NewEntry(entries[i].Key, entries[i].Value).WithDiscard()
if err := txn.SetEntry(entry); err != nil {
return err
}
updates[i] = bitDiscardEarlierVersions
}
return nil
})
require.NoError(t, err)
_, err = db1.Backup(&bb, since)
require.NoError(t, err)
db1NextTs = db1.orc.nextTs()
require.NoError(t, db1.Close())
}
require.True(t, len(entries) == N)
require.True(t, bb.Len() > 0)
// restore
db2, err := Open(getTestOptions(filepath.Join(tmpdir, "restore2")))
require.NoError(t, err)
defer db2.Close()
require.NotEqual(t, db1NextTs, db2.orc.nextTs())
require.NoError(t, db2.Load(&bb, 16))
require.Equal(t, db1NextTs, db2.orc.nextTs())
// verify
actual := make(map[int]byte)
err = db2.View(func(txn *Txn) error {
opts := DefaultIteratorOptions
opts.AllVersions = true
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
idx, err := strconv.Atoi(string(item.Key())[3:])
if err != nil {
return err
}
if item.IsDeletedOrExpired() {
_, ok := updates[idx]
if !ok {
return fmt.Errorf("%s: not expected to be updated but it is",
string(item.Key()))
}
actual[idx] = item.meta
count++
continue
}
}
if len(updates) != count {
return fmt.Errorf("mismatched updated items: %d expected, %d actual",
len(updates), count)
}
return nil
})
require.NoError(t, err, "%v %v", updates, actual)
}
func TestBackupBitClear(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opt := getTestOptions(dir)
opt.ValueThreshold = 10 // This is important
db, err := Open(opt)
require.NoError(t, err)
key := []byte("foo")
val := []byte(fmt.Sprintf("%0100d", 1))
require.Greater(t, len(val), db.opt.ValueThreshold)
err = db.Update(func(txn *Txn) error {
e := NewEntry(key, val)
// Value > valueTheshold so bitValuePointer will be set.
return txn.SetEntry(e)
})
require.NoError(t, err)
// Use different directory.
dir, err = ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
bak, err := ioutil.TempFile(dir, "badgerbak")
require.NoError(t, err)
_, err = db.Backup(bak, 0)
require.NoError(t, err)
require.NoError(t, bak.Close())
oldValue := db.orc.nextTs()
require.NoError(t, db.Close())
opt = getTestOptions(dir)
opt.ValueThreshold = 200 // This is important.
db, err = Open(opt)
require.NoError(t, err)
defer db.Close()
bak, err = os.Open(bak.Name())
require.NoError(t, err)
defer bak.Close()
require.NoError(t, db.Load(bak, 16))
// Ensure nextTs is still the same.
require.Equal(t, oldValue, db.orc.nextTs())
require.NoError(t, db.View(func(txn *Txn) error {
e, err := txn.Get(key)
require.NoError(t, err)
v, err := e.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, val, v)
return nil
}))
}
badger-2.2007.2/badger/ 0000775 0000000 0000000 00000000000 13721731165 0014422 5 ustar 00root root 0000000 0000000 badger-2.2007.2/badger/.gitignore 0000664 0000000 0000000 00000000010 13721731165 0016401 0 ustar 00root root 0000000 0000000 /badger
badger-2.2007.2/badger/cmd/ 0000775 0000000 0000000 00000000000 13721731165 0015165 5 ustar 00root root 0000000 0000000 badger-2.2007.2/badger/cmd/backup.go 0000664 0000000 0000000 00000003713 13721731165 0016765 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bufio"
"os"
"github.com/dgraph-io/badger/v2"
"github.com/spf13/cobra"
)
var backupFile string
var truncate bool
// backupCmd represents the backup command
var backupCmd = &cobra.Command{
Use: "backup",
Short: "Backup Badger database.",
Long: `Backup Badger database to a file in a version-agnostic manner.
Iterates over each key-value pair, encodes it along with its metadata and
version in protocol buffers and writes them to a file. This file can later be
used by the restore command to create an identical copy of the
database.`,
RunE: doBackup,
}
func init() {
RootCmd.AddCommand(backupCmd)
backupCmd.Flags().StringVarP(&backupFile, "backup-file", "f",
"badger.bak", "File to backup to")
backupCmd.Flags().BoolVarP(&truncate, "truncate", "t",
false, "Allow value log truncation if required.")
}
func doBackup(cmd *cobra.Command, args []string) error {
// Open DB
db, err := badger.Open(badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithTruncate(truncate))
if err != nil {
return err
}
defer db.Close()
// Create File
f, err := os.Create(backupFile)
if err != nil {
return err
}
bw := bufio.NewWriterSize(f, 64<<20)
if _, err = db.Backup(bw, 0); err != nil {
return err
}
if err = bw.Flush(); err != nil {
return err
}
if err = f.Sync(); err != nil {
return err
}
return f.Close()
}
badger-2.2007.2/badger/cmd/bank.go 0000664 0000000 0000000 00000033575 13721731165 0016444 0 ustar 00root root 0000000 0000000 /*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"context"
"errors"
"fmt"
"io/ioutil"
"log"
"math"
"math/rand"
"strconv"
"sync"
"sync/atomic"
"time"
"github.com/dgraph-io/badger/v2"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/y"
"github.com/spf13/cobra"
)
var testCmd = &cobra.Command{
Use: "bank",
Short: "Run bank test on Badger.",
Long: `
This command runs bank test on Badger, inspired by Jepsen. It creates many
accounts and moves money among them transactionally. It also reads the sum total
of all the accounts, to ensure that the total never changes.
`,
}
var bankTest = &cobra.Command{
Use: "test",
Short: "Execute bank test on Badger.",
RunE: runTest,
}
var bankDisect = &cobra.Command{
Use: "disect",
Short: "Disect the bank output.",
Long: `
Disect the bank output BadgerDB to find the first transaction which causes
failure of the total invariant.
`,
RunE: runDisect,
}
var (
numGoroutines int
numAccounts int
numPrevious int
duration string
stopAll int32
mmap bool
checkStream bool
checkSubscriber bool
verbose bool
encryptionKey string
)
const (
keyPrefix = "account:"
initialBal uint64 = 100
)
func init() {
RootCmd.AddCommand(testCmd)
testCmd.AddCommand(bankTest)
testCmd.AddCommand(bankDisect)
testCmd.Flags().IntVarP(
&numAccounts, "accounts", "a", 10000, "Number of accounts in the bank.")
bankTest.Flags().IntVarP(
&numGoroutines, "conc", "c", 16, "Number of concurrent transactions to run.")
bankTest.Flags().StringVarP(&duration, "duration", "d", "3m", "How long to run the test.")
bankTest.Flags().BoolVarP(&mmap, "mmap", "m", false, "If true, mmap LSM tree. Default is RAM.")
bankTest.Flags().BoolVarP(&checkStream, "check_stream", "s", false,
"If true, the test will send transactions to another badger instance via the stream "+
"interface in order to verify that all data is streamed correctly.")
bankTest.Flags().BoolVarP(&checkSubscriber, "check_subscriber", "w", false,
"If true, the test will send transactions to another badger instance via the subscriber "+
"interface in order to verify that all the data is published correctly.")
bankTest.Flags().BoolVarP(&verbose, "verbose", "v", false,
"If true, the test will print all the executed bank transfers to standard output. "+
"This outputs a lot so it's best to turn it off when running the test for a while.")
bankTest.Flags().StringVarP(&encryptionKey, "encryption-key", "e", "",
"If it is true, badger will encrypt all the data stored on the disk.")
bankDisect.Flags().IntVarP(&numPrevious, "previous", "p", 12,
"Starting from the violation txn, how many previous versions to retrieve.")
bankDisect.Flags().StringVar(&encryptionKey, "decryption-key", "",
"If set, DB will be opened using the provided decryption key.")
}
func key(account int) []byte {
return []byte(fmt.Sprintf("%s%s", keyPrefix, strconv.Itoa(account)))
}
func toUint64(val []byte) uint64 {
u, err := strconv.ParseUint(string(val), 10, 64)
y.Check(err)
return uint64(u)
}
func toSlice(bal uint64) []byte {
return []byte(strconv.FormatUint(bal, 10))
}
func getBalance(txn *badger.Txn, account int) (uint64, error) {
item, err := txn.Get(key(account))
if err != nil {
return 0, err
}
var bal uint64
err = item.Value(func(v []byte) error {
bal = toUint64(v)
return nil
})
return bal, err
}
func putBalance(txn *badger.Txn, account int, bal uint64) error {
return txn.SetEntry(badger.NewEntry(key(account), toSlice(bal)))
}
func min(a, b uint64) uint64 {
if a < b {
return a
}
return b
}
var errAbandoned = errors.New("Transaction abandoned due to insufficient balance")
func moveMoney(db *badger.DB, from, to int) error {
return db.Update(func(txn *badger.Txn) error {
balf, err := getBalance(txn, from)
if err != nil {
return err
}
balt, err := getBalance(txn, to)
if err != nil {
return err
}
floor := min(balf, balt)
if floor < 5 {
return errAbandoned
}
// Move the money.
balf -= 5
balt += 5
if err = putBalance(txn, from, balf); err != nil {
return err
}
return putBalance(txn, to, balt)
})
}
type account struct {
Id int
Bal uint64
}
func diff(a, b []account) string {
var buf bytes.Buffer
y.AssertTruef(len(a) == len(b), "len(a)=%d. len(b)=%d\n", len(a), len(b))
for i := range a {
ai := a[i]
bi := b[i]
if ai.Id != bi.Id || ai.Bal != bi.Bal {
buf.WriteString(fmt.Sprintf("Index: %d. Account [%+v] -> [%+v]\n", i, ai, bi))
}
}
return buf.String()
}
var errFailure = errors.New("test failed due to balance mismatch")
// seekTotal retrieves the total of all accounts by seeking for each account key.
func seekTotal(txn *badger.Txn) ([]account, error) {
expected := uint64(numAccounts) * uint64(initialBal)
var accounts []account
var total uint64
for i := 0; i < numAccounts; i++ {
item, err := txn.Get(key(i))
if err != nil {
log.Printf("Error for account: %d. err=%v. key=%q\n", i, err, key(i))
return accounts, err
}
val, err := item.ValueCopy(nil)
if err != nil {
return accounts, err
}
acc := account{
Id: i,
Bal: toUint64(val),
}
accounts = append(accounts, acc)
total += acc.Bal
}
if total != expected {
log.Printf("Balance did NOT match up. Expected: %d. Received: %d",
expected, total)
atomic.AddInt32(&stopAll, 1)
return accounts, errFailure
}
return accounts, nil
}
// Range is [lowTs, highTs).
func findFirstInvalidTxn(db *badger.DB, lowTs, highTs uint64) uint64 {
checkAt := func(ts uint64) error {
txn := db.NewTransactionAt(ts, false)
_, err := seekTotal(txn)
txn.Discard()
return err
}
if highTs-lowTs < 1 {
log.Printf("Checking at lowTs: %d\n", lowTs)
err := checkAt(lowTs)
if err == errFailure {
fmt.Printf("Violation at ts: %d\n", lowTs)
return lowTs
} else if err != nil {
log.Printf("Error at lowTs: %d. Err=%v\n", lowTs, err)
return 0
}
fmt.Printf("No violation found at ts: %d\n", lowTs)
return 0
}
midTs := (lowTs + highTs) / 2
log.Println()
log.Printf("Checking. low=%d. high=%d. mid=%d\n", lowTs, highTs, midTs)
err := checkAt(midTs)
if err == badger.ErrKeyNotFound || err == nil {
// If no failure, move to higher ts.
return findFirstInvalidTxn(db, midTs+1, highTs)
}
// Found an error.
return findFirstInvalidTxn(db, lowTs, midTs)
}
func compareTwo(db *badger.DB, before, after uint64) {
fmt.Printf("Comparing @ts=%d with @ts=%d\n", before, after)
txn := db.NewTransactionAt(before, false)
prev, err := seekTotal(txn)
if err == errFailure {
// pass
} else {
y.Check(err)
}
txn.Discard()
txn = db.NewTransactionAt(after, false)
now, err := seekTotal(txn)
if err == errFailure {
// pass
} else {
y.Check(err)
}
txn.Discard()
fmt.Println(diff(prev, now))
}
func runDisect(cmd *cobra.Command, args []string) error {
// The total did not match up. So, let's disect the DB to find the
// transaction which caused the total mismatch.
db, err := badger.OpenManaged(badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithReadOnly(true).
WithEncryptionKey([]byte(encryptionKey)))
if err != nil {
return err
}
fmt.Println("opened db")
var min, max uint64 = math.MaxUint64, 0
{
txn := db.NewTransactionAt(uint64(math.MaxUint32), false)
iopt := badger.DefaultIteratorOptions
iopt.AllVersions = true
itr := txn.NewIterator(iopt)
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
if min > item.Version() {
min = item.Version()
}
if max < item.Version() {
max = item.Version()
}
}
itr.Close()
txn.Discard()
}
log.Printf("min=%d. max=%d\n", min, max)
ts := findFirstInvalidTxn(db, min, max)
fmt.Println()
if ts == 0 {
fmt.Println("Nothing found. Exiting.")
return nil
}
for i := 0; i < numPrevious; i++ {
compareTwo(db, ts-1-uint64(i), ts-uint64(i))
}
return nil
}
func runTest(cmd *cobra.Command, args []string) error {
rand.Seed(time.Now().UnixNano())
// Open DB
opts := badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithMaxTableSize(4 << 20). // Force more compactions.
WithNumLevelZeroTables(2).
WithNumMemtables(2).
// Do not GC any versions, because we need them for the disect tool.
WithNumVersionsToKeep(int(math.MaxInt32)).
WithValueThreshold(1) // Make all values go to value log
if mmap {
opts = opts.WithTableLoadingMode(options.MemoryMap)
}
if encryptionKey != "" {
opts = opts.WithEncryptionKey([]byte(encryptionKey))
// The following comment is intentional as we would need the encryption key in case
// we want to run disect tool on the directory generated by bank test tool.
log.Printf("Using encryption key %s\n", encryptionKey)
}
log.Printf("Opening DB with options: %+v\n", opts)
db, err := badger.Open(opts)
if err != nil {
return err
}
defer db.Close()
var tmpDb *badger.DB
var subscribeDB *badger.DB
if checkSubscriber {
dir, err := ioutil.TempDir("", "bank_subscribe")
y.Check(err)
subscribeDB, err = badger.Open(badger.DefaultOptions(dir).WithSyncWrites(false))
if err != nil {
return err
}
defer subscribeDB.Close()
}
if checkStream {
dir, err := ioutil.TempDir("", "bank_stream")
y.Check(err)
tmpDb, err = badger.Open(badger.DefaultOptions(dir).WithSyncWrites(false))
if err != nil {
return err
}
defer tmpDb.Close()
}
wb := db.NewWriteBatch()
for i := 0; i < numAccounts; i++ {
y.Check(wb.Set(key(i), toSlice(initialBal)))
}
log.Println("Waiting for writes to be done...")
y.Check(wb.Flush())
log.Println("Bank initialization OK. Commencing test.")
log.Printf("Running with %d accounts, and %d goroutines.\n", numAccounts, numGoroutines)
log.Printf("Using keyPrefix: %s\n", keyPrefix)
dur, err := time.ParseDuration(duration)
y.Check(err)
// startTs := time.Now()
endTs := time.Now().Add(dur)
var total, errors, reads uint64
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for range ticker.C {
if atomic.LoadInt32(&stopAll) > 0 {
// Do not proceed.
return
}
// log.Printf("[%6s] Total: %d. Errors: %d Reads: %d.\n",
// time.Since(startTs).Round(time.Second).String(),
// atomic.LoadUint64(&total),
// atomic.LoadUint64(&errors),
// atomic.LoadUint64(&reads))
if time.Now().After(endTs) {
return
}
}
}()
// RW goroutines.
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func() {
defer wg.Done()
ticker := time.NewTicker(10 * time.Microsecond)
defer ticker.Stop()
for range ticker.C {
if atomic.LoadInt32(&stopAll) > 0 {
// Do not proceed.
return
}
if time.Now().After(endTs) {
return
}
from := rand.Intn(numAccounts)
to := rand.Intn(numAccounts)
if from == to {
continue
}
err := moveMoney(db, from, to)
atomic.AddUint64(&total, 1)
if err == nil && verbose {
log.Printf("Moved $5. %d -> %d\n", from, to)
} else {
atomic.AddUint64(&errors, 1)
}
}
}()
}
if checkStream {
wg.Add(1)
go func() {
defer wg.Done()
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for range ticker.C {
log.Printf("Received stream\n")
// Do not proceed.
if atomic.LoadInt32(&stopAll) > 0 || time.Now().After(endTs) {
return
}
// Clean up the database receiving the stream.
err = tmpDb.DropAll()
y.Check(err)
batch := tmpDb.NewWriteBatch()
stream := db.NewStream()
stream.Send = func(list *pb.KVList) error {
for _, kv := range list.Kv {
if err := batch.Set(kv.Key, kv.Value); err != nil {
return err
}
}
return nil
}
y.Check(stream.Orchestrate(context.Background()))
y.Check(batch.Flush())
y.Check(tmpDb.View(func(txn *badger.Txn) error {
_, err := seekTotal(txn)
if err != nil {
log.Printf("Error while calculating total in stream: %v", err)
}
return nil
}))
}
}()
}
// RO goroutine.
wg.Add(1)
go func() {
defer wg.Done()
ticker := time.NewTicker(10 * time.Microsecond)
defer ticker.Stop()
for range ticker.C {
if atomic.LoadInt32(&stopAll) > 0 {
// Do not proceed.
return
}
if time.Now().After(endTs) {
return
}
y.Check(db.View(func(txn *badger.Txn) error {
_, err := seekTotal(txn)
if err != nil {
log.Printf("Error while calculating total: %v", err)
} else {
atomic.AddUint64(&reads, 1)
}
return nil
}))
}
}()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var subWg sync.WaitGroup
if checkSubscriber {
subWg.Add(1)
go func() {
defer subWg.Done()
accountIDS := [][]byte{}
for i := 0; i < numAccounts; i++ {
accountIDS = append(accountIDS, key(i))
}
updater := func(kvs *pb.KVList) error {
batch := subscribeDB.NewWriteBatch()
for _, kv := range kvs.GetKv() {
y.Check(batch.Set(kv.Key, kv.Value))
}
return batch.Flush()
}
_ = db.Subscribe(ctx, updater, accountIDS...)
}()
}
wg.Wait()
if checkSubscriber {
cancel()
subWg.Wait()
y.Check(subscribeDB.View(func(txn *badger.Txn) error {
_, err := seekTotal(txn)
if err != nil {
log.Printf("Error while calculating subscriber DB total: %v", err)
} else {
atomic.AddUint64(&reads, 1)
}
return nil
}))
}
if atomic.LoadInt32(&stopAll) == 0 {
log.Println("Test OK")
return nil
}
log.Println("Test FAILED")
return fmt.Errorf("Test FAILED")
}
badger-2.2007.2/badger/cmd/bench.go 0000664 0000000 0000000 00000001710 13721731165 0016572 0 ustar 00root root 0000000 0000000 /*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"github.com/spf13/cobra"
)
var benchCmd = &cobra.Command{
Use: "benchmark",
Short: "Benchmark Badger database.",
Long: `This command will benchmark Badger for different usecases. Currently only read benchmark
is supported. Useful for testing and performance analysis.`,
}
func init() {
RootCmd.AddCommand(benchCmd)
}
badger-2.2007.2/badger/cmd/flatten.go 0000664 0000000 0000000 00000002625 13721731165 0017156 0 ustar 00root root 0000000 0000000 /*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"github.com/dgraph-io/badger/v2"
"github.com/spf13/cobra"
)
var flattenCmd = &cobra.Command{
Use: "flatten",
Short: "Flatten the LSM tree.",
Long: `
This command would compact all the LSM tables into one level.
`,
RunE: flatten,
}
var numWorkers int
func init() {
RootCmd.AddCommand(flattenCmd)
flattenCmd.Flags().IntVarP(&numWorkers, "num-workers", "w", 1,
"Number of concurrent compactors to run. More compactors would use more"+
" server resources to potentially achieve faster compactions.")
}
func flatten(cmd *cobra.Command, args []string) error {
db, err := badger.Open(badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithTruncate(truncate).
WithNumCompactors(0))
if err != nil {
return err
}
defer db.Close()
return db.Flatten(numWorkers)
}
badger-2.2007.2/badger/cmd/info.go 0000664 0000000 0000000 00000027365 13721731165 0016464 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"encoding/hex"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/pkg/errors"
"github.com/dgraph-io/badger/v2"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/table"
"github.com/dgraph-io/badger/v2/y"
humanize "github.com/dustin/go-humanize"
"github.com/spf13/cobra"
)
type flagOptions struct {
showTables bool
showHistogram bool
showKeys bool
withPrefix string
keyLookup string
itemMeta bool
keyHistory bool
showInternal bool
readOnly bool
truncate bool
}
var (
opt flagOptions
)
func init() {
RootCmd.AddCommand(infoCmd)
infoCmd.Flags().BoolVarP(&opt.showTables, "show-tables", "s", false,
"If set to true, show tables as well.")
infoCmd.Flags().BoolVar(&opt.showHistogram, "histogram", false,
"Show a histogram of the key and value sizes.")
infoCmd.Flags().BoolVar(&opt.showKeys, "show-keys", false, "Show keys stored in Badger")
infoCmd.Flags().StringVar(&opt.withPrefix, "with-prefix", "",
"Consider only the keys with specified prefix")
infoCmd.Flags().StringVarP(&opt.keyLookup, "lookup", "l", "", "Hex of the key to lookup")
infoCmd.Flags().BoolVar(&opt.itemMeta, "show-meta", true, "Output item meta data as well")
infoCmd.Flags().BoolVar(&opt.keyHistory, "history", false, "Show all versions of a key")
infoCmd.Flags().BoolVar(
&opt.showInternal, "show-internal", false, "Show internal keys along with other keys."+
" This option should be used along with --show-key option")
infoCmd.Flags().BoolVar(&opt.readOnly, "read-only", true, "If set to true, DB will be opened "+
"in read only mode. If DB has not been closed properly, this option can be set to false "+
"to open DB.")
infoCmd.Flags().BoolVar(&opt.truncate, "truncate", false, "If set to true, it allows "+
"truncation of value log files if they have corrupt data.")
}
var infoCmd = &cobra.Command{
Use: "info",
Short: "Health info about Badger database.",
Long: `
This command prints information about the badger key-value store. It reads MANIFEST and prints its
info. It also prints info about missing/extra files, and general information about the value log
files (which are not referenced by the manifest). Use this tool to report any issues about Badger
to the Dgraph team.
`,
RunE: handleInfo,
}
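// Example invocations (a minimal sketch; the directory path is a placeholder and
// 626164676572 is just the hex encoding of "badger"). All flags are registered in init above:
//
//	badger info --dir /path/to/badger --show-tables --histogram
//	badger info --dir /path/to/badger --lookup 626164676572 --history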
func handleInfo(cmd *cobra.Command, args []string) error {
if err := printInfo(sstDir, vlogDir); err != nil {
return errors.Wrap(err, "failed to print information in MANIFEST file")
}
// Open DB
db, err := badger.Open(badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithReadOnly(opt.readOnly).
WithTruncate(opt.truncate).
WithTableLoadingMode(options.MemoryMap))
if err != nil {
return errors.Wrap(err, "failed to open database")
}
defer db.Close()
if opt.showTables {
tableInfo(sstDir, vlogDir, db)
}
prefix, err := hex.DecodeString(opt.withPrefix)
if err != nil {
return errors.Wrapf(err, "failed to decode hex prefix: %s", opt.withPrefix)
}
if opt.showHistogram {
db.PrintHistogram(prefix)
}
if opt.showKeys {
if err := showKeys(db, prefix); err != nil {
return err
}
}
if len(opt.keyLookup) > 0 {
if err := lookup(db); err != nil {
return errors.Wrapf(err, "failed to perform lookup for the key: %x", opt.keyLookup)
}
}
return nil
}
func showKeys(db *badger.DB, prefix []byte) error {
if len(prefix) > 0 {
fmt.Printf("Only choosing keys with prefix: \n%s", hex.Dump(prefix))
}
txn := db.NewTransaction(false)
defer txn.Discard()
iopt := badger.DefaultIteratorOptions
iopt.Prefix = []byte(prefix)
iopt.PrefetchValues = false
iopt.AllVersions = opt.keyHistory
iopt.InternalAccess = opt.showInternal
it := txn.NewIterator(iopt)
defer it.Close()
totalKeys := 0
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
if err := printKey(item, false); err != nil {
return errors.Wrapf(err, "failed to print information about key: %x(%d)",
item.Key(), item.Version())
}
totalKeys++
}
fmt.Print("\n[Summary]\n")
fmt.Println("Total Number of keys:", totalKeys)
return nil
}
func lookup(db *badger.DB) error {
txn := db.NewTransaction(false)
defer txn.Discard()
key, err := hex.DecodeString(opt.keyLookup)
if err != nil {
return errors.Wrapf(err, "failed to decode key: %q", opt.keyLookup)
}
iopts := badger.DefaultIteratorOptions
iopts.AllVersions = opt.keyHistory
iopts.PrefetchValues = opt.keyHistory
itr := txn.NewKeyIterator(key, iopts)
defer itr.Close()
itr.Rewind()
if !itr.Valid() {
return errors.Errorf("Unable to rewind to key:\n%s", hex.Dump(key))
}
fmt.Println()
item := itr.Item()
if err := printKey(item, true); err != nil {
return errors.Wrapf(err, "failed to print information about key: %x(%d)",
item.Key(), item.Version())
}
if !opt.keyHistory {
return nil
}
itr.Next() // Move to the next key
for ; itr.Valid(); itr.Next() {
item := itr.Item()
if !bytes.Equal(key, item.Key()) {
break
}
if err := printKey(item, true); err != nil {
return errors.Wrapf(err, "failed to print information about key: %x(%d)",
item.Key(), item.Version())
}
}
return nil
}
func printKey(item *badger.Item, showValue bool) error {
var buf bytes.Buffer
fmt.Fprintf(&buf, "Key: %x\tversion: %d", item.Key(), item.Version())
if opt.itemMeta {
fmt.Fprintf(&buf, "\tsize: %d\tmeta: b%04b", item.EstimatedSize(), item.UserMeta())
}
if item.IsDeletedOrExpired() {
buf.WriteString("\t{deleted}")
}
if item.DiscardEarlierVersions() {
buf.WriteString("\t{discard}")
}
if showValue {
val, err := item.ValueCopy(nil)
if err != nil {
return errors.Wrapf(err,
"failed to copy value of the key: %x(%d)", item.Key(), item.Version())
}
fmt.Fprintf(&buf, "\n\tvalue: %v", val)
}
fmt.Println(buf.String())
return nil
}
func hbytes(sz int64) string {
return humanize.Bytes(uint64(sz))
}
func dur(src, dst time.Time) string {
return humanize.RelTime(dst, src, "earlier", "later")
}
func tableInfo(dir, valueDir string, db *badger.DB) {
// We want all tables along with their key counts here.
tables := db.Tables(true)
fmt.Println()
fmt.Println("SSTable [Li, Id, Total Keys including internal keys] " +
"[Left Key, Version -> Right Key, Version]")
for _, t := range tables {
lk, lt := y.ParseKey(t.Left), y.ParseTs(t.Left)
rk, rt := y.ParseKey(t.Right), y.ParseTs(t.Right)
fmt.Printf("SSTable [L%d, %03d, %07d] [%20X, v%d -> %20X, v%d]\n",
t.Level, t.ID, t.KeyCount, lk, lt, rk, rt)
}
fmt.Println()
}
func printInfo(dir, valueDir string) error {
if dir == "" {
return fmt.Errorf("--dir not supplied")
}
if valueDir == "" {
valueDir = dir
}
fp, err := os.Open(filepath.Join(dir, badger.ManifestFilename))
if err != nil {
return err
}
defer func() {
if fp != nil {
fp.Close()
}
}()
manifest, truncOffset, err := badger.ReplayManifestFile(fp)
if err != nil {
return err
}
fp.Close()
fp = nil
fileinfos, err := ioutil.ReadDir(dir)
if err != nil {
return err
}
fileinfoByName := make(map[string]os.FileInfo)
fileinfoMarked := make(map[string]bool)
for _, info := range fileinfos {
fileinfoByName[info.Name()] = info
fileinfoMarked[info.Name()] = false
}
fmt.Println()
var baseTime time.Time
manifestTruncated := false
manifestInfo, ok := fileinfoByName[badger.ManifestFilename]
if ok {
fileinfoMarked[badger.ManifestFilename] = true
truncatedString := ""
if truncOffset != manifestInfo.Size() {
truncatedString = fmt.Sprintf(" [TRUNCATED to %d]", truncOffset)
manifestTruncated = true
}
baseTime = manifestInfo.ModTime()
fmt.Printf("[%25s] %-12s %6s MA%s\n", manifestInfo.ModTime().Format(time.RFC3339),
manifestInfo.Name(), hbytes(manifestInfo.Size()), truncatedString)
} else {
fmt.Printf("%s [MISSING]\n", manifestInfo.Name())
}
numMissing := 0
numEmpty := 0
levelSizes := make([]int64, len(manifest.Levels))
for level, lm := range manifest.Levels {
// fmt.Printf("\n[Level %d]\n", level)
// We create a sorted list of table ID's so that output is in consistent order.
tableIDs := make([]uint64, 0, len(lm.Tables))
for id := range lm.Tables {
tableIDs = append(tableIDs, id)
}
sort.Slice(tableIDs, func(i, j int) bool {
return tableIDs[i] < tableIDs[j]
})
for _, tableID := range tableIDs {
tableFile := table.IDToFilename(tableID)
_, ok1 := manifest.Tables[tableID]
file, ok2 := fileinfoByName[tableFile]
if ok1 && ok2 {
fileinfoMarked[tableFile] = true
emptyString := ""
fileSize := file.Size()
if fileSize == 0 {
emptyString = " [EMPTY]"
numEmpty++
}
levelSizes[level] += fileSize
// (Put the level on every line to make it easier to process with sed/perl.)
fmt.Printf("[%25s] %-12s %6s L%d %s\n", dur(baseTime, file.ModTime()),
tableFile, hbytes(fileSize), level, emptyString)
} else {
fmt.Printf("%s [MISSING]\n", tableFile)
numMissing++
}
}
}
valueDirFileinfos := fileinfos
if valueDir != dir {
valueDirFileinfos, err = ioutil.ReadDir(valueDir)
if err != nil {
return err
}
}
// If valueDir is different from dir, this holds the extra files found in the value dir.
valueDirExtras := []os.FileInfo{}
valueLogSize := int64(0)
// fmt.Print("\n[Value Log]\n")
for _, file := range valueDirFileinfos {
if !strings.HasSuffix(file.Name(), ".vlog") {
if valueDir != dir {
valueDirExtras = append(valueDirExtras, file)
}
continue
}
fileSize := file.Size()
emptyString := ""
if fileSize == 0 {
emptyString = " [EMPTY]"
numEmpty++
}
valueLogSize += fileSize
fmt.Printf("[%25s] %-12s %6s VL%s\n", dur(baseTime, file.ModTime()), file.Name(),
hbytes(fileSize), emptyString)
fileinfoMarked[file.Name()] = true
}
numExtra := 0
for _, file := range fileinfos {
if fileinfoMarked[file.Name()] {
continue
}
if numExtra == 0 {
fmt.Print("\n[EXTRA]\n")
}
fmt.Printf("[%s] %-12s %6s\n", file.ModTime().Format(time.RFC3339),
file.Name(), hbytes(file.Size()))
numExtra++
}
numValueDirExtra := 0
for _, file := range valueDirExtras {
if numValueDirExtra == 0 {
fmt.Print("\n[ValueDir EXTRA]\n")
}
fmt.Printf("[%s] %-12s %6s\n", file.ModTime().Format(time.RFC3339),
file.Name(), hbytes(file.Size()))
numValueDirExtra++
}
fmt.Print("\n[Summary]\n")
totalIndexSize := int64(0)
for i, sz := range levelSizes {
fmt.Printf("Level %d size: %12s\n", i, hbytes(sz))
totalIndexSize += sz
}
fmt.Printf("Total index size: %8s\n", hbytes(totalIndexSize))
fmt.Printf("Value log size: %10s\n", hbytes(valueLogSize))
fmt.Println()
totalExtra := numExtra + numValueDirExtra
if totalExtra == 0 && numMissing == 0 && numEmpty == 0 && !manifestTruncated {
fmt.Println("Abnormalities: None.")
} else {
fmt.Println("Abnormalities:")
}
fmt.Printf("%d extra %s.\n", totalExtra, pluralFiles(totalExtra))
fmt.Printf("%d missing %s.\n", numMissing, pluralFiles(numMissing))
fmt.Printf("%d empty %s.\n", numEmpty, pluralFiles(numEmpty))
fmt.Printf("%d truncated %s.\n", boolToNum(manifestTruncated),
pluralManifest(manifestTruncated))
return nil
}
func boolToNum(x bool) int {
if x {
return 1
}
return 0
}
func pluralManifest(manifestTruncated bool) string {
if manifestTruncated {
return "manifest"
}
return "manifests"
}
func pluralFiles(count int) string {
if count == 1 {
return "file"
}
return "files"
}
badger-2.2007.2/badger/cmd/read_bench.go 0000664 0000000 0000000 00000014300 13721731165 0017564 0 ustar 00root root 0000000 0000000 /*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"context"
"fmt"
"math/rand"
"strings"
"sync/atomic"
"time"
humanize "github.com/dustin/go-humanize"
"github.com/spf13/cobra"
"github.com/dgraph-io/badger/v2"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/y"
)
var readBenchCmd = &cobra.Command{
Use: "read",
Short: "Read data from Badger randomly to benchmark read speed.",
Long: `
This command reads data from an existing Badger database randomly using multiple goroutines.`,
RunE: readBench,
}
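// Example invocation (a minimal sketch; the directory path is a placeholder).
// The flags are registered in init below; --dir comes from the root command:
//
//	badger benchmark read --dir /path/to/badger --duration 2m --goroutines 32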
var (
sizeRead uint64 // will store size read till now
entriesRead uint64 // will store entries read till now
startTime time.Time // start time of read benchmarking
sampleSize int
loadingMode string
keysOnly bool
readOnly bool
)
func init() {
benchCmd.AddCommand(readBenchCmd)
readBenchCmd.Flags().IntVarP(
&numGoroutines, "goroutines", "g", 16, "Number of goroutines to run for reading.")
readBenchCmd.Flags().StringVarP(
&duration, "duration", "d", "1m", "How long to run the benchmark.")
readBenchCmd.Flags().IntVar(
&sampleSize, "sample-size", 1000000, "Keys sample size to be used for random lookup.")
readBenchCmd.Flags().BoolVar(
&keysOnly, "keys-only", false, "If false, values will also be read.")
readBenchCmd.Flags().BoolVar(
&readOnly, "read-only", true, "If true, DB will be opened in read only mode.")
readBenchCmd.Flags().StringVar(
&loadingMode, "loading-mode", "mmap", "Mode for accessing SSTables and value log files. "+
"Valid loading modes are fileio and mmap.")
}
func readBench(cmd *cobra.Command, args []string) error {
rand.Seed(time.Now().Unix())
dur, err := time.ParseDuration(duration)
if err != nil {
return y.Wrapf(err, "unable to parse duration")
}
y.AssertTrue(numGoroutines > 0)
mode := getLoadingMode(loadingMode)
db, err := badger.Open(badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithReadOnly(readOnly).
WithTableLoadingMode(mode).
WithValueLogLoadingMode(mode))
if err != nil {
return y.Wrapf(err, "unable to open DB")
}
defer db.Close()
now := time.Now()
keys, err := getSampleKeys(db)
if err != nil {
return y.Wrapf(err, "error while sampling keys")
}
fmt.Println("*********************************************************")
fmt.Printf("Total Sampled Keys: %d, read in time: %s\n", len(keys), time.Since(now))
fmt.Println("*********************************************************")
if len(keys) == 0 {
fmt.Println("DB is empty, hence returning")
return nil
}
fmt.Println("*********************************************************")
fmt.Println("Starting to benchmark Reads")
fmt.Println("*********************************************************")
c := y.NewCloser(0)
startTime = time.Now()
for i := 0; i < numGoroutines; i++ {
c.AddRunning(1)
go readKeys(db, c, keys)
}
// also start printing stats
c.AddRunning(1)
go printStats(c)
<-time.After(dur)
c.SignalAndWait()
return nil
}
func printStats(c *y.Closer) {
defer c.Done()
t := time.NewTicker(time.Second)
defer t.Stop()
for {
select {
case <-c.HasBeenClosed():
return
case <-t.C:
dur := time.Since(startTime)
sz := atomic.LoadUint64(&sizeRead)
entries := atomic.LoadUint64(&entriesRead)
bytesRate := sz / uint64(dur.Seconds())
entriesRate := entries / uint64(dur.Seconds())
fmt.Printf("Time elapsed: %s, bytes read: %s, speed: %s/sec, "+
"entries read: %d, speed: %d/sec\n", y.FixedDuration(time.Since(startTime)),
humanize.Bytes(sz), humanize.Bytes(bytesRate), entries, entriesRate)
}
}
}
func readKeys(db *badger.DB, c *y.Closer, keys [][]byte) {
defer c.Done()
r := rand.New(rand.NewSource(time.Now().Unix()))
for {
select {
case <-c.HasBeenClosed():
return
default:
key := keys[r.Int31n(int32(len(keys)))]
atomic.AddUint64(&sizeRead, lookupForKey(db, key))
atomic.AddUint64(&entriesRead, 1)
}
}
}
func lookupForKey(db *badger.DB, key []byte) (sz uint64) {
err := db.View(func(txn *badger.Txn) error {
itm, err := txn.Get(key)
y.Check(err)
if keysOnly {
sz = uint64(itm.KeySize())
} else {
y.Check2(itm.ValueCopy(nil))
sz = uint64(itm.EstimatedSize())
}
return nil
})
y.Check(err)
return
}
// getSampleKeys uses the stream framework internally to get keys in random order.
func getSampleKeys(db *badger.DB) ([][]byte, error) {
var keys [][]byte
count := 0
stream := db.NewStream()
// Override stream.KeyToList as we only want keys. Also,
// we only need the first version of each key.
stream.KeyToList = func(key []byte, itr *badger.Iterator) (*pb.KVList, error) {
l := &pb.KVList{}
// Since stream framework copies the item's key while calling
// KeyToList, we can directly append key to list.
l.Kv = append(l.Kv, &pb.KV{Key: key})
return l, nil
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
stream.Send = func(l *pb.KVList) error {
if count >= sampleSize {
return nil
}
for _, kv := range l.Kv {
keys = append(keys, kv.Key)
count++
if count >= sampleSize {
cancel()
return nil
}
}
return nil
}
if err := stream.Orchestrate(ctx); err != nil && err != context.Canceled {
return nil, err
}
// Shuffle keys before returning to minimise locality
// of keys coming from stream framework.
rand.Shuffle(len(keys), func(i, j int) {
keys[i], keys[j] = keys[j], keys[i]
})
return keys, nil
}
func getLoadingMode(m string) options.FileLoadingMode {
m = strings.ToLower(m)
var mode options.FileLoadingMode
switch m {
case "fileio":
mode = options.FileIO
case "mmap":
mode = options.MemoryMap
default:
panic("loading mode not supported")
}
return mode
}
badger-2.2007.2/badger/cmd/restore.go 0000664 0000000 0000000 00000004472 13721731165 0017206 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"errors"
"os"
"path"
"github.com/dgraph-io/badger/v2"
"github.com/spf13/cobra"
)
var restoreFile string
var maxPendingWrites int
// restoreCmd represents the restore command
var restoreCmd = &cobra.Command{
Use: "restore",
Short: "Restore Badger database.",
Long: `Restore Badger database from a file.
It reads a file generated using the backup command (or by calling the
DB.Backup() API method) and writes each key-value pair found in the file to
the Badger database.
Restore creates a new database, and currently does not work on an already
existing database.`,
RunE: doRestore,
}
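// A minimal sketch of the round trip described above. The paths are placeholders,
// and it assumes the backup subcommand accepts the same --backup-file/-f flag; the
// backup file can equally be produced by calling DB.Backup() directly:
//
//	badger backup  --dir /path/to/badger     -f badger.bak
//	badger restore --dir /path/to/new-badger -f badger.bak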
func init() {
RootCmd.AddCommand(restoreCmd)
restoreCmd.Flags().StringVarP(&restoreFile, "backup-file", "f",
"badger.bak", "File to restore from")
// Default value for maxPendingWrites is 256, to minimise memory usage
// and overall finish time.
restoreCmd.Flags().IntVarP(&maxPendingWrites, "max-pending-writes", "w",
256, "Max number of pending writes at any time while restore")
}
func doRestore(cmd *cobra.Command, args []string) error {
// Check if the DB already exists
manifestFile := path.Join(sstDir, badger.ManifestFilename)
if _, err := os.Stat(manifestFile); err == nil { // No error. File already exists.
return errors.New("Cannot restore to an already existing database")
} else if os.IsNotExist(err) {
// pass
} else { // Return any error other than the not-exist error handled above.
return err
}
// Open DB
db, err := badger.Open(badger.DefaultOptions(sstDir).WithValueDir(vlogDir))
if err != nil {
return err
}
defer db.Close()
// Open File
f, err := os.Open(restoreFile)
if err != nil {
return err
}
defer f.Close()
// Run restore
return db.Load(f, maxPendingWrites)
}
badger-2.2007.2/badger/cmd/root.go 0000664 0000000 0000000 00000003402 13721731165 0016476 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"errors"
"fmt"
"os"
"strings"
"github.com/spf13/cobra"
)
var sstDir, vlogDir string
// RootCmd represents the base command when called without any subcommands
var RootCmd = &cobra.Command{
Use: "badger",
Short: "Tools to manage Badger database.",
PersistentPreRunE: validateRootCmdArgs,
}
// Execute adds all child commands to the root command and sets flags appropriately.
// This is called by main.main(). It only needs to happen once to the rootCmd.
func Execute() {
if err := RootCmd.Execute(); err != nil {
fmt.Println(err)
os.Exit(1)
}
}
func init() {
RootCmd.PersistentFlags().StringVar(&sstDir, "dir", "",
"Directory where the LSM tree files are located. (required)")
RootCmd.PersistentFlags().StringVar(&vlogDir, "vlog-dir", "",
"Directory where the value log files are located, if different from --dir")
}
func validateRootCmdArgs(cmd *cobra.Command, args []string) error {
if strings.HasPrefix(cmd.Use, "help ") { // No need to validate if it is help
return nil
}
if sstDir == "" {
return errors.New("--dir not specified")
}
if vlogDir == "" {
vlogDir = sstDir
}
return nil
}
badger-2.2007.2/badger/cmd/rotate.go 0000664 0000000 0000000 00000003740 13721731165 0017016 0 ustar 00root root 0000000 0000000 /*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"io/ioutil"
"os"
"time"
"github.com/dgraph-io/badger/v2"
"github.com/spf13/cobra"
)
var oldKeyPath string
var newKeyPath string
var rotateCmd = &cobra.Command{
Use: "rotate",
Short: "Rotate encryption key.",
Long: "Rotate will rotate the old key with new encryption key.",
RunE: doRotate,
}
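// Example invocations (a minimal sketch; paths are placeholders). Leaving
// --old-key-path empty rotates from plain text to the new key, as exercised in
// rotate_test.go:
//
//	badger rotate --dir /path/to/badger --new-key-path new.key
//	badger rotate --dir /path/to/badger --old-key-path new.key --new-key-path newer.key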
func init() {
RootCmd.AddCommand(rotateCmd)
rotateCmd.Flags().StringVarP(&oldKeyPath, "old-key-path", "o",
"", "Path of the old key")
rotateCmd.Flags().StringVarP(&newKeyPath, "new-key-path", "n",
"", "Path of the new key")
}
func doRotate(cmd *cobra.Command, args []string) error {
oldKey, err := getKey(oldKeyPath)
if err != nil {
return err
}
opt := badger.KeyRegistryOptions{
Dir: sstDir,
ReadOnly: true,
EncryptionKey: oldKey,
EncryptionKeyRotationDuration: 10 * 24 * time.Hour,
}
kr, err := badger.OpenKeyRegistry(opt)
if err != nil {
return err
}
newKey, err := getKey(newKeyPath)
if err != nil {
return err
}
opt.EncryptionKey = newKey
err = badger.WriteKeyRegistry(kr, opt)
if err != nil {
return err
}
return nil
}
func getKey(path string) ([]byte, error) {
if path == "" {
// An empty key means plain text; used when rotating from plain text to encryption (or vice versa).
return []byte{}, nil
}
fp, err := os.Open(path)
if err != nil {
return nil, err
}
defer fp.Close()
return ioutil.ReadAll(fp)
}
badger-2.2007.2/badger/cmd/rotate_test.go 0000664 0000000 0000000 00000007527 13721731165 0020064 0 ustar 00root root 0000000 0000000 /*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"io/ioutil"
"math/rand"
"os"
"testing"
"github.com/dgraph-io/badger/v2"
"github.com/dgraph-io/badger/v2/y"
"github.com/stretchr/testify/require"
)
func TestRotate(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer os.RemoveAll(dir)
// Creating sample key.
key := make([]byte, 32)
_, err = rand.Read(key)
require.NoError(t, err)
fp, err := ioutil.TempFile("", "*.key")
require.NoError(t, err)
_, err = fp.Write(key)
require.NoError(t, err)
defer fp.Close()
// Opening DB with the encryption key.
opts := badger.DefaultOptions(dir)
opts.EncryptionKey = key
db, err := badger.Open(opts)
require.NoError(t, err)
// Closing the db.
require.NoError(t, db.Close())
// Opening the db again for the successful open.
db, err = badger.Open(opts)
require.NoError(t, err)
// Closing so that we can open another db
require.NoError(t, db.Close())
// Creating another sample key.
key2 := make([]byte, 32)
_, err = rand.Read(key2)
require.NoError(t, err)
fp2, err := ioutil.TempFile("", "*.key")
require.NoError(t, err)
_, err = fp2.Write(key2)
require.NoError(t, err)
defer fp2.Close()
oldKeyPath = fp2.Name()
sstDir = dir
// Check whether we are able to rotate the key with some random sample key. We should get a
// mismatch error.
require.EqualError(t, doRotate(nil, []string{}), badger.ErrEncryptionKeyMismatch.Error())
// Rotate the key with the proper old key.
oldKeyPath = fp.Name()
newKeyPath = fp2.Name()
require.NoError(t, doRotate(nil, []string{}))
// Checking whether db opens with the new key.
opts.EncryptionKey = key2
db, err = badger.Open(opts)
require.NoError(t, err)
require.NoError(t, db.Close())
// Checking for plain text rotation.
oldKeyPath = newKeyPath
newKeyPath = ""
require.NoError(t, doRotate(nil, []string{}))
opts.EncryptionKey = []byte{}
db, err = badger.Open(opts)
require.NoError(t, err)
defer db.Close()
}
// This test shows that the rotate tool can be used to enable encryption.
func TestRotatePlainTextToEncrypted(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer os.RemoveAll(dir)
// Open DB without encryption.
opts := badger.DefaultOptions(dir)
db, err := badger.Open(opts)
require.NoError(t, err)
db.Update(func(txn *badger.Txn) error {
return txn.Set([]byte("foo"), []byte("bar"))
})
require.NoError(t, db.Close())
// Create an encryption key.
key := make([]byte, 32)
y.Check2(rand.Read(key))
fp, err := ioutil.TempFile("", "*.key")
require.NoError(t, err)
_, err = fp.Write(key)
require.NoError(t, err)
defer fp.Close()
oldKeyPath = ""
newKeyPath = fp.Name()
sstDir = dir
// Enable encryption by rotating from plain text to the key at newKeyPath.
require.Nil(t, doRotate(nil, []string{}))
// Try opening DB without the key.
_, err = badger.Open(opts)
require.EqualError(t, err, badger.ErrEncryptionKeyMismatch.Error())
// Check whether db opens with the new key.
opts.EncryptionKey = key
db, err = badger.Open(opts)
require.NoError(t, err)
db.View(func(txn *badger.Txn) error {
iopt := badger.DefaultIteratorOptions
it := txn.NewIterator(iopt)
defer it.Close()
count := 0
for it.Rewind(); it.Valid(); it.Next() {
count++
}
require.Equal(t, 1, count)
return nil
})
require.NoError(t, db.Close())
}
badger-2.2007.2/badger/cmd/write_bench.go 0000664 0000000 0000000 00000012304 13721731165 0020005 0 ustar 00root root 0000000 0000000 /*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"encoding/binary"
"fmt"
"log"
"math/rand"
"sync"
"sync/atomic"
"time"
humanize "github.com/dustin/go-humanize"
"github.com/spf13/cobra"
"github.com/dgraph-io/badger/v2"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/y"
)
var writeBenchCmd = &cobra.Command{
Use: "write",
Short: "Writes random data to Badger to benchmark write speed.",
Long: `
This command writes random data to Badger to benchmark write speed. Useful for testing and
performance analysis.
`,
RunE: writeBench,
}
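// Example invocation (a minimal sketch; the directory path is a placeholder).
// The flags are registered in init below; --dir comes from the root command:
//
//	badger benchmark write --dir /path/to/badger --keys-mil 5 --sorted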
var (
keySz int
valSz int
numKeys float64
force bool
sorted bool
showLogs bool
sizeWritten uint64
entriesWritten uint64
)
const (
mil float64 = 1e6
)
func init() {
benchCmd.AddCommand(writeBenchCmd)
writeBenchCmd.Flags().IntVarP(&keySz, "key-size", "k", 32, "Size of key")
writeBenchCmd.Flags().IntVarP(&valSz, "val-size", "v", 128, "Size of value")
writeBenchCmd.Flags().Float64VarP(&numKeys, "keys-mil", "m", 10.0,
"Number of keys to add in millions")
writeBenchCmd.Flags().BoolVarP(&force, "force-compact", "f", true,
"Force compact level 0 on close.")
writeBenchCmd.Flags().BoolVarP(&sorted, "sorted", "s", false, "Write keys in sorted order.")
writeBenchCmd.Flags().BoolVarP(&showLogs, "logs", "l", false, "Show Badger logs.")
}
func writeRandom(db *badger.DB, num uint64) error {
value := make([]byte, valSz)
y.Check2(rand.Read(value))
es := uint64(keySz + valSz) // entry size is keySz + valSz
batch := db.NewWriteBatch()
for i := uint64(1); i <= num; i++ {
key := make([]byte, keySz)
y.Check2(rand.Read(key))
if err := batch.Set(key, value); err != nil {
return err
}
atomic.AddUint64(&entriesWritten, 1)
atomic.AddUint64(&sizeWritten, es)
}
return batch.Flush()
}
func writeSorted(db *badger.DB, num uint64) error {
value := make([]byte, valSz)
y.Check2(rand.Read(value))
es := 8 + valSz // key size is 8 bytes and value size is valSz
writer := db.NewStreamWriter()
if err := writer.Prepare(); err != nil {
return err
}
wg := &sync.WaitGroup{}
writeCh := make(chan *pb.KVList, 3)
writeRange := func(start, end uint64, streamId uint32) {
// end is not included.
defer wg.Done()
kvs := &pb.KVList{}
var sz int
for i := start; i < end; i++ {
key := make([]byte, 8)
binary.BigEndian.PutUint64(key, i)
kvs.Kv = append(kvs.Kv, &pb.KV{
Key: key,
Value: value,
Version: 1,
StreamId: streamId,
})
sz += es
atomic.AddUint64(&entriesWritten, 1)
atomic.AddUint64(&sizeWritten, uint64(es))
if sz >= 4<<20 { // 4 MB
writeCh <- kvs
kvs = &pb.KVList{}
sz = 0
}
}
writeCh <- kvs
}
// Let's create some streams.
width := num / 16
streamID := uint32(0)
for start := uint64(0); start < num; start += width {
end := start + width
if end > num {
end = num
}
streamID++
wg.Add(1)
go writeRange(start, end, streamID)
}
go func() {
wg.Wait()
close(writeCh)
}()
log.Printf("Max StreamId used: %d. Width: %d\n", streamID, width)
for kvs := range writeCh {
if err := writer.Write(kvs); err != nil {
panic(err)
}
}
log.Println("DONE streaming. Flushing...")
return writer.Flush()
}
func writeBench(cmd *cobra.Command, args []string) error {
opt := badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithTruncate(truncate).
WithSyncWrites(false).
WithCompactL0OnClose(force)
if !showLogs {
opt = opt.WithLogger(nil)
}
db, err := badger.Open(opt)
if err != nil {
return err
}
defer func() {
start := time.Now()
err := db.Close()
log.Printf("DB.Close. Error: %v. Time taken to close: %s", err, time.Since(start))
}()
fmt.Println("*********************************************************")
fmt.Println("Starting to benchmark Writes")
fmt.Println("*********************************************************")
startTime = time.Now()
num := uint64(numKeys * mil)
c := y.NewCloser(1)
go reportStats(c)
if sorted {
err = writeSorted(db, num)
} else {
err = writeRandom(db, num)
}
c.SignalAndWait()
return err
}
func reportStats(c *y.Closer) {
defer c.Done()
t := time.NewTicker(time.Second)
defer t.Stop()
for {
select {
case <-c.HasBeenClosed():
return
case <-t.C:
dur := time.Since(startTime)
sz := atomic.LoadUint64(&sizeWritten)
entries := atomic.LoadUint64(&entriesWritten)
bytesRate := sz / uint64(dur.Seconds())
entriesRate := entries / uint64(dur.Seconds())
fmt.Printf("Time elapsed: %s, bytes written: %s, speed: %s/sec, "+
"entries written: %d, speed: %d/sec\n", y.FixedDuration(time.Since(startTime)),
humanize.Bytes(sz), humanize.Bytes(bytesRate), entries, entriesRate)
}
}
}
badger-2.2007.2/badger/main.go 0000664 0000000 0000000 00000002124 13721731165 0015674 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"fmt"
"net/http"
_ "net/http/pprof"
"runtime"
"github.com/dgraph-io/badger/v2/badger/cmd"
)
func main() {
go func() {
for i := 8080; i < 9080; i++ {
fmt.Printf("Listening for /debug HTTP requests at port: %d\n", i)
if err := http.ListenAndServe(fmt.Sprintf("0.0.0.0:%d", i), nil); err != nil {
fmt.Println("Port busy. Trying another one...")
continue
}
}
}()
runtime.SetBlockProfileRate(100)
runtime.GOMAXPROCS(128)
cmd.Execute()
}
badger-2.2007.2/batch.go 0000664 0000000 0000000 00000013316 13721731165 0014612 0 ustar 00root root 0000000 0000000 /*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"sync"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/y"
"github.com/pkg/errors"
)
// WriteBatch holds the necessary info to perform batched writes.
type WriteBatch struct {
sync.Mutex
txn *Txn
db *DB
throttle *y.Throttle
err error
isManaged bool
commitTs uint64
}
// NewWriteBatch creates a new WriteBatch. This provides a way to conveniently do a lot of writes,
// batching them up as tightly as possible in a single transaction and using callbacks to avoid
// waiting for them to commit, thus achieving good performance. This API hides away the logic of
// creating and committing transactions. Due to the nature of SSI guarantees provided by Badger,
// blind writes can never encounter transaction conflicts (ErrConflict).
func (db *DB) NewWriteBatch() *WriteBatch {
if db.opt.managedTxns {
panic("cannot use NewWriteBatch in managed mode. Use NewWriteBatchAt instead")
}
return db.newWriteBatch(false)
}
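// A minimal usage sketch (key and val are placeholder helpers), mirroring the
// flow exercised in batch_test.go:
//
//	wb := db.NewWriteBatch()
//	defer wb.Cancel() // Guards against an early return before Flush.
//	for i := 0; i < n; i++ {
//		if err := wb.Set(key(i), val(i)); err != nil {
//			return err
//		}
//	}
//	return wb.Flush() // Waits for all pending writes to commit.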
func (db *DB) newWriteBatch(isManaged bool) *WriteBatch {
return &WriteBatch{
db: db,
isManaged: isManaged,
txn: db.newTransaction(true, isManaged),
throttle: y.NewThrottle(16),
}
}
// SetMaxPendingTxns sets a limit on maximum number of pending transactions while writing batches.
// This function should be called before using WriteBatch. Default value of MaxPendingTxns is
// 16 to minimise memory usage.
func (wb *WriteBatch) SetMaxPendingTxns(max int) {
wb.throttle = y.NewThrottle(max)
}
// Cancel function must be called if there's a chance that Flush might not get
// called. If neither Flush nor Cancel is called, the transaction oracle would
// never get a chance to clear out the row commit timestamp map, thus causing an
// unbounded memory consumption. Typically, you can call Cancel as a defer
// statement right after NewWriteBatch is called.
//
// Note that any committed writes would still go through despite calling Cancel.
func (wb *WriteBatch) Cancel() {
if err := wb.throttle.Finish(); err != nil {
wb.db.opt.Errorf("WatchBatch.Cancel error while finishing: %v", err)
}
wb.txn.Discard()
}
func (wb *WriteBatch) callback(err error) {
// The throttle (backed by a sync.WaitGroup) is thread-safe, so Done doesn't need to run inside wb.Lock.
defer wb.throttle.Done(err)
if err == nil {
return
}
wb.Lock()
defer wb.Unlock()
if wb.err != nil {
return
}
wb.err = err
}
func (wb *WriteBatch) Write(kvList *pb.KVList) error {
wb.Lock()
defer wb.Unlock()
for _, kv := range kvList.Kv {
e := Entry{Key: kv.Key, Value: kv.Value}
if len(kv.UserMeta) > 0 {
e.UserMeta = kv.UserMeta[0]
}
y.AssertTrue(kv.Version != 0)
e.version = kv.Version
if err := wb.handleEntry(&e); err != nil {
return err
}
}
return nil
}
// SetEntryAt is the equivalent of Txn.SetEntry but it also allows setting version for the entry.
// SetEntryAt can be used only in managed mode.
func (wb *WriteBatch) SetEntryAt(e *Entry, ts uint64) error {
if !wb.db.opt.managedTxns {
return errors.New("SetEntryAt can only be used in managed mode. Use SetEntry instead")
}
e.version = ts
return wb.SetEntry(e)
}
// Should be called with lock acquired.
func (wb *WriteBatch) handleEntry(e *Entry) error {
if err := wb.txn.SetEntry(e); err != ErrTxnTooBig {
return err
}
// Txn has reached its size limit (ErrTxnTooBig). Commit now.
if cerr := wb.commit(); cerr != nil {
return cerr
}
// This time the error must not be ErrTxnTooBig, otherwise, we make the
// error permanent.
if err := wb.txn.SetEntry(e); err != nil {
wb.err = err
return err
}
return nil
}
// SetEntry is the equivalent of Txn.SetEntry.
func (wb *WriteBatch) SetEntry(e *Entry) error {
wb.Lock()
defer wb.Unlock()
return wb.handleEntry(e)
}
// Set is equivalent of Txn.Set().
func (wb *WriteBatch) Set(k, v []byte) error {
e := &Entry{Key: k, Value: v}
return wb.SetEntry(e)
}
// DeleteAt is equivalent of Txn.Delete but accepts a delete timestamp.
func (wb *WriteBatch) DeleteAt(k []byte, ts uint64) error {
e := Entry{Key: k, meta: bitDelete, version: ts}
return wb.SetEntry(&e)
}
// Delete is equivalent of Txn.Delete.
func (wb *WriteBatch) Delete(k []byte) error {
wb.Lock()
defer wb.Unlock()
if err := wb.txn.Delete(k); err != ErrTxnTooBig {
return err
}
if err := wb.commit(); err != nil {
return err
}
if err := wb.txn.Delete(k); err != nil {
wb.err = err
return err
}
return nil
}
// Caller to commit must hold a write lock.
func (wb *WriteBatch) commit() error {
if wb.err != nil {
return wb.err
}
if err := wb.throttle.Do(); err != nil {
return err
}
wb.txn.CommitWith(wb.callback)
wb.txn = wb.db.newTransaction(true, wb.isManaged)
wb.txn.commitTs = wb.commitTs
return wb.err
}
// Flush must be called at the end to ensure that any pending writes get committed to Badger. Flush
// returns any error stored by WriteBatch.
func (wb *WriteBatch) Flush() error {
wb.Lock()
_ = wb.commit()
wb.txn.Discard()
wb.Unlock()
if err := wb.throttle.Finish(); err != nil {
return err
}
return wb.err
}
// Error returns any errors encountered so far. No commits would be run once an error is detected.
func (wb *WriteBatch) Error() error {
wb.Lock()
defer wb.Unlock()
return wb.err
}
badger-2.2007.2/batch_test.go 0000664 0000000 0000000 00000006541 13721731165 0015653 0 ustar 00root root 0000000 0000000 /*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"testing"
"time"
"github.com/stretchr/testify/require"
)
func TestWriteBatch(t *testing.T) {
key := func(i int) []byte {
return []byte(fmt.Sprintf("%10d", i))
}
val := func(i int) []byte {
return []byte(fmt.Sprintf("%128d", i))
}
test := func(t *testing.T, db *DB) {
wb := db.NewWriteBatch()
defer wb.Cancel()
// Sanity check for SetEntryAt.
require.Error(t, wb.SetEntryAt(&Entry{}, 12))
N, M := 50000, 1000
start := time.Now()
for i := 0; i < N; i++ {
require.NoError(t, wb.Set(key(i), val(i)))
}
for i := 0; i < M; i++ {
require.NoError(t, wb.Delete(key(i)))
}
require.NoError(t, wb.Flush())
t.Logf("Time taken for %d writes (w/ test options): %s\n", N+M, time.Since(start))
err := db.View(func(txn *Txn) error {
itr := txn.NewIterator(DefaultIteratorOptions)
defer itr.Close()
i := M
for itr.Rewind(); itr.Valid(); itr.Next() {
item := itr.Item()
require.Equal(t, string(key(i)), string(item.Key()))
valcopy, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, val(i), valcopy)
i++
}
require.Equal(t, N, i)
return nil
})
require.NoError(t, err)
}
t.Run("disk mode", func(t *testing.T) {
opt := getTestOptions("")
// Set the value threshold to 32 bytes, otherwise the write batch will generate
// too many files and we will crash with a "too many open files" error.
opt.ValueThreshold = 32
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
test(t, db)
})
})
t.Run("InMemory mode", func(t *testing.T) {
opt := getTestOptions("")
opt.InMemory = true
db, err := Open(opt)
require.NoError(t, err)
test(t, db)
require.NoError(t, db.Close())
})
}
// This test ensures we don't end up in a deadlock in case of an empty WriteBatch.
func TestEmptyWriteBatch(t *testing.T) {
t.Run("normal mode", func(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
wb := db.NewWriteBatch()
require.NoError(t, wb.Flush())
wb = db.NewWriteBatch()
require.NoError(t, wb.Flush())
wb = db.NewWriteBatch()
require.NoError(t, wb.Flush())
})
})
t.Run("managed mode", func(t *testing.T) {
opt := getTestOptions("")
opt.managedTxns = true
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
t.Run("WriteBatchAt", func(t *testing.T) {
wb := db.NewWriteBatchAt(2)
require.NoError(t, wb.Flush())
wb = db.NewWriteBatchAt(208)
require.NoError(t, wb.Flush())
wb = db.NewWriteBatchAt(31)
require.NoError(t, wb.Flush())
})
t.Run("ManagedWriteBatch", func(t *testing.T) {
wb := db.NewManagedWriteBatch()
require.NoError(t, wb.Flush())
wb = db.NewManagedWriteBatch()
require.NoError(t, wb.Flush())
wb = db.NewManagedWriteBatch()
require.NoError(t, wb.Flush())
})
})
})
}
badger-2.2007.2/compaction.go 0000664 0000000 0000000 00000012424 13721731165 0015664 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"fmt"
"log"
"math"
"sync"
"golang.org/x/net/trace"
"github.com/dgraph-io/badger/v2/table"
"github.com/dgraph-io/badger/v2/y"
)
type keyRange struct {
left []byte
right []byte
inf bool
}
var infRange = keyRange{inf: true}
func (r keyRange) String() string {
return fmt.Sprintf("[left=%x, right=%x, inf=%v]", r.left, r.right, r.inf)
}
func (r keyRange) equals(dst keyRange) bool {
return bytes.Equal(r.left, dst.left) &&
bytes.Equal(r.right, dst.right) &&
r.inf == dst.inf
}
func (r keyRange) overlapsWith(dst keyRange) bool {
if r.inf || dst.inf {
return true
}
// If my left is greater than dst right, we have no overlap.
if y.CompareKeys(r.left, dst.right) > 0 {
return false
}
// If my right is less than dst left, we have no overlap.
if y.CompareKeys(r.right, dst.left) < 0 {
return false
}
// We have overlap.
return true
}
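// For example, ranges [b, d] and [c, e] overlap, while [a, b] and [d, e] do not;
// a keyRange with inf set overlaps with everything.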
func getKeyRange(tables ...*table.Table) keyRange {
if len(tables) == 0 {
return keyRange{}
}
smallest := tables[0].Smallest()
biggest := tables[0].Biggest()
for i := 1; i < len(tables); i++ {
if y.CompareKeys(tables[i].Smallest(), smallest) < 0 {
smallest = tables[i].Smallest()
}
if y.CompareKeys(tables[i].Biggest(), biggest) > 0 {
biggest = tables[i].Biggest()
}
}
// We pick all the versions of the smallest and the biggest key. Note that version zero would
// be the rightmost key, considering versions are default sorted in descending order.
return keyRange{
left: y.KeyWithTs(y.ParseKey(smallest), math.MaxUint64),
right: y.KeyWithTs(y.ParseKey(biggest), 0),
}
}
type levelCompactStatus struct {
ranges []keyRange
delSize int64
}
func (lcs *levelCompactStatus) debug() string {
var b bytes.Buffer
for _, r := range lcs.ranges {
b.WriteString(r.String())
}
return b.String()
}
func (lcs *levelCompactStatus) overlapsWith(dst keyRange) bool {
for _, r := range lcs.ranges {
if r.overlapsWith(dst) {
return true
}
}
return false
}
func (lcs *levelCompactStatus) remove(dst keyRange) bool {
final := lcs.ranges[:0]
var found bool
for _, r := range lcs.ranges {
if !r.equals(dst) {
final = append(final, r)
} else {
found = true
}
}
lcs.ranges = final
return found
}
type compactStatus struct {
sync.RWMutex
levels []*levelCompactStatus
}
func (cs *compactStatus) toLog(tr trace.Trace) {
cs.RLock()
defer cs.RUnlock()
tr.LazyPrintf("Compaction status:")
for i, l := range cs.levels {
if l.debug() == "" {
continue
}
tr.LazyPrintf("[%d] %s", i, l.debug())
}
}
func (cs *compactStatus) overlapsWith(level int, this keyRange) bool {
cs.RLock()
defer cs.RUnlock()
thisLevel := cs.levels[level]
return thisLevel.overlapsWith(this)
}
func (cs *compactStatus) delSize(l int) int64 {
cs.RLock()
defer cs.RUnlock()
return cs.levels[l].delSize
}
type thisAndNextLevelRLocked struct{}
// compareAndAdd checks whether this compactDef can be run, i.e. that it doesn't overlap with any
// other running compaction. If it can be run, the run is recorded in the compactStatus state.
func (cs *compactStatus) compareAndAdd(_ thisAndNextLevelRLocked, cd compactDef) bool {
cs.Lock()
defer cs.Unlock()
level := cd.thisLevel.level
y.AssertTruef(level < len(cs.levels)-1, "Got level %d. Max levels: %d", level, len(cs.levels))
thisLevel := cs.levels[level]
nextLevel := cs.levels[level+1]
if thisLevel.overlapsWith(cd.thisRange) {
return false
}
if nextLevel.overlapsWith(cd.nextRange) {
return false
}
// Check whether this level really needs compaction or not. Otherwise, we'll end up
// running parallel compactions for the same level.
// Update: We should not be checking size here. Compaction priority already did the size checks.
// Here we should just be executing the wish of others.
thisLevel.ranges = append(thisLevel.ranges, cd.thisRange)
nextLevel.ranges = append(nextLevel.ranges, cd.nextRange)
thisLevel.delSize += cd.thisSize
return true
}
func (cs *compactStatus) delete(cd compactDef) {
cs.Lock()
defer cs.Unlock()
level := cd.thisLevel.level
y.AssertTruef(level < len(cs.levels)-1, "Got level %d. Max levels: %d", level, len(cs.levels))
thisLevel := cs.levels[level]
nextLevel := cs.levels[level+1]
thisLevel.delSize -= cd.thisSize
found := thisLevel.remove(cd.thisRange)
found = nextLevel.remove(cd.nextRange) && found
if !found {
this := cd.thisRange
next := cd.nextRange
fmt.Printf("Looking for: [%q, %q, %v] in this level.\n", this.left, this.right, this.inf)
fmt.Printf("This Level:\n%s\n", thisLevel.debug())
fmt.Println()
fmt.Printf("Looking for: [%q, %q, %v] in next level.\n", next.left, next.right, next.inf)
fmt.Printf("Next Level:\n%s\n", nextLevel.debug())
log.Fatal("keyRange not found")
}
}
badger-2.2007.2/db.go 0000664 0000000 0000000 00000144251 13721731165 0014121 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"context"
"encoding/binary"
"expvar"
"math"
"os"
"path/filepath"
"sort"
"strconv"
"sync"
"sync/atomic"
"time"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/skl"
"github.com/dgraph-io/badger/v2/table"
"github.com/dgraph-io/badger/v2/y"
"github.com/dgraph-io/ristretto"
humanize "github.com/dustin/go-humanize"
"github.com/pkg/errors"
)
var (
badgerPrefix = []byte("!badger!") // Prefix for internal keys used by badger.
head = []byte("!badger!head") // For storing value offset for replay.
txnKey = []byte("!badger!txn") // For indicating end of entries in txn.
badgerMove = []byte("!badger!move") // For key-value pairs which got moved during GC.
lfDiscardStatsKey = []byte("!badger!discard") // For storing lfDiscardStats
)
type closers struct {
updateSize *y.Closer
compactors *y.Closer
memtable *y.Closer
writes *y.Closer
valueGC *y.Closer
pub *y.Closer
}
// DB provides the various functions required to interact with Badger.
// DB is thread-safe.
type DB struct {
sync.RWMutex // Guards list of inmemory tables, not individual reads and writes.
dirLockGuard *directoryLockGuard
// nil if Dir and ValueDir are the same
valueDirGuard *directoryLockGuard
closers closers
mt *skl.Skiplist // Our latest (actively written) in-memory table
imm []*skl.Skiplist // Add here only AFTER pushing to flushChan.
opt Options
manifest *manifestFile
lc *levelsController
vlog valueLog
vhead valuePointer // less than or equal to a pointer to the last vlog value put into mt
writeCh chan *request
flushChan chan flushTask // For flushing memtables.
closeOnce sync.Once // For closing DB only once.
// Number of log rotates since the last memtable flush. We will access this field via atomic
// functions. Since we are not going to use any 64bit atomic functions, there is no need for
// 64 bit alignment of this struct (see #311).
logRotates int32
blockWrites int32
isClosed uint32
orc *oracle
pub *publisher
registry *KeyRegistry
blockCache *ristretto.Cache
indexCache *ristretto.Cache
}
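// A minimal open/write/read sketch (the directory path, key and value are
// placeholders), using only APIs from this package, as exercised by the tests
// under badger/cmd:
//
//	db, err := badger.Open(badger.DefaultOptions("/path/to/badger"))
//	if err != nil {
//		return err
//	}
//	defer db.Close()
//	if err := db.Update(func(txn *badger.Txn) error {
//		return txn.Set([]byte("answer"), []byte("42"))
//	}); err != nil {
//		return err
//	}
//	return db.View(func(txn *badger.Txn) error {
//		item, err := txn.Get([]byte("answer"))
//		if err != nil {
//			return err
//		}
//		_, err = item.ValueCopy(nil)
//		return err
//	})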
const (
kvWriteChCapacity = 1000
)
func (db *DB) replayFunction() func(Entry, valuePointer) error {
type txnEntry struct {
nk []byte
v y.ValueStruct
}
var txn []txnEntry
var lastCommit uint64
toLSM := func(nk []byte, vs y.ValueStruct) {
for err := db.ensureRoomForWrite(); err != nil; err = db.ensureRoomForWrite() {
db.opt.Debugf("Replay: Making room for writes")
time.Sleep(10 * time.Millisecond)
}
db.mt.Put(nk, vs)
}
first := true
return func(e Entry, vp valuePointer) error { // Function for replaying.
if first {
db.opt.Debugf("First key=%q\n", e.Key)
}
first = false
db.orc.Lock()
if db.orc.nextTxnTs < y.ParseTs(e.Key) {
db.orc.nextTxnTs = y.ParseTs(e.Key)
}
db.orc.Unlock()
nk := make([]byte, len(e.Key))
copy(nk, e.Key)
var nv []byte
meta := e.meta
if db.shouldWriteValueToLSM(e) {
nv = make([]byte, len(e.Value))
copy(nv, e.Value)
} else {
nv = vp.Encode()
meta = meta | bitValuePointer
}
// Update vhead. If a crash happens while replay is in progress
// and the head is not updated, we will end up replaying all the
// files starting from file zero, again.
db.updateHead([]valuePointer{vp})
v := y.ValueStruct{
Value: nv,
Meta: meta,
UserMeta: e.UserMeta,
ExpiresAt: e.ExpiresAt,
}
switch {
case e.meta&bitFinTxn > 0:
txnTs, err := strconv.ParseUint(string(e.Value), 10, 64)
if err != nil {
return errors.Wrapf(err, "Unable to parse txn fin: %q", e.Value)
}
y.AssertTrue(lastCommit == txnTs)
y.AssertTrue(len(txn) > 0)
// Got the end of txn. Now we can store them.
for _, t := range txn {
toLSM(t.nk, t.v)
}
txn = txn[:0]
lastCommit = 0
case e.meta&bitTxn > 0:
txnTs := y.ParseTs(nk)
if lastCommit == 0 {
lastCommit = txnTs
}
if lastCommit != txnTs {
db.opt.Warningf("Found an incomplete txn at timestamp %d. Discarding it.\n",
lastCommit)
txn = txn[:0]
lastCommit = txnTs
}
te := txnEntry{nk: nk, v: v}
txn = append(txn, te)
default:
// This entry is from a rewrite or via SetEntryAt(..).
toLSM(nk, v)
// We shouldn't get this entry in the middle of a transaction.
y.AssertTrue(lastCommit == 0)
y.AssertTrue(len(txn) == 0)
}
return nil
}
}
// Open returns a new DB object.
func Open(opt Options) (db *DB, err error) {
// It's okay to have zero compactors which will disable all compactions but
// we cannot have just one compactor otherwise we will end up with all data
// on level 2.
if opt.NumCompactors == 1 {
return nil, errors.New("Cannot have 1 compactor. Need at least 2")
}
if opt.InMemory && (opt.Dir != "" || opt.ValueDir != "") {
return nil, errors.New("Cannot use badger in Disk-less mode with Dir or ValueDir set")
}
opt.maxBatchSize = (15 * opt.MaxTableSize) / 100
opt.maxBatchCount = opt.maxBatchSize / int64(skl.MaxNodeSize)
// We are limiting opt.ValueThreshold to maxValueThreshold for now.
if opt.ValueThreshold > maxValueThreshold {
return nil, errors.Errorf("Invalid ValueThreshold, must be less or equal to %d",
maxValueThreshold)
}
// If ValueThreshold is greater than opt.maxBatchSize, we won't be able to push any data using
// the transaction APIs. Transactions batch entries into batches of size opt.maxBatchSize.
if int64(opt.ValueThreshold) > opt.maxBatchSize {
return nil, errors.Errorf("Valuethreshold greater than max batch size of %d. Either "+
"reduce opt.ValueThreshold or increase opt.MaxTableSize.", opt.maxBatchSize)
}
if !(opt.ValueLogFileSize <= 2<<30 && opt.ValueLogFileSize >= 1<<20) {
return nil, ErrValueLogSize
}
if !(opt.ValueLogLoadingMode == options.FileIO ||
opt.ValueLogLoadingMode == options.MemoryMap) {
return nil, ErrInvalidLoadingMode
}
// Return error if badger is built without cgo and compression is set to ZSTD.
if opt.Compression == options.ZSTD && !y.CgoEnabled {
return nil, y.ErrZstdCgo
}
// Keep L0 in memory if either KeepL0InMemory is set or if InMemory is set.
opt.KeepL0InMemory = opt.KeepL0InMemory || opt.InMemory
// Compact L0 on close if either CompactL0OnClose or KeepL0InMemory is set. When
// KeepL0InMemory is set we need to compact L0 on close, otherwise we might lose data.
opt.CompactL0OnClose = opt.CompactL0OnClose || opt.KeepL0InMemory
if opt.ReadOnly {
// Can't truncate if the DB is read only.
opt.Truncate = false
// Do not perform compaction in read only mode.
opt.CompactL0OnClose = false
}
var dirLockGuard, valueDirLockGuard *directoryLockGuard
// Create directories and acquire lock on it only if badger is not running in InMemory mode.
// We don't have any directories/files in InMemory mode so we don't need to acquire
// any locks on them.
if !opt.InMemory {
if err := createDirs(opt); err != nil {
return nil, err
}
if !opt.BypassLockGuard {
dirLockGuard, err = acquireDirectoryLock(opt.Dir, lockFile, opt.ReadOnly)
if err != nil {
return nil, err
}
defer func() {
if dirLockGuard != nil {
_ = dirLockGuard.release()
}
}()
absDir, err := filepath.Abs(opt.Dir)
if err != nil {
return nil, err
}
absValueDir, err := filepath.Abs(opt.ValueDir)
if err != nil {
return nil, err
}
if absValueDir != absDir {
valueDirLockGuard, err = acquireDirectoryLock(opt.ValueDir, lockFile, opt.ReadOnly)
if err != nil {
return nil, err
}
defer func() {
if valueDirLockGuard != nil {
_ = valueDirLockGuard.release()
}
}()
}
}
}
manifestFile, manifest, err := openOrCreateManifestFile(opt)
if err != nil {
return nil, err
}
defer func() {
if manifestFile != nil {
_ = manifestFile.close()
}
}()
db = &DB{
imm: make([]*skl.Skiplist, 0, opt.NumMemtables),
flushChan: make(chan flushTask, opt.NumMemtables),
writeCh: make(chan *request, kvWriteChCapacity),
opt: opt,
manifest: manifestFile,
dirLockGuard: dirLockGuard,
valueDirGuard: valueDirLockGuard,
orc: newOracle(opt),
pub: newPublisher(),
}
// Cleanup all the goroutines started by badger in case of an error.
defer func() {
if err != nil {
db.cleanup()
db = nil
}
}()
if opt.BlockCacheSize > 0 {
config := ristretto.Config{
// Use 5% of cache memory for storing counters.
NumCounters: int64(float64(opt.BlockCacheSize) * 0.05 * 2),
MaxCost: int64(float64(opt.BlockCacheSize) * 0.95),
BufferItems: 64,
Metrics: true,
}
db.blockCache, err = ristretto.NewCache(&config)
if err != nil {
return nil, errors.Wrap(err, "failed to create data cache")
}
}
if opt.IndexCacheSize > 0 {
config := ristretto.Config{
// Use 5% of cache memory for storing counters.
NumCounters: int64(float64(opt.IndexCacheSize) * 0.05 * 2),
MaxCost: int64(float64(opt.IndexCacheSize) * 0.95),
BufferItems: 64,
Metrics: true,
}
db.indexCache, err = ristretto.NewCache(&config)
if err != nil {
return nil, errors.Wrap(err, "failed to create bf cache")
}
}
if db.opt.InMemory {
db.opt.SyncWrites = false
// If badger is running in memory mode, push everything into the LSM Tree.
db.opt.ValueThreshold = math.MaxInt32
}
krOpt := KeyRegistryOptions{
ReadOnly: opt.ReadOnly,
Dir: opt.Dir,
EncryptionKey: opt.EncryptionKey,
EncryptionKeyRotationDuration: opt.EncryptionKeyRotationDuration,
InMemory: opt.InMemory,
}
if db.registry, err = OpenKeyRegistry(krOpt); err != nil {
return db, err
}
db.calculateSize()
db.closers.updateSize = y.NewCloser(1)
go db.updateSize(db.closers.updateSize)
db.mt = skl.NewSkiplist(arenaSize(opt))
// newLevelsController potentially loads files in directory.
if db.lc, err = newLevelsController(db, &manifest); err != nil {
return db, err
}
// Initialize vlog struct.
db.vlog.init(db)
if !opt.ReadOnly {
db.closers.compactors = y.NewCloser(1)
db.lc.startCompact(db.closers.compactors)
db.closers.memtable = y.NewCloser(1)
go func() {
_ = db.flushMemtable(db.closers.memtable) // Need levels controller to be up.
}()
}
headKey := y.KeyWithTs(head, math.MaxUint64)
// We need to pass the key with a timestamp; the LSM get strips the last 8 bytes and compares only the key.
vs, err := db.get(headKey)
if err != nil {
return db, errors.Wrap(err, "Retrieving head")
}
db.orc.nextTxnTs = vs.Version
var vptr valuePointer
if len(vs.Value) > 0 {
vptr.Decode(vs.Value)
}
replayCloser := y.NewCloser(1)
go db.doWrites(replayCloser)
if err = db.vlog.open(db, vptr, db.replayFunction()); err != nil {
replayCloser.SignalAndWait()
return db, y.Wrapf(err, "During db.vlog.open")
}
replayCloser.SignalAndWait() // Wait for replay to be applied first.
// Let's advance nextTxnTs to one more than whatever we observed via
// replaying the logs.
db.orc.txnMark.Done(db.orc.nextTxnTs)
// In normal mode, we must update readMark so older versions of keys can be removed during
// compaction when run in offline mode via the flatten tool.
db.orc.readMark.Done(db.orc.nextTxnTs)
db.orc.incrementNextTs()
db.closers.writes = y.NewCloser(1)
go db.doWrites(db.closers.writes)
if !db.opt.InMemory {
db.closers.valueGC = y.NewCloser(1)
go db.vlog.waitOnGC(db.closers.valueGC)
}
db.closers.pub = y.NewCloser(1)
go db.pub.listenForUpdates(db.closers.pub)
valueDirLockGuard = nil
dirLockGuard = nil
manifestFile = nil
return db, nil
}
// cleanup stops all the goroutines started by badger. It is used in Open to
// clean up goroutines in case of an error.
func (db *DB) cleanup() {
db.stopMemoryFlush()
db.stopCompactions()
db.blockCache.Close()
db.indexCache.Close()
if db.closers.updateSize != nil {
db.closers.updateSize.Signal()
}
if db.closers.valueGC != nil {
db.closers.valueGC.Signal()
}
if db.closers.writes != nil {
db.closers.writes.Signal()
}
if db.closers.pub != nil {
db.closers.pub.Signal()
}
db.orc.Stop()
// Do not use vlog.Close() here. vlog.Close truncates the files. We don't
// want to truncate files unless the user has specified the truncate flag.
db.vlog.stopFlushDiscardStats()
}
// BlockCacheMetrics returns the metrics for the underlying block cache.
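//
// A small usage sketch (a nil result means the block cache is disabled; Ratio is
// assumed from ristretto's Metrics API):
//
//  if m := db.BlockCacheMetrics(); m != nil {
//      fmt.Printf("block cache hit ratio: %.2f\n", m.Ratio())
//  }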
func (db *DB) BlockCacheMetrics() *ristretto.Metrics {
if db.blockCache != nil {
return db.blockCache.Metrics
}
return nil
}
// IndexCacheMetrics returns the metrics for the underlying index cache.
func (db *DB) IndexCacheMetrics() *ristretto.Metrics {
if db.indexCache != nil {
return db.indexCache.Metrics
}
return nil
}
// Close closes a DB. It's crucial to call it to ensure all the pending updates make their way to
// disk. Calling DB.Close() multiple times would still only close the DB once.
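//
// A minimal usage sketch (the path is illustrative):
//
//  db, err := Open(DefaultOptions("/tmp/badger"))
//  if err != nil {
//      // handle error
//  }
//  defer db.Close()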
func (db *DB) Close() error {
var err error
db.closeOnce.Do(func() {
err = db.close()
})
return err
}
// IsClosed denotes if the badger DB is closed or not. A DB instance should not
// be used after closing it.
func (db *DB) IsClosed() bool {
return atomic.LoadUint32(&db.isClosed) == 1
}
func (db *DB) close() (err error) {
db.opt.Debugf("Closing database")
atomic.StoreInt32(&db.blockWrites, 1)
if !db.opt.InMemory {
// Stop value GC first.
db.closers.valueGC.SignalAndWait()
}
// Stop writes next.
db.closers.writes.SignalAndWait()
// Don't accept any more writes.
close(db.writeCh)
db.closers.pub.SignalAndWait()
// Now close the value log.
if vlogErr := db.vlog.Close(); vlogErr != nil {
err = errors.Wrap(vlogErr, "DB.Close")
}
// Make sure that block writer is done pushing stuff into memtable!
// Otherwise, you will have a race condition: we are trying to flush memtables
// and remove them completely, while the block / memtable writer is still
// trying to push stuff into the memtable. This will also resolve the value
// offset problem: as we push into memtable, we update value offsets there.
if !db.mt.Empty() {
db.opt.Debugf("Flushing memtable")
for {
pushedFlushTask := func() bool {
db.Lock()
defer db.Unlock()
y.AssertTrue(db.mt != nil)
select {
case db.flushChan <- flushTask{mt: db.mt, vptr: db.vhead}:
db.imm = append(db.imm, db.mt) // Flusher will attempt to remove this from s.imm.
db.mt = nil // Will segfault if we try writing!
db.opt.Debugf("pushed to flush chan\n")
return true
default:
// If we fail to push, we need to unlock and wait for a short while.
// The flushing operation needs to update s.imm. Otherwise, we have a deadlock.
// TODO: Think about how to do this more cleanly, maybe without any locks.
}
return false
}()
if pushedFlushTask {
break
}
time.Sleep(10 * time.Millisecond)
}
}
db.stopMemoryFlush()
db.stopCompactions()
// Force Compact L0
// We don't need to care about cstatus since no parallel compaction is running.
if db.opt.CompactL0OnClose {
err := db.lc.doCompact(173, compactionPriority{level: 0, score: 1.73})
switch err {
case errFillTables:
// This error only means that there might not be enough tables to do a compaction. So, we
// should not report it to the end user to avoid confusing them.
case nil:
db.opt.Infof("Force compaction on level 0 done")
default:
db.opt.Warningf("While forcing compaction on level 0: %v", err)
}
}
if lcErr := db.lc.close(); err == nil {
err = errors.Wrap(lcErr, "DB.Close")
}
db.opt.Debugf("Waiting for closer")
db.closers.updateSize.SignalAndWait()
db.orc.Stop()
db.blockCache.Close()
db.indexCache.Close()
atomic.StoreUint32(&db.isClosed, 1)
if db.opt.InMemory {
return
}
if db.dirLockGuard != nil {
if guardErr := db.dirLockGuard.release(); err == nil {
err = errors.Wrap(guardErr, "DB.Close")
}
}
if db.valueDirGuard != nil {
if guardErr := db.valueDirGuard.release(); err == nil {
err = errors.Wrap(guardErr, "DB.Close")
}
}
if manifestErr := db.manifest.close(); err == nil {
err = errors.Wrap(manifestErr, "DB.Close")
}
if registryErr := db.registry.Close(); err == nil {
err = errors.Wrap(registryErr, "DB.Close")
}
// Fsync directories to ensure that lock file, and any other removed files whose directory
// we haven't specifically fsynced, are guaranteed to have their directory entry removal
// persisted to disk.
if syncErr := db.syncDir(db.opt.Dir); err == nil {
err = errors.Wrap(syncErr, "DB.Close")
}
if syncErr := db.syncDir(db.opt.ValueDir); err == nil {
err = errors.Wrap(syncErr, "DB.Close")
}
return err
}
// VerifyChecksum verifies checksum for all tables on all levels.
// This method can be used to verify checksums even when opt.ChecksumVerificationMode is NoVerification.
func (db *DB) VerifyChecksum() error {
return db.lc.verifyChecksum()
}
const (
lockFile = "LOCK"
)
// Sync syncs database content to disk. This function provides
// more control to the user to sync data whenever required.
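//
// For example, an application running with SyncWrites disabled might call this at
// its own checkpoints (a sketch; how often to call it is application-specific):
//
//  if err := db.Sync(); err != nil {
//      // handle error
//  }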
func (db *DB) Sync() error {
return db.vlog.sync(math.MaxUint32)
}
// getMemTables returns the current memtables and increments their references.
func (db *DB) getMemTables() ([]*skl.Skiplist, func()) {
db.RLock()
defer db.RUnlock()
tables := make([]*skl.Skiplist, len(db.imm)+1)
// Get mutable memtable.
tables[0] = db.mt
tables[0].IncrRef()
// Get immutable memtables.
last := len(db.imm) - 1
for i := range db.imm {
tables[i+1] = db.imm[last-i]
tables[i+1].IncrRef()
}
return tables, func() {
for _, tbl := range tables {
tbl.DecrRef()
}
}
}
// get returns the value in memtable or disk for given key.
// Note that value will include meta byte.
//
// IMPORTANT: We should never write an entry with an older timestamp for the same key. We need to
// maintain this invariant to search for the latest value of a key, or else we need to search in all
// tables and find the max version among them. To maintain this invariant, we also need to ensure
// that all versions of a key are always present in the same table from level 1, because compaction
// can push any table down.
//
// Update (Sep 22, 2018): To maintain the above invariant, and to allow keys to be moved from one
// value log to another (while reclaiming space during value log GC), we have logically moved this
// need to write "old versions after new versions" to the badgerMove keyspace. Thus, for normal
// gets, we can stop going down the LSM tree once we find any version of the key (note however that
// we will ALWAYS skip versions with ts greater than the key version). However, if that key has
// been moved, then for the corresponding movekey, we'll look through all the levels of the tree
// to ensure that we pick the highest version of the movekey present.
func (db *DB) get(key []byte) (y.ValueStruct, error) {
if db.IsClosed() {
return y.ValueStruct{}, ErrDBClosed
}
tables, decr := db.getMemTables() // Lock should be released.
defer decr()
var maxVs *y.ValueStruct
var version uint64
if bytes.HasPrefix(key, badgerMove) {
// If we are checking badgerMove key, we should look into all the
// levels, so we can pick up the newer versions, which might have been
// compacted down the tree.
maxVs = &y.ValueStruct{}
version = y.ParseTs(key)
}
y.NumGets.Add(1)
for i := 0; i < len(tables); i++ {
vs := tables[i].Get(key)
y.NumMemtableGets.Add(1)
if vs.Meta == 0 && vs.Value == nil {
continue
}
// Found a version of the key. For user keyspace, return immediately. For move keyspace,
// continue iterating, unless we found a version == given key version.
if maxVs == nil || vs.Version == version {
return vs, nil
}
if maxVs.Version < vs.Version {
*maxVs = vs
}
}
return db.lc.get(key, maxVs, 0)
}
// updateHead should not be called without holding db.Lock(), since db.vhead is used
// by the writer goroutines and the memtable flushing goroutine.
func (db *DB) updateHead(ptrs []valuePointer) {
var ptr valuePointer
for i := len(ptrs) - 1; i >= 0; i-- {
p := ptrs[i]
if !p.IsZero() {
ptr = p
break
}
}
if ptr.IsZero() {
return
}
y.AssertTrue(!ptr.Less(db.vhead))
db.vhead = ptr
}
var requestPool = sync.Pool{
New: func() interface{} {
return new(request)
},
}
func (db *DB) shouldWriteValueToLSM(e Entry) bool {
return len(e.Value) < db.opt.ValueThreshold
}
func (db *DB) writeToLSM(b *request) error {
// We should check the length of b.Ptrs and b.Entries only when badger is not
// running in InMemory mode. In InMemory mode, we don't write anything to the
// value log and that's why the length of b.Ptrs will always be zero.
if !db.opt.InMemory && len(b.Ptrs) != len(b.Entries) {
return errors.Errorf("Ptrs and Entries don't match: %+v", b)
}
for i, entry := range b.Entries {
if entry.meta&bitFinTxn != 0 {
continue
}
if db.shouldWriteValueToLSM(*entry) { // Will include deletion / tombstone case.
db.mt.Put(entry.Key,
y.ValueStruct{
Value: entry.Value,
// Ensure value pointer flag is removed. Otherwise, the value will fail
// to be retrieved during iterator prefetch. `bitValuePointer` is only
// known to be set in write to LSM when the entry is loaded from a backup
// with lower ValueThreshold and its value was stored in the value log.
Meta: entry.meta &^ bitValuePointer,
UserMeta: entry.UserMeta,
ExpiresAt: entry.ExpiresAt,
})
} else {
db.mt.Put(entry.Key,
y.ValueStruct{
Value: b.Ptrs[i].Encode(),
Meta: entry.meta | bitValuePointer,
UserMeta: entry.UserMeta,
ExpiresAt: entry.ExpiresAt,
})
}
}
return nil
}
// writeRequests is called serially by only one goroutine.
func (db *DB) writeRequests(reqs []*request) error {
if len(reqs) == 0 {
return nil
}
done := func(err error) {
for _, r := range reqs {
r.Err = err
r.Wg.Done()
}
}
db.opt.Debugf("writeRequests called. Writing to value log")
err := db.vlog.write(reqs)
if err != nil {
done(err)
return err
}
db.opt.Debugf("Sending updates to subscribers")
db.pub.sendUpdates(reqs)
db.opt.Debugf("Writing to memtable")
var count int
for _, b := range reqs {
if len(b.Entries) == 0 {
continue
}
count += len(b.Entries)
var i uint64
for err = db.ensureRoomForWrite(); err == errNoRoom; err = db.ensureRoomForWrite() {
i++
if i%100 == 0 {
db.opt.Debugf("Making room for writes")
}
// We need to poll a bit because both ensureRoomForWrite and the flusher need access to s.imm.
// When flushChan is full and you are blocked there, and the flusher is trying to update s.imm,
// you will get a deadlock.
time.Sleep(10 * time.Millisecond)
}
if err != nil {
done(err)
return errors.Wrap(err, "writeRequests")
}
if err := db.writeToLSM(b); err != nil {
done(err)
return errors.Wrap(err, "writeRequests")
}
db.Lock()
db.updateHead(b.Ptrs)
db.Unlock()
}
done(nil)
db.opt.Debugf("%d entries written", count)
return nil
}
func (db *DB) sendToWriteCh(entries []*Entry) (*request, error) {
if atomic.LoadInt32(&db.blockWrites) == 1 {
return nil, ErrBlockedWrites
}
var count, size int64
for _, e := range entries {
size += int64(e.estimateSize(db.opt.ValueThreshold))
count++
}
if count >= db.opt.maxBatchCount || size >= db.opt.maxBatchSize {
return nil, ErrTxnTooBig
}
// We can only service one request because we need each txn to be stored in a contiguous section.
// Txns should not interleave among other txns or rewrites.
req := requestPool.Get().(*request)
req.reset()
req.Entries = entries
req.Wg.Add(1)
req.IncrRef() // for db write
db.writeCh <- req // Handled in doWrites.
y.NumPuts.Add(int64(len(entries)))
return req, nil
}
func (db *DB) doWrites(lc *y.Closer) {
defer lc.Done()
pendingCh := make(chan struct{}, 1)
writeRequests := func(reqs []*request) {
if err := db.writeRequests(reqs); err != nil {
db.opt.Errorf("writeRequests: %v", err)
}
<-pendingCh
}
// This variable tracks the number of pending writes.
reqLen := new(expvar.Int)
y.PendingWrites.Set(db.opt.Dir, reqLen)
reqs := make([]*request, 0, 10)
for {
var r *request
select {
case r = <-db.writeCh:
case <-lc.HasBeenClosed():
goto closedCase
}
for {
reqs = append(reqs, r)
reqLen.Set(int64(len(reqs)))
if len(reqs) >= 3*kvWriteChCapacity {
pendingCh <- struct{}{} // blocking.
goto writeCase
}
select {
// Either push to pending, or continue to pick from writeCh.
case r = <-db.writeCh:
case pendingCh <- struct{}{}:
goto writeCase
case <-lc.HasBeenClosed():
goto closedCase
}
}
closedCase:
// All the pending requests are drained.
// Don't close the writeCh, because it is used in several places.
for {
select {
case r = <-db.writeCh:
reqs = append(reqs, r)
default:
pendingCh <- struct{}{} // Push to pending before doing a write.
writeRequests(reqs)
return
}
}
writeCase:
go writeRequests(reqs)
reqs = make([]*request, 0, 10)
reqLen.Set(0)
}
}
// batchSet applies a list of badger.Entry. If a request level error occurs it
// will be returned.
// Check(kv.BatchSet(entries))
func (db *DB) batchSet(entries []*Entry) error {
req, err := db.sendToWriteCh(entries)
if err != nil {
return err
}
return req.Wait()
}
// batchSetAsync is the asynchronous version of batchSet. It accepts a callback
// function which is called when all the sets are complete. If a request level
// error occurs, it will be passed back via the callback.
// err := kv.BatchSetAsync(entries, func(err error) {
//     Check(err)
// })
func (db *DB) batchSetAsync(entries []*Entry, f func(error)) error {
req, err := db.sendToWriteCh(entries)
if err != nil {
return err
}
go func() {
err := req.Wait()
// Write is complete. Let's call the callback function now.
f(err)
}()
return nil
}
var errNoRoom = errors.New("No room for write")
// ensureRoomForWrite is always called serially.
func (db *DB) ensureRoomForWrite() error {
var err error
db.Lock()
defer db.Unlock()
// Here we determine if we need to force flush memtable. Given we rotated log file, it would
// make sense to force flush a memtable, so the updated value head would have a chance to be
// pushed to L0. Otherwise, it would not go to L0, until the memtable has been fully filled,
// which can take a lot longer if the write load has fewer keys and larger values. This force
// flush, thus avoids the need to read through a lot of log files on a crash and restart.
// The approach above is quite simple, with one small drawback: we call ensureRoomForWrite before
// inserting every entry into the memtable, but db.head is only updated after all entries for a
// request have been inserted. If we have already done >= db.logRotates rotations, the condition
// below becomes true while inserting the first entry, and we end up flushing an older value of
// db.head. Hence we limit the number of value log files to be read on replay to db.logRotates.
forceFlush := atomic.LoadInt32(&db.logRotates) >= db.opt.LogRotatesToFlush
if !forceFlush && db.mt.MemSize() < db.opt.MaxTableSize {
return nil
}
y.AssertTrue(db.mt != nil) // A nil mt indicates that DB is being closed.
select {
case db.flushChan <- flushTask{mt: db.mt, vptr: db.vhead}:
// After every memtable flush, let's reset the counter.
atomic.StoreInt32(&db.logRotates, 0)
// Ensure value log is synced to disk so this memtable's contents wouldn't be lost.
err = db.vlog.sync(db.vhead.Fid)
if err != nil {
return err
}
db.opt.Debugf("Flushing memtable, mt.size=%d size of flushChan: %d\n",
db.mt.MemSize(), len(db.flushChan))
// We manage to push this task. Let's modify imm.
db.imm = append(db.imm, db.mt)
db.mt = skl.NewSkiplist(arenaSize(db.opt))
// New memtable is empty. We certainly have room.
return nil
default:
// We need to do this to unlock and allow the flusher to modify imm.
return errNoRoom
}
}
func arenaSize(opt Options) int64 {
return opt.MaxTableSize + opt.maxBatchSize + opt.maxBatchCount*int64(skl.MaxNodeSize)
}
// buildL0Table builds a new table from the memtable.
func buildL0Table(ft flushTask, bopts table.Options) []byte {
iter := ft.mt.NewIterator()
defer iter.Close()
b := table.NewTableBuilder(bopts)
defer b.Close()
var vp valuePointer
for iter.SeekToFirst(); iter.Valid(); iter.Next() {
if len(ft.dropPrefixes) > 0 && hasAnyPrefixes(iter.Key(), ft.dropPrefixes) {
continue
}
vs := iter.Value()
if vs.Meta&bitValuePointer > 0 {
vp.Decode(vs.Value)
}
b.Add(iter.Key(), iter.Value(), vp.Len)
}
return b.Finish()
}
type flushTask struct {
mt *skl.Skiplist
vptr valuePointer
dropPrefixes [][]byte
}
func (db *DB) pushHead(ft flushTask) error {
// We don't need to store the head pointer in in-memory mode since we will
// never replay anything.
if db.opt.InMemory {
return nil
}
// Ensure we never push a zero valued head pointer.
if ft.vptr.IsZero() {
return errors.New("Head should not be zero")
}
// Store the badger head; we need it for readTs.
db.opt.Infof("Storing value log head: %+v\n", ft.vptr)
val := ft.vptr.Encode()
// Pick the max commit ts, so in case of crash, our read ts would be higher than all the
// commits.
headTs := y.KeyWithTs(head, db.orc.nextTs())
ft.mt.Put(headTs, y.ValueStruct{Value: val})
return nil
}
// handleFlushTask must be run serially.
func (db *DB) handleFlushTask(ft flushTask) error {
// There can be a scenario where an empty memtable is flushed. For example, the memtable is empty
// and, after writing a request to the value log, the rotation count exceeds db.LogRotatesToFlush.
if ft.mt.Empty() {
return nil
}
if err := db.pushHead(ft); err != nil {
return err
}
dk, err := db.registry.latestDataKey()
if err != nil {
return y.Wrapf(err, "failed to get datakey in db.handleFlushTask")
}
bopts := buildTableOptions(db.opt)
bopts.DataKey = dk
// Builder does not need cache but the same options are used for opening table.
bopts.BlockCache = db.blockCache
bopts.IndexCache = db.indexCache
tableData := buildL0Table(ft, bopts)
fileID := db.lc.reserveFileID()
if db.opt.KeepL0InMemory {
tbl, err := table.OpenInMemoryTable(tableData, fileID, &bopts)
if err != nil {
return errors.Wrapf(err, "failed to open table in memory")
}
return db.lc.addLevel0Table(tbl)
}
fd, err := y.CreateSyncedFile(table.NewFilename(fileID, db.opt.Dir), true)
if err != nil {
return y.Wrap(err)
}
// Don't block just to sync the directory entry.
dirSyncCh := make(chan error, 1)
go func() { dirSyncCh <- db.syncDir(db.opt.Dir) }()
if _, err = fd.Write(tableData); err != nil {
db.opt.Errorf("ERROR while writing to level 0: %v", err)
return err
}
if dirSyncErr := <-dirSyncCh; dirSyncErr != nil {
// Do dir sync as best effort. No need to return due to an error there.
db.opt.Errorf("ERROR while syncing level directory: %v", dirSyncErr)
}
tbl, err := table.OpenTable(fd, bopts)
if err != nil {
db.opt.Debugf("ERROR while opening table: %v", err)
return err
}
// We own a ref on tbl.
err = db.lc.addLevel0Table(tbl) // This will incrRef
_ = tbl.DecrRef() // Releases our ref.
return err
}
// flushMemtable must keep running until we send it an empty flushTask. If there
// are errors during handling the flush task, we'll retry indefinitely.
func (db *DB) flushMemtable(lc *y.Closer) error {
defer lc.Done()
for ft := range db.flushChan {
if ft.mt == nil {
// We close db.flushChan now, instead of sending a nil ft.mt.
continue
}
for {
err := db.handleFlushTask(ft)
if err == nil {
// Update s.imm. Need a lock.
db.Lock()
// This is a single-threaded operation. ft.mt corresponds to the head of
// db.imm list. Once we flush it, we advance db.imm. The next ft.mt
// which would arrive here would match db.imm[0], because we acquire a
// lock over DB when pushing to flushChan.
// TODO: This logic is dirty AF. Any change and this could easily break.
y.AssertTrue(ft.mt == db.imm[0])
db.imm = db.imm[1:]
ft.mt.DecrRef() // Return memory.
db.Unlock()
break
}
// Encountered error. Retry indefinitely.
db.opt.Errorf("Failure while flushing memtable to disk: %v. Retrying...\n", err)
time.Sleep(time.Second)
}
}
return nil
}
func exists(path string) (bool, error) {
_, err := os.Stat(path)
if err == nil {
return true, nil
}
if os.IsNotExist(err) {
return false, nil
}
return true, err
}
// This function does a filewalk, calculates the size of vlog and sst files and stores it in
// y.LSMSize and y.VlogSize.
func (db *DB) calculateSize() {
if db.opt.InMemory {
return
}
newInt := func(val int64) *expvar.Int {
v := new(expvar.Int)
v.Add(val)
return v
}
totalSize := func(dir string) (int64, int64) {
var lsmSize, vlogSize int64
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
ext := filepath.Ext(path)
switch ext {
case ".sst":
lsmSize += info.Size()
case ".vlog":
vlogSize += info.Size()
}
return nil
})
if err != nil {
db.opt.Debugf("Got error while calculating total size of directory: %s", dir)
}
return lsmSize, vlogSize
}
lsmSize, vlogSize := totalSize(db.opt.Dir)
y.LSMSize.Set(db.opt.Dir, newInt(lsmSize))
// If valueDir is different from dir, we'd have to do another walk.
if db.opt.ValueDir != db.opt.Dir {
_, vlogSize = totalSize(db.opt.ValueDir)
}
y.VlogSize.Set(db.opt.ValueDir, newInt(vlogSize))
}
func (db *DB) updateSize(lc *y.Closer) {
defer lc.Done()
if db.opt.InMemory {
return
}
metricsTicker := time.NewTicker(time.Minute)
defer metricsTicker.Stop()
for {
select {
case <-metricsTicker.C:
db.calculateSize()
case <-lc.HasBeenClosed():
return
}
}
}
// RunValueLogGC triggers a value log garbage collection.
//
// It picks value log files to perform GC based on statistics that are collected
// during compactions. If no such statistics are available, then log files are
// picked in random order. The process stops as soon as the first log file is
// encountered which does not result in garbage collection.
//
// When a log file is picked, it is first sampled. If the sample shows that we
// can discard at least discardRatio space of that file, it would be rewritten.
//
// If a call to RunValueLogGC results in no rewrites, then ErrNoRewrite is
// returned, indicating that the call resulted in no file rewrites.
//
// We recommend setting discardRatio to 0.5, thus indicating that a file be
// rewritten if half the space can be discarded. This results in a lifetime
// value log write amplification of 2 (1 from original write + 0.5 rewrite +
// 0.25 + 0.125 + ... = 2). Setting it to higher value would result in fewer
// space reclaims, while setting it to a lower value would result in more space
// reclaims at the cost of increased activity on the LSM tree. discardRatio
// must be in the range (0.0, 1.0), both endpoints excluded, otherwise an
// ErrInvalidRequest is returned.
//
// Only one GC is allowed at a time. If another value log GC is running, or DB
// has been closed, this would return an ErrRejected.
//
// Note: Every time GC is run, it would produce a spike of activity on the LSM
// tree.
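//
// A typical usage sketch, looping until there is nothing left to rewrite:
//
//  for {
//      if err := db.RunValueLogGC(0.5); err != nil {
//          break // ErrNoRewrite means nothing was rewritten; otherwise handle the error.
//      }
//  }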
func (db *DB) RunValueLogGC(discardRatio float64) error {
if db.opt.InMemory {
return ErrGCInMemoryMode
}
if discardRatio >= 1.0 || discardRatio <= 0.0 {
return ErrInvalidRequest
}
// startLevel is the level from which we should search for the head key. When badger is running
// with KeepL0InMemory flag, all tables on L0 are kept in memory. This means we should pick head
// key from Level 1 onwards because if we pick the headkey from Level 0 we might end up losing
// data. See test TestL0GCBug.
startLevel := 0
if db.opt.KeepL0InMemory {
startLevel = 1
}
// Find head on disk
headKey := y.KeyWithTs(head, math.MaxUint64)
// We need to pass the key with a timestamp; the LSM get strips the last 8 bytes and compares only the key.
val, err := db.lc.get(headKey, nil, startLevel)
if err != nil {
return errors.Wrap(err, "Retrieving head from on-disk LSM")
}
var head valuePointer
if len(val.Value) > 0 {
head.Decode(val.Value)
}
// Pick a log file and run GC
return db.vlog.runGC(discardRatio, head)
}
// Size returns the size of lsm and value log files in bytes. It can be used to decide how often to
// call RunValueLogGC.
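//
// For example (the thresholds below are illustrative only):
//
//  lsm, vlog := db.Size()
//  if lsm > 1<<30 || vlog > 10<<30 {
//      _ = db.RunValueLogGC(0.5)
//  }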
func (db *DB) Size() (lsm, vlog int64) {
if y.LSMSize.Get(db.opt.Dir) == nil {
lsm, vlog = 0, 0
return
}
lsm = y.LSMSize.Get(db.opt.Dir).(*expvar.Int).Value()
vlog = y.VlogSize.Get(db.opt.ValueDir).(*expvar.Int).Value()
return
}
// Sequence represents a Badger sequence.
type Sequence struct {
sync.Mutex
db *DB
key []byte
next uint64
leased uint64
bandwidth uint64
}
// Next would return the next integer in the sequence, updating the lease by running a transaction
// if needed.
func (seq *Sequence) Next() (uint64, error) {
seq.Lock()
defer seq.Unlock()
if seq.next >= seq.leased {
if err := seq.updateLease(); err != nil {
return 0, err
}
}
val := seq.next
seq.next++
return val, nil
}
// Release the leased sequence to avoid wasted integers. This should be done right
// before closing the associated DB. However, it is valid to use the sequence after
// it was released, causing a new lease with full bandwidth.
func (seq *Sequence) Release() error {
seq.Lock()
defer seq.Unlock()
err := seq.db.Update(func(txn *Txn) error {
item, err := txn.Get(seq.key)
if err != nil {
return err
}
var num uint64
if err := item.Value(func(v []byte) error {
num = binary.BigEndian.Uint64(v)
return nil
}); err != nil {
return err
}
if num == seq.leased {
var buf [8]byte
binary.BigEndian.PutUint64(buf[:], seq.next)
return txn.SetEntry(NewEntry(seq.key, buf[:]))
}
return nil
})
if err != nil {
return err
}
seq.leased = seq.next
return nil
}
func (seq *Sequence) updateLease() error {
return seq.db.Update(func(txn *Txn) error {
item, err := txn.Get(seq.key)
switch {
case err == ErrKeyNotFound:
seq.next = 0
case err != nil:
return err
default:
var num uint64
if err := item.Value(func(v []byte) error {
num = binary.BigEndian.Uint64(v)
return nil
}); err != nil {
return err
}
seq.next = num
}
lease := seq.next + seq.bandwidth
var buf [8]byte
binary.BigEndian.PutUint64(buf[:], lease)
if err = txn.SetEntry(NewEntry(seq.key, buf[:])); err != nil {
return err
}
seq.leased = lease
return nil
})
}
// GetSequence would initiate a new sequence object, generating it from the stored lease, if
// available, in the database. Sequence can be used to get a list of monotonically increasing
// integers. Multiple sequences can be created by providing different keys. Bandwidth sets the
// size of the lease, determining how many Next() requests can be served from memory.
//
// GetSequence is not supported on ManagedDB. Calling this would result in a panic.
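//
// A short usage sketch (the key and bandwidth are illustrative; errors should be
// checked, and Release should be called before closing the DB):
//
//  seq, err := db.GetSequence([]byte("seq/user-id"), 1000)
//  defer seq.Release()
//  num, err := seq.Next()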
func (db *DB) GetSequence(key []byte, bandwidth uint64) (*Sequence, error) {
if db.opt.managedTxns {
panic("Cannot use GetSequence with managedDB=true.")
}
switch {
case len(key) == 0:
return nil, ErrEmptyKey
case bandwidth == 0:
return nil, ErrZeroBandwidth
}
seq := &Sequence{
db: db,
key: key,
next: 0,
leased: 0,
bandwidth: bandwidth,
}
err := seq.updateLease()
return seq, err
}
// Tables gets the TableInfo objects from the level controller. If withKeysCount
// is true, TableInfo objects also contain counts of keys for the tables.
func (db *DB) Tables(withKeysCount bool) []TableInfo {
return db.lc.getTableInfo(withKeysCount)
}
// KeySplits can be used to get rough key ranges to divide up iteration over
// the DB.
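//
// A usage sketch (an empty prefix covers the whole DB); each returned key can act
// as an iteration boundary for a separate worker:
//
//  splits := db.KeySplits(nil)
//  for _, boundary := range splits {
//      _ = boundary // e.g. start a worker that iterates up to this key.
//  }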
func (db *DB) KeySplits(prefix []byte) []string {
var splits []string
// We just want table ranges here and not keys count.
for _, ti := range db.Tables(false) {
// We don't use ti.Left, because that has a tendency to store !badger
// keys.
if bytes.HasPrefix(ti.Right, prefix) {
splits = append(splits, string(ti.Right))
}
}
sort.Strings(splits)
return splits
}
// MaxBatchCount returns max possible entries in batch
func (db *DB) MaxBatchCount() int64 {
return db.opt.maxBatchCount
}
// MaxBatchSize returns max possible batch size
func (db *DB) MaxBatchSize() int64 {
return db.opt.maxBatchSize
}
func (db *DB) stopMemoryFlush() {
// Stop memtable flushes.
if db.closers.memtable != nil {
close(db.flushChan)
db.closers.memtable.SignalAndWait()
}
}
func (db *DB) stopCompactions() {
// Stop compactions.
if db.closers.compactors != nil {
db.closers.compactors.SignalAndWait()
}
}
func (db *DB) startCompactions() {
// Resume compactions.
if db.closers.compactors != nil {
db.closers.compactors = y.NewCloser(1)
db.lc.startCompact(db.closers.compactors)
}
}
func (db *DB) startMemoryFlush() {
// Start the memtable flusher.
if db.closers.memtable != nil {
db.flushChan = make(chan flushTask, db.opt.NumMemtables)
db.closers.memtable = y.NewCloser(1)
go func() {
_ = db.flushMemtable(db.closers.memtable)
}()
}
}
// Flatten can be used to force compactions on the LSM tree so all the tables fall on the same
// level. This ensures that all the versions of keys are colocated and not split across multiple
// levels, which is necessary after a restore from backup. During Flatten, live compactions are
// stopped. Ideally, no writes are going on during Flatten. Otherwise, it would create competition
// between flattening the tree and new tables being created at level zero.
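//
// A usage sketch, typically run offline (the worker count is illustrative):
//
//  if err := db.Flatten(2); err != nil {
//      // handle error
//  }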
func (db *DB) Flatten(workers int) error {
db.stopCompactions()
defer db.startCompactions()
compactAway := func(cp compactionPriority) error {
db.opt.Infof("Attempting to compact with %+v\n", cp)
errCh := make(chan error, 1)
for i := 0; i < workers; i++ {
go func() {
errCh <- db.lc.doCompact(175, cp)
}()
}
var success int
var rerr error
for i := 0; i < workers; i++ {
err := <-errCh
if err != nil {
rerr = err
db.opt.Warningf("While running doCompact with %+v. Error: %v\n", cp, err)
} else {
success++
}
}
if success == 0 {
return rerr
}
// We could do at least one successful compaction. So, we'll consider this a success.
db.opt.Infof("%d compactor(s) succeeded. One or more tables from level %d compacted.\n",
success, cp.level)
return nil
}
hbytes := func(sz int64) string {
return humanize.Bytes(uint64(sz))
}
for {
db.opt.Infof("\n")
var levels []int
for i, l := range db.lc.levels {
sz := l.getTotalSize()
db.opt.Infof("Level: %d. %8s Size. %8s Max.\n",
i, hbytes(l.getTotalSize()), hbytes(l.maxTotalSize))
if sz > 0 {
levels = append(levels, i)
}
}
if len(levels) <= 1 {
prios := db.lc.pickCompactLevels()
if len(prios) == 0 || prios[0].score <= 1.0 {
db.opt.Infof("All tables consolidated into one level. Flattening done.\n")
return nil
}
if err := compactAway(prios[0]); err != nil {
return err
}
continue
}
// Create an artificial compaction priority, to ensure that we compact the level.
cp := compactionPriority{level: levels[0], score: 1.71}
if err := compactAway(cp); err != nil {
return err
}
}
}
func (db *DB) blockWrite() error {
// Stop accepting new writes.
if !atomic.CompareAndSwapInt32(&db.blockWrites, 0, 1) {
return ErrBlockedWrites
}
// Make all pending writes finish. The following will also close writeCh.
db.closers.writes.SignalAndWait()
db.opt.Infof("Writes flushed. Stopping compactions now...")
return nil
}
func (db *DB) unblockWrite() {
db.closers.writes = y.NewCloser(1)
go db.doWrites(db.closers.writes)
// Resume writes.
atomic.StoreInt32(&db.blockWrites, 0)
}
func (db *DB) prepareToDrop() (func(), error) {
if db.opt.ReadOnly {
panic("Attempting to drop data in read-only mode.")
}
// To prepare for a drop, we need to block incoming writes and flush any
// pending requests to the DB. Then, we flush all pending flush tasks so
// that we don't miss any entries.
if err := db.blockWrite(); err != nil {
return nil, err
}
reqs := make([]*request, 0, 10)
for {
select {
case r := <-db.writeCh:
reqs = append(reqs, r)
default:
if err := db.writeRequests(reqs); err != nil {
db.opt.Errorf("writeRequests: %v", err)
}
db.stopMemoryFlush()
return func() {
db.opt.Infof("Resuming writes")
db.startMemoryFlush()
db.unblockWrite()
}, nil
}
}
}
// DropAll would drop all the data stored in Badger. It does this in the following way.
// - Stop accepting new writes.
// - Pause memtable flushes and compactions.
// - Pick all tables from all levels, create a changeset to delete all these
// tables and apply it to manifest.
// - Pick all log files from value log, and delete all of them. Restart value log files from zero.
// - Resume memtable flushes and compactions.
//
// NOTE: DropAll is resilient to concurrent writes, but not to reads. It is up to the user to not do
// any reads while DropAll is going on, otherwise they may result in panics. Ideally, both reads and
// writes are paused before running DropAll, and resumed after it is finished.
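//
// A usage sketch:
//
//  if err := db.DropAll(); err != nil {
//      // handle error
//  }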
func (db *DB) DropAll() error {
f, err := db.dropAll()
if f != nil {
f()
}
return err
}
func (db *DB) dropAll() (func(), error) {
db.opt.Infof("DropAll called. Blocking writes...")
f, err := db.prepareToDrop()
if err != nil {
return f, err
}
// prepareToDrop stops all incoming writes and flushes any pending flush tasks.
// Before we drop, we also stop compactions, because all the data is going to
// be deleted anyway.
db.stopCompactions()
resume := func() {
db.startCompactions()
f()
}
// Block all foreign interactions with memory tables.
db.Lock()
defer db.Unlock()
// Remove inmemory tables. Calling DecrRef for safety. Not sure if they're absolutely needed.
db.mt.DecrRef()
for _, mt := range db.imm {
mt.DecrRef()
}
db.imm = db.imm[:0]
db.mt = skl.NewSkiplist(arenaSize(db.opt)) // Set it up for future writes.
num, err := db.lc.dropTree()
if err != nil {
return resume, err
}
db.opt.Infof("Deleted %d SSTables. Now deleting value logs...\n", num)
num, err = db.vlog.dropAll()
if err != nil {
return resume, err
}
db.vhead = valuePointer{} // Zero it out.
db.lc.nextFileID = 1
db.opt.Infof("Deleted %d value log files. DropAll done.\n", num)
db.blockCache.Clear()
db.indexCache.Clear()
return resume, nil
}
// DropPrefix would drop all the keys with the provided prefix. It does this in the following way:
// - Stop accepting new writes.
// - Stop memtable flushes before acquiring the lock, because we acquire the lock here
// and a memtable flush also stalls on that lock, which would lead to a deadlock.
// - Flush out all memtables, skipping over keys with the given prefix, Kp.
// - Write out the value log header to memtables when flushing, so we don't accidentally bring Kp
// back after a restart.
// - Stop compaction.
// - Compact L0->L1, skipping over Kp.
// - Compact rest of the levels, Li->Li, picking tables which have Kp.
// - Resume memtable flushes, compactions and writes.
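//
// A usage sketch (the prefixes are illustrative):
//
//  if err := db.DropPrefix([]byte("user:"), []byte("session:")); err != nil {
//      // handle error
//  }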
func (db *DB) DropPrefix(prefixes ...[]byte) error {
db.opt.Infof("DropPrefix Called")
f, err := db.prepareToDrop()
if err != nil {
return err
}
defer f()
// Block all foreign interactions with memory tables.
db.Lock()
defer db.Unlock()
db.imm = append(db.imm, db.mt)
for _, memtable := range db.imm {
if memtable.Empty() {
memtable.DecrRef()
continue
}
task := flushTask{
mt: memtable,
// Ensure that the head of value log gets persisted to disk.
vptr: db.vhead,
dropPrefixes: prefixes,
}
db.opt.Debugf("Flushing memtable")
if err := db.handleFlushTask(task); err != nil {
db.opt.Errorf("While trying to flush memtable: %v", err)
return err
}
memtable.DecrRef()
}
db.stopCompactions()
defer db.startCompactions()
db.imm = db.imm[:0]
db.mt = skl.NewSkiplist(arenaSize(db.opt))
// Drop prefixes from the levels.
if err := db.lc.dropPrefixes(prefixes); err != nil {
return err
}
db.opt.Infof("DropPrefix done")
return nil
}
// KVList contains a list of key-value pairs.
type KVList = pb.KVList
// Subscribe can be used to watch key changes for the given key prefixes.
// At least one prefix should be passed, or an error will be returned.
// You can use an empty prefix to monitor all changes to the DB.
// This function blocks until the given context is done or an error occurs.
// The given function will be called with a new KVList containing the modified keys and the
// corresponding values.
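//
// A usage sketch (the prefix is illustrative; cancel the context to stop watching):
//
//  ctx, cancel := context.WithCancel(context.Background())
//  defer cancel()
//  err := db.Subscribe(ctx, func(kvs *KVList) error {
//      for _, kv := range kvs.Kv {
//          fmt.Printf("updated key: %s\n", kv.Key)
//      }
//      return nil
//  }, []byte("user:"))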
func (db *DB) Subscribe(ctx context.Context, cb func(kv *KVList) error, prefixes ...[]byte) error {
if cb == nil {
return ErrNilCallback
}
c := y.NewCloser(1)
recvCh, id := db.pub.newSubscriber(c, prefixes...)
slurp := func(batch *pb.KVList) error {
for {
select {
case kvs := <-recvCh:
batch.Kv = append(batch.Kv, kvs.Kv...)
default:
if len(batch.GetKv()) > 0 {
return cb(batch)
}
return nil
}
}
}
for {
select {
case <-c.HasBeenClosed():
// No need to delete here. Closer will be called only while
// closing DB. Subscriber will be deleted by cleanSubscribers.
// Drain any pending updates before returning.
err := slurp(new(pb.KVList))
c.Done()
return err
case <-ctx.Done():
c.Done()
db.pub.deleteSubscriber(id)
// Delete the subscriber to avoid further updates.
return ctx.Err()
case batch := <-recvCh:
err := slurp(batch)
if err != nil {
c.Done()
// Delete the subscriber if there is an error by the callback.
db.pub.deleteSubscriber(id)
return err
}
}
}
}
// shouldEncrypt tells whether badger should encrypt data.
func (db *DB) shouldEncrypt() bool {
return len(db.opt.EncryptionKey) > 0
}
func (db *DB) syncDir(dir string) error {
if db.opt.InMemory {
return nil
}
return syncDir(dir)
}
func createDirs(opt Options) error {
for _, path := range []string{opt.Dir, opt.ValueDir} {
dirExists, err := exists(path)
if err != nil {
return y.Wrapf(err, "Invalid Dir: %q", path)
}
if !dirExists {
if opt.ReadOnly {
return errors.Errorf("Cannot find directory %q for read-only open", path)
}
// Try to create the directory
err = os.Mkdir(path, 0700)
if err != nil {
return y.Wrapf(err, "Error Creating Dir: %q", path)
}
}
}
return nil
}
badger-2.2007.2/db2_test.go 0000664 0000000 0000000 00000056236 13721731165 0015247 0 ustar 00root root 0000000 0000000 /*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"io/ioutil"
"log"
"math"
"math/rand"
"os"
"path"
"regexp"
"runtime"
"sync"
"testing"
"time"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/table"
"github.com/dgraph-io/badger/v2/y"
"github.com/stretchr/testify/require"
)
func TestTruncateVlogWithClose(t *testing.T) {
key := func(i int) []byte {
return []byte(fmt.Sprintf("%d%10d", i, i))
}
data := func(l int) []byte {
m := make([]byte, l)
_, err := rand.Read(m)
require.NoError(t, err)
return m
}
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opt := getTestOptions(dir)
opt.SyncWrites = true
opt.Truncate = true
opt.ValueThreshold = 1 // Force all reads from value log.
db, err := Open(opt)
require.NoError(t, err)
err = db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry(key(0), data(4055)))
})
require.NoError(t, err)
// Close the DB.
require.NoError(t, db.Close())
require.NoError(t, os.Truncate(path.Join(dir, "000000.vlog"), 4090))
// Reopen and write some new data.
db, err = Open(opt)
require.NoError(t, err)
for i := 0; i < 32; i++ {
err := db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry(key(i), data(10)))
})
require.NoError(t, err)
}
// Read it back to ensure that we can read it now.
for i := 0; i < 32; i++ {
err := db.View(func(txn *Txn) error {
item, err := txn.Get(key(i))
require.NoError(t, err)
val := getItemValue(t, item)
require.Equal(t, 10, len(val))
return nil
})
require.NoError(t, err)
}
require.NoError(t, db.Close())
// Reopen and read the data again.
db, err = Open(opt)
require.NoError(t, err)
for i := 0; i < 32; i++ {
err := db.View(func(txn *Txn) error {
item, err := txn.Get(key(i))
require.NoError(t, err)
val := getItemValue(t, item)
require.Equal(t, 10, len(val))
return nil
})
require.NoError(t, err)
}
require.NoError(t, db.Close())
}
var manual = flag.Bool("manual", false, "Set when manually running some tests.")
// Badger dir to be used for performing db.Open benchmark.
var benchDir = flag.String("benchdir", "", "Set when running db.Open benchmark")
// The following 3 TruncateVlogNoClose tests should be run one after another.
// None of them closes the DB, simulating a crash. They should be run with a
// script which truncates the value log to 4090 bytes, lining up with the end of
// the first entry in the txn. At <4090 bytes it would cause the entry to be
// truncated immediately; at >4090 bytes, the same thing happens.
func TestTruncateVlogNoClose(t *testing.T) {
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
dir := "p"
opts := getTestOptions(dir)
opts.SyncWrites = true
opts.Truncate = true
kv, err := Open(opts)
require.NoError(t, err)
key := func(i int) string {
return fmt.Sprintf("%d%10d", i, i)
}
data := fmt.Sprintf("%4055d", 1)
err = kv.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte(key(0)), []byte(data)))
})
require.NoError(t, err)
}
func TestTruncateVlogNoClose2(t *testing.T) {
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
dir := "p"
opts := getTestOptions(dir)
opts.SyncWrites = true
opts.Truncate = true
kv, err := Open(opts)
require.NoError(t, err)
key := func(i int) string {
return fmt.Sprintf("%d%10d", i, i)
}
data := fmt.Sprintf("%10d", 1)
for i := 32; i < 64; i++ {
err := kv.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte(key(i)), []byte(data)))
})
require.NoError(t, err)
}
for i := 32; i < 64; i++ {
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key(i)))
require.NoError(t, err)
val := getItemValue(t, item)
require.NotNil(t, val)
require.True(t, len(val) > 0)
return nil
}))
}
}
func TestTruncateVlogNoClose3(t *testing.T) {
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
fmt.Print("Running")
dir := "p"
opts := getTestOptions(dir)
opts.SyncWrites = true
opts.Truncate = true
kv, err := Open(opts)
require.NoError(t, err)
key := func(i int) string {
return fmt.Sprintf("%d%10d", i, i)
}
for i := 32; i < 64; i++ {
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key(i)))
require.NoError(t, err)
val := getItemValue(t, item)
require.NotNil(t, val)
require.True(t, len(val) > 0)
return nil
}))
}
}
func TestBigKeyValuePairs(t *testing.T) {
// This test takes too much memory. So, run separately.
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
// Passing an empty directory since it will be filled by runBadgerTest.
opts := DefaultOptions("").
WithMaxTableSize(1 << 20).
WithValueLogMaxEntries(64)
runBadgerTest(t, &opts, func(t *testing.T, db *DB) {
bigK := make([]byte, 65001)
bigV := make([]byte, db.opt.ValueLogFileSize+1)
small := make([]byte, 65000)
txn := db.NewTransaction(true)
require.Regexp(t, regexp.MustCompile("Key.*exceeded"), txn.SetEntry(NewEntry(bigK, small)))
require.Regexp(t, regexp.MustCompile("Value.*exceeded"),
txn.SetEntry(NewEntry(small, bigV)))
require.NoError(t, txn.SetEntry(NewEntry(small, small)))
require.Regexp(t, regexp.MustCompile("Key.*exceeded"), txn.SetEntry(NewEntry(bigK, bigV)))
require.NoError(t, db.View(func(txn *Txn) error {
_, err := txn.Get(small)
require.Equal(t, ErrKeyNotFound, err)
return nil
}))
// Now run a longer test, which involves value log GC.
data := fmt.Sprintf("%100d", 1)
key := func(i int) string {
return fmt.Sprintf("%65000d", i)
}
saveByKey := func(key string, value []byte) error {
return db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte(key), value))
})
}
getByKey := func(key string) error {
return db.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key))
if err != nil {
return err
}
return item.Value(func(val []byte) error {
if len(val) == 0 {
log.Fatalf("key not found %q", len(key))
}
return nil
})
})
}
for i := 0; i < 32; i++ {
if i < 30 {
require.NoError(t, saveByKey(key(i), []byte(data)))
} else {
require.NoError(t, saveByKey(key(i), []byte(fmt.Sprintf("%100d", i))))
}
}
for j := 0; j < 5; j++ {
for i := 0; i < 32; i++ {
if i < 30 {
require.NoError(t, saveByKey(key(i), []byte(data)))
} else {
require.NoError(t, saveByKey(key(i), []byte(fmt.Sprintf("%100d", i))))
}
}
}
for i := 0; i < 32; i++ {
require.NoError(t, getByKey(key(i)))
}
var loops int
var err error
for err == nil {
err = db.RunValueLogGC(0.5)
require.NotRegexp(t, regexp.MustCompile("truncate"), err)
loops++
}
t.Logf("Ran value log GC %d times. Last error: %v\n", loops, err)
})
}
// The following test checks for issue #585.
func TestPushValueLogLimit(t *testing.T) {
// This test takes too much memory. So, run separately.
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
// Passing an empty directory since it will be filled by runBadgerTest.
opt := DefaultOptions("").
WithValueLogMaxEntries(64).
WithValueLogFileSize(2 << 30)
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
data := []byte(fmt.Sprintf("%30d", 1))
key := func(i int) string {
return fmt.Sprintf("%100d", i)
}
for i := 0; i < 32; i++ {
if i == 4 {
v := make([]byte, math.MaxInt32)
err := db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte(key(i)), v))
})
require.NoError(t, err)
} else {
err := db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte(key(i)), data))
})
require.NoError(t, err)
}
}
for i := 0; i < 32; i++ {
err := db.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key(i)))
require.NoError(t, err, "Getting key: %s", key(i))
err = item.Value(func(v []byte) error {
_ = v
return nil
})
require.NoError(t, err, "Getting value: %s", key(i))
return nil
})
require.NoError(t, err)
}
})
}
// The following benchmark test is supposed to be run against a badger directory with some data.
// Use badger fill to create data if it doesn't exist.
func BenchmarkDBOpen(b *testing.B) {
if *benchDir == "" {
b.Skip("Please set -benchdir to badger directory")
}
dir := *benchDir
// Open the provided benchmark directory in read-only mode.
opt := DefaultOptions(dir).
WithReadOnly(true)
for i := 0; i < b.N; i++ {
db, err := Open(opt)
require.NoError(b, err)
require.NoError(b, db.Close())
}
}
// Regression test for https://github.com/dgraph-io/badger/issues/830
func TestDiscardMapTooBig(t *testing.T) {
createDiscardStats := func() map[uint32]int64 {
stat := map[uint32]int64{}
for i := uint32(0); i < 8000; i++ {
stat[i] = 0
}
return stat
}
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
db, err := Open(DefaultOptions(dir))
require.NoError(t, err, "error while opening db")
// Add some data so that memtable flush happens on close.
require.NoError(t, db.Update(func(txn *Txn) error {
return txn.Set([]byte("foo"), []byte("bar"))
}))
// overwrite discardstat with large value
db.vlog.lfDiscardStats.m = createDiscardStats()
require.NoError(t, db.Close())
// reopen the same DB
db, err = Open(DefaultOptions(dir))
require.NoError(t, err, "error while opening db")
require.NoError(t, db.Close())
}
// Test for values of size uint32.
func TestBigValues(t *testing.T) {
if !*manual {
t.Skip("Skipping test meant to be run manually.")
return
}
opts := DefaultOptions("").
WithValueThreshold(1 << 20).
WithValueLogMaxEntries(100)
test := func(t *testing.T, db *DB) {
keyCount := 1000
data := bytes.Repeat([]byte("a"), (1 << 20)) // Valuesize 1 MB.
key := func(i int) string {
return fmt.Sprintf("%65000d", i)
}
saveByKey := func(key string, value []byte) error {
return db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte(key), value))
})
}
getByKey := func(key string) error {
return db.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key))
if err != nil {
return err
}
return item.Value(func(val []byte) error {
if len(val) == 0 || len(val) != len(data) || !bytes.Equal(val, []byte(data)) {
log.Fatalf("key not found %q", len(key))
}
return nil
})
})
}
for i := 0; i < keyCount; i++ {
require.NoError(t, saveByKey(key(i), []byte(data)))
}
for i := 0; i < keyCount; i++ {
require.NoError(t, getByKey(key(i)))
}
}
t.Run("disk mode", func(t *testing.T) {
runBadgerTest(t, &opts, func(t *testing.T, db *DB) {
test(t, db)
})
})
t.Run("InMemory mode", func(t *testing.T) {
opts.InMemory = true
opts.Dir = ""
opts.ValueDir = ""
db, err := Open(opts)
require.NoError(t, err)
test(t, db)
require.NoError(t, db.Close())
})
}
// This test is for compaction file picking. We create a db with two populated levels: 10
// tables on level 3 and 3 tables on level 2. The tables on level 2 overlap with 2, 4, and 3
// tables on level 3, respectively.
func TestCompactionFilePicking(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
db, err := Open(DefaultOptions(dir).WithTableLoadingMode(options.LoadToRAM))
require.NoError(t, err, "error while opening db")
defer func() {
require.NoError(t, db.Close())
}()
l3 := db.lc.levels[3]
for i := 1; i <= 10; i++ {
// Each table has difference of 1 between smallest and largest key.
tab := createTableWithRange(t, db, 2*i-1, 2*i)
addToManifest(t, db, tab, 3)
require.NoError(t, l3.replaceTables([]*table.Table{}, []*table.Table{tab}))
}
l2 := db.lc.levels[2]
// First table has keys 1 and 4.
tab := createTableWithRange(t, db, 1, 4)
addToManifest(t, db, tab, 2)
require.NoError(t, l2.replaceTables([]*table.Table{}, []*table.Table{tab}))
// Second table has keys 5 and 12.
tab = createTableWithRange(t, db, 5, 12)
addToManifest(t, db, tab, 2)
require.NoError(t, l2.replaceTables([]*table.Table{}, []*table.Table{tab}))
// Third table has keys 13 and 18.
tab = createTableWithRange(t, db, 13, 18)
addToManifest(t, db, tab, 2)
require.NoError(t, l2.replaceTables([]*table.Table{}, []*table.Table{tab}))
cdef := &compactDef{
thisLevel: db.lc.levels[2],
nextLevel: db.lc.levels[3],
}
tables := db.lc.levels[2].tables
db.lc.sortByOverlap(tables, cdef)
var expKey [8]byte
// First table should be with smallest and biggest keys as 1 and 4.
binary.BigEndian.PutUint64(expKey[:], uint64(1))
require.Equal(t, expKey[:], y.ParseKey(tables[0].Smallest()))
binary.BigEndian.PutUint64(expKey[:], uint64(4))
require.Equal(t, expKey[:], y.ParseKey(tables[0].Biggest()))
// Second table should be with smallest and biggest keys as 13 and 18.
binary.BigEndian.PutUint64(expKey[:], uint64(13))
require.Equal(t, expKey[:], y.ParseKey(tables[1].Smallest()))
binary.BigEndian.PutUint64(expKey[:], uint64(18))
require.Equal(t, expKey[:], y.ParseKey(tables[1].Biggest()))
// Third table should be with smallest and biggest keys as 5 and 12.
binary.BigEndian.PutUint64(expKey[:], uint64(5))
require.Equal(t, expKey[:], y.ParseKey(tables[2].Smallest()))
binary.BigEndian.PutUint64(expKey[:], uint64(12))
require.Equal(t, expKey[:], y.ParseKey(tables[2].Biggest()))
}
// addToManifest function is used in TestCompactionFilePicking. It adds table to db manifest.
func addToManifest(t *testing.T, db *DB, tab *table.Table, level uint32) {
change := &pb.ManifestChange{
Id: tab.ID(),
Op: pb.ManifestChange_CREATE,
Level: level,
Compression: uint32(tab.CompressionType()),
}
require.NoError(t, db.manifest.addChanges([]*pb.ManifestChange{change}),
"unable to add to manifest")
}
// createTableWithRange function is used in TestCompactionFilePicking. It creates
// a table with key starting from start and ending with end.
func createTableWithRange(t *testing.T, db *DB, start, end int) *table.Table {
bopts := buildTableOptions(db.opt)
b := table.NewTableBuilder(bopts)
nums := []int{start, end}
for _, i := range nums {
key := make([]byte, 8)
binary.BigEndian.PutUint64(key[:], uint64(i))
key = y.KeyWithTs(key, uint64(0))
val := y.ValueStruct{Value: []byte(fmt.Sprintf("%d", i))}
b.Add(key, val, 0)
}
fileID := db.lc.reserveFileID()
fd, err := y.CreateSyncedFile(table.NewFilename(fileID, db.opt.Dir), true)
require.NoError(t, err)
_, err = fd.Write(b.Finish())
require.NoError(t, err, "unable to write to file")
tab, err := table.OpenTable(fd, bopts)
require.NoError(t, err)
return tab
}
func TestReadSameVlog(t *testing.T) {
key := func(i int) []byte {
return []byte(fmt.Sprintf("%d%10d", i, i))
}
testReadingSameKey := func(t *testing.T, db *DB) {
// Forcing to read all values from vlog.
for i := 0; i < 50; i++ {
err := db.Update(func(txn *Txn) error {
return txn.Set(key(i), key(i))
})
require.NoError(t, err)
}
// reading it again several times
for i := 0; i < 50; i++ {
for j := 0; j < 10; j++ {
err := db.View(func(txn *Txn) error {
item, err := txn.Get(key(i))
require.NoError(t, err)
require.Equal(t, key(i), getItemValue(t, item))
return nil
})
require.NoError(t, err)
}
}
}
t.Run("Test Read Again Plain Text", func(t *testing.T) {
opt := getTestOptions("")
// Forcing to read from vlog
opt.ValueThreshold = 1
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
testReadingSameKey(t, db)
})
})
t.Run("Test Read Again Encryption", func(t *testing.T) {
opt := getTestOptions("")
opt.ValueThreshold = 1
// Generate encryption key.
eKey := make([]byte, 32)
_, err := rand.Read(eKey)
require.NoError(t, err)
opt.EncryptionKey = eKey
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
testReadingSameKey(t, db)
})
})
}
// The test ensures we don't lose data when badger is opened with KeepL0InMemory and GC is being
// done.
func TestL0GCBug(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
// Do not change any of the options below unless it's necessary.
opts := getTestOptions(dir)
opts.NumLevelZeroTables = 50
opts.NumLevelZeroTablesStall = 51
opts.ValueLogMaxEntries = 2
opts.ValueThreshold = 2
opts.KeepL0InMemory = true
// Setting LoadingMode to mmap seems to cause segmentation fault while closing DB.
opts.ValueLogLoadingMode = options.FileIO
opts.TableLoadingMode = options.FileIO
db1, err := Open(opts)
require.NoError(t, err)
key := func(i int) []byte {
return []byte(fmt.Sprintf("%10d", i))
}
val := []byte{1, 1, 1, 1, 1, 1, 1, 1}
// Insert 100 keys, each written 3 times. This will create about 50*3 vlog files and 6 SST files.
for i := 0; i < 3; i++ {
for j := 0; j < 100; j++ {
err = db1.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry(key(j), val))
})
require.NoError(t, err)
}
}
// Run value log GC multiple times. This would ensure at least
// one value log file is garbage collected.
success := 0
for i := 0; i < 10; i++ {
err := db1.RunValueLogGC(0.01)
if err == nil {
success++
}
if err != nil && err != ErrNoRewrite {
t.Fatalf(err.Error())
}
}
// Ensure at least one GC call was successful.
require.NotZero(t, success)
// CheckKeys reads all the keys previously stored.
checkKeys := func(db *DB) {
for i := 0; i < 100; i++ {
err := db.View(func(txn *Txn) error {
item, err := txn.Get(key(i))
require.NoError(t, err)
val1 := getItemValue(t, item)
require.Equal(t, val, val1)
return nil
})
require.NoError(t, err)
}
}
checkKeys(db1)
// Simulate a crash by not closing db1 but releasing the locks.
if db1.dirLockGuard != nil {
require.NoError(t, db1.dirLockGuard.release())
}
if db1.valueDirGuard != nil {
require.NoError(t, db1.valueDirGuard.release())
}
for _, f := range db1.vlog.filesMap {
require.NoError(t, f.fd.Close())
}
require.NoError(t, db1.registry.Close())
require.NoError(t, db1.lc.close())
require.NoError(t, db1.manifest.close())
db2, err := Open(opts)
require.NoError(t, err)
// Ensure we still have all the keys.
checkKeys(db2)
require.NoError(t, db2.Close())
}
// Regression test for https://github.com/dgraph-io/badger/issues/1126
//
// The test has 3 steps
// Step 1 - Create badger data. It is necessary that the value size is
// greater than ValueThreshold. The value log file size after
// this step is around 170 bytes.
// Step 2 - Re-open the same badger and simulate a crash. The value log file
// size after this crash is around 2 GB (we increase the file size to mmap it).
// Step 3 - Re-open the same badger. We should be able to read all the data
// inserted in the first step.
func TestWindowsDataLoss(t *testing.T) {
if runtime.GOOS != "windows" {
t.Skip("The test is only for Windows.")
}
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opt := DefaultOptions(dir).WithSyncWrites(true)
opt.ValueThreshold = 32
db, err := Open(opt)
require.NoError(t, err)
keyCount := 20
var keyList [][]byte // Stores all the keys generated.
for i := 0; i < keyCount; i++ {
// It is important that we create different transactions for each request.
err := db.Update(func(txn *Txn) error {
key := []byte(fmt.Sprintf("%d", i))
v := []byte("barValuebarValuebarValuebarValuebarValue")
require.Greater(t, len(v), opt.ValueThreshold)
// The value must exceed ValueThreshold (32 bytes) so it is written to the value log.
err := txn.Set(key, v)
require.NoError(t, err)
keyList = append(keyList, key)
return nil
})
require.NoError(t, err)
}
require.NoError(t, db.Close())
opt.Truncate = true
db, err = Open(opt)
require.NoError(t, err)
// Simulate a crash by not closing db but releasing the locks.
if db.dirLockGuard != nil {
require.NoError(t, db.dirLockGuard.release())
}
if db.valueDirGuard != nil {
require.NoError(t, db.valueDirGuard.release())
}
// Don't use vlog.Close here. We don't want to fix the file size. Only un-mmap
// the data so that we can truncate the file during the next vlog.Open.
require.NoError(t, y.Munmap(db.vlog.filesMap[db.vlog.maxFid].fmap))
for _, f := range db.vlog.filesMap {
require.NoError(t, f.fd.Close())
}
require.NoError(t, db.registry.Close())
require.NoError(t, db.manifest.close())
require.NoError(t, db.lc.close())
opt.Truncate = true
db, err = Open(opt)
require.NoError(t, err)
defer db.Close()
txn := db.NewTransaction(false)
defer txn.Discard()
it := txn.NewIterator(DefaultIteratorOptions)
defer it.Close()
var result [][]byte // stores all the keys read from the db.
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
k := item.Key()
err := item.Value(func(v []byte) error {
_ = v
return nil
})
require.NoError(t, err)
result = append(result, k)
}
require.ElementsMatch(t, keyList, result)
}
func TestDropAllDropPrefix(t *testing.T) {
key := func(i int) []byte {
return []byte(fmt.Sprintf("%10d", i))
}
val := func(i int) []byte {
return []byte(fmt.Sprintf("%128d", i))
}
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
wb := db.NewWriteBatch()
defer wb.Cancel()
N := 50000
for i := 0; i < N; i++ {
require.NoError(t, wb.Set(key(i), val(i)))
}
require.NoError(t, wb.Flush())
var wg sync.WaitGroup
wg.Add(3)
go func() {
defer wg.Done()
err := db.DropPrefix([]byte("000"))
for err == ErrBlockedWrites {
fmt.Printf("DropPrefix 000 err: %v", err)
err = db.DropPrefix([]byte("000"))
time.Sleep(time.Millisecond * 500)
}
require.NoError(t, err)
}()
go func() {
defer wg.Done()
err := db.DropPrefix([]byte("111"))
for err == ErrBlockedWrites {
fmt.Printf("DropPrefix 111 err: %v", err)
err = db.DropPrefix([]byte("111"))
time.Sleep(time.Millisecond * 500)
}
require.NoError(t, err)
}()
go func() {
time.Sleep(time.Millisecond) // Let drop prefix run first.
defer wg.Done()
err := db.DropAll()
for err == ErrBlockedWrites {
fmt.Printf("dropAll err: %v", err)
err = db.DropAll()
time.Sleep(time.Millisecond * 300)
}
require.NoError(t, err)
}()
wg.Wait()
})
}
func TestIsClosed(t *testing.T) {
test := func(inMemory bool) {
opt := DefaultOptions("")
if inMemory {
opt.InMemory = true
} else {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opt.Dir = dir
opt.ValueDir = dir
}
db, err := Open(opt)
require.NoError(t, err)
require.False(t, db.IsClosed())
require.NoError(t, db.Close())
require.True(t, db.IsClosed())
}
t.Run("normal", func(t *testing.T) {
test(false)
})
t.Run("in-memory", func(t *testing.T) {
test(true)
})
}
badger-2.2007.2/db_test.go 0000664 0000000 0000000 00000151134 13721731165 0015156 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"bytes"
"context"
"encoding/binary"
"flag"
"fmt"
"io/ioutil"
"math"
"math/rand"
"os"
"path/filepath"
"runtime"
"sort"
"sync"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/skl"
"github.com/dgraph-io/badger/v2/y"
)
var mmap = flag.Bool("vlog_mmap", true, "Specify if value log must be memory-mapped")
// summary is produced when DB is closed. Currently it is used only for testing.
type summary struct {
fileIDs map[uint64]bool
}
func (s *levelsController) getSummary() *summary {
out := &summary{
fileIDs: make(map[uint64]bool),
}
for _, l := range s.levels {
l.getSummary(out)
}
return out
}
func (s *levelHandler) getSummary(sum *summary) {
s.RLock()
defer s.RUnlock()
for _, t := range s.tables {
sum.fileIDs[t.ID()] = true
}
}
func (s *DB) validate() error { return s.lc.validate() }
func getTestOptions(dir string) Options {
opt := DefaultOptions(dir).
WithMaxTableSize(1 << 15). // Force more compaction.
WithLevelOneSize(4 << 15). // Force more compaction.
WithSyncWrites(false).
WithBlockCacheSize(10 << 20)
if !*mmap {
return opt.WithValueLogLoadingMode(options.FileIO)
}
return opt
}
func getItemValue(t *testing.T, item *Item) (val []byte) {
t.Helper()
var v []byte
err := item.Value(func(val []byte) error {
v = append(v, val...)
return nil
})
if err != nil {
t.Error(err)
}
if v == nil {
return nil
}
another, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, v, another)
return v
}
func txnSet(t *testing.T, kv *DB, key []byte, val []byte, meta byte) {
txn := kv.NewTransaction(true)
require.NoError(t, txn.SetEntry(NewEntry(key, val).WithMeta(meta)))
require.NoError(t, txn.Commit())
}
func txnDelete(t *testing.T, kv *DB, key []byte) {
txn := kv.NewTransaction(true)
require.NoError(t, txn.Delete(key))
require.NoError(t, txn.Commit())
}
// Opens a badger db and runs a test on it.
func runBadgerTest(t *testing.T, opts *Options, test func(t *testing.T, db *DB)) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
if opts == nil {
opts = new(Options)
*opts = getTestOptions(dir)
} else {
opts.Dir = dir
opts.ValueDir = dir
}
if opts.InMemory {
opts.Dir = ""
opts.ValueDir = ""
}
db, err := Open(*opts)
require.NoError(t, err)
defer func() {
require.NoError(t, db.Close())
}()
test(t, db)
}
func TestWrite(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
for i := 0; i < 100; i++ {
txnSet(t, db, []byte(fmt.Sprintf("key%d", i)), []byte(fmt.Sprintf("val%d", i)), 0x00)
}
})
}
func TestUpdateAndView(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
err := db.Update(func(txn *Txn) error {
for i := 0; i < 10; i++ {
entry := NewEntry([]byte(fmt.Sprintf("key%d", i)), []byte(fmt.Sprintf("val%d", i)))
if err := txn.SetEntry(entry); err != nil {
return err
}
}
return nil
})
require.NoError(t, err)
err = db.View(func(txn *Txn) error {
for i := 0; i < 10; i++ {
item, err := txn.Get([]byte(fmt.Sprintf("key%d", i)))
if err != nil {
return err
}
expected := []byte(fmt.Sprintf("val%d", i))
if err := item.Value(func(val []byte) error {
require.Equal(t, expected, val,
"Invalid value for key %q. expected: %q, actual: %q",
item.Key(), expected, val)
return nil
}); err != nil {
return err
}
}
return nil
})
require.NoError(t, err)
})
}
func TestConcurrentWrite(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// Not a benchmark. Just a simple test for concurrent writes.
n := 20
m := 500
var wg sync.WaitGroup
for i := 0; i < n; i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
for j := 0; j < m; j++ {
txnSet(t, db, []byte(fmt.Sprintf("k%05d_%08d", i, j)),
[]byte(fmt.Sprintf("v%05d_%08d", i, j)), byte(j%127))
}
}(i)
}
wg.Wait()
t.Log("Starting iteration")
opt := IteratorOptions{}
opt.Reverse = false
opt.PrefetchSize = 10
opt.PrefetchValues = true
txn := db.NewTransaction(true)
it := txn.NewIterator(opt)
defer it.Close()
var i, j int
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
k := item.Key()
if k == nil {
break // end of iteration.
}
require.EqualValues(t, fmt.Sprintf("k%05d_%08d", i, j), string(k))
v := getItemValue(t, item)
require.EqualValues(t, fmt.Sprintf("v%05d_%08d", i, j), string(v))
require.Equal(t, item.UserMeta(), byte(j%127))
j++
if j == m {
i++
j = 0
}
}
require.EqualValues(t, n, i)
require.EqualValues(t, 0, j)
})
}
func TestGet(t *testing.T) {
test := func(t *testing.T, db *DB) {
txnSet(t, db, []byte("key1"), []byte("val1"), 0x08)
txn := db.NewTransaction(false)
item, err := txn.Get([]byte("key1"))
require.NoError(t, err)
require.EqualValues(t, "val1", getItemValue(t, item))
require.Equal(t, byte(0x08), item.UserMeta())
txn.Discard()
txnSet(t, db, []byte("key1"), []byte("val2"), 0x09)
txn = db.NewTransaction(false)
item, err = txn.Get([]byte("key1"))
require.NoError(t, err)
require.EqualValues(t, "val2", getItemValue(t, item))
require.Equal(t, byte(0x09), item.UserMeta())
txn.Discard()
txnDelete(t, db, []byte("key1"))
txn = db.NewTransaction(false)
_, err = txn.Get([]byte("key1"))
require.Equal(t, ErrKeyNotFound, err)
txn.Discard()
txnSet(t, db, []byte("key1"), []byte("val3"), 0x01)
txn = db.NewTransaction(false)
item, err = txn.Get([]byte("key1"))
require.NoError(t, err)
require.EqualValues(t, "val3", getItemValue(t, item))
require.Equal(t, byte(0x01), item.UserMeta())
longVal := make([]byte, 1000)
txnSet(t, db, []byte("key1"), longVal, 0x00)
txn = db.NewTransaction(false)
item, err = txn.Get([]byte("key1"))
require.NoError(t, err)
require.EqualValues(t, longVal, getItemValue(t, item))
txn.Discard()
}
t.Run("disk mode", func(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
test(t, db)
})
})
t.Run("InMemory mode", func(t *testing.T) {
opts := DefaultOptions("").WithInMemory(true)
db, err := Open(opts)
require.NoError(t, err)
test(t, db)
require.NoError(t, db.Close())
})
t.Run("cache enabled", func(t *testing.T) {
opts := DefaultOptions("").WithBlockCacheSize(10 << 20)
runBadgerTest(t, &opts, func(t *testing.T, db *DB) {
test(t, db)
})
})
}
func TestGetAfterDelete(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// populate with one entry
key := []byte("key")
txnSet(t, db, key, []byte("val1"), 0x00)
require.NoError(t, db.Update(func(txn *Txn) error {
err := txn.Delete(key)
require.NoError(t, err)
_, err = txn.Get(key)
require.Equal(t, ErrKeyNotFound, err)
return nil
}))
})
}
func TestTxnTooBig(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
data := func(i int) []byte {
return []byte(fmt.Sprintf("%b", i))
}
// n := 500000
n := 1000
txn := db.NewTransaction(true)
for i := 0; i < n; {
if err := txn.SetEntry(NewEntry(data(i), data(i))); err != nil {
require.NoError(t, txn.Commit())
txn = db.NewTransaction(true)
} else {
i++
}
}
require.NoError(t, txn.Commit())
txn = db.NewTransaction(true)
for i := 0; i < n; {
if err := txn.Delete(data(i)); err != nil {
require.NoError(t, txn.Commit())
txn = db.NewTransaction(true)
} else {
i++
}
}
require.NoError(t, txn.Commit())
})
}
func TestForceCompactL0(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opts := getTestOptions(dir)
opts.ValueLogFileSize = 15 << 20
opts.managedTxns = true
db, err := Open(opts)
require.NoError(t, err)
data := func(i int) []byte {
return []byte(fmt.Sprintf("%b", i))
}
n := 80
m := 45 // Increasing would cause ErrTxnTooBig
sz := 32 << 10
v := make([]byte, sz)
for i := 0; i < n; i += 2 {
version := uint64(i)
txn := db.NewTransactionAt(version, true)
for j := 0; j < m; j++ {
require.NoError(t, txn.SetEntry(NewEntry(data(j), v)))
}
require.NoError(t, txn.CommitAt(version+1, nil))
}
db.Close()
opts.managedTxns = true
db, err = Open(opts)
require.NoError(t, err)
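// Closing the DB above should have force-compacted L0, so no tables are expected there after reopening.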
require.Equal(t, len(db.lc.levels[0].tables), 0)
require.NoError(t, db.Close())
}
func dirSize(path string) (int64, error) {
var size int64
err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
if !info.IsDir() {
size += info.Size()
}
return err
})
return (size >> 20), err
}
// BenchmarkDbGrowth ensures DB does not grow with repeated adds and deletes.
//
// New keys are created with each for-loop iteration. During each
// iteration, the previous for-loop iteration's keys are deleted.
//
// To reproduce the continuous growth problem due to `badgerMove` keys,
// update `value.go` `discardEntry` line 1628 to return false.
//
// Also, with PR #1303, deleted keys are properly cleaned up, which
// further reduces disk size.
func BenchmarkDbGrowth(b *testing.B) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(b, err)
defer removeDir(dir)
start := 0
lastStart := 0
numKeys := 2000
valueSize := 1024
value := make([]byte, valueSize)
discardRatio := 0.001
maxWrites := 200
opts := getTestOptions(dir)
opts.ValueLogFileSize = 64 << 15
opts.MaxTableSize = 4 << 15
opts.LevelOneSize = 16 << 15
opts.NumVersionsToKeep = 1
opts.NumLevelZeroTables = 1
opts.NumLevelZeroTablesStall = 2
opts.KeepL0InMemory = false // enable L0 compaction
db, err := Open(opts)
require.NoError(b, err)
for numWrites := 0; numWrites < maxWrites; numWrites++ {
txn := db.NewTransaction(true)
if start > 0 {
for i := lastStart; i < start; i++ {
key := make([]byte, 8)
binary.BigEndian.PutUint64(key[:], uint64(i))
err := txn.Delete(key)
if err == ErrTxnTooBig {
require.NoError(b, txn.Commit())
txn = db.NewTransaction(true)
} else {
require.NoError(b, err)
}
}
}
for i := start; i < numKeys+start; i++ {
key := make([]byte, 8)
binary.BigEndian.PutUint64(key[:], uint64(i))
err := txn.SetEntry(NewEntry(key, value))
if err == ErrTxnTooBig {
require.NoError(b, txn.Commit())
txn = db.NewTransaction(true)
} else {
require.NoError(b, err)
}
}
require.NoError(b, txn.Commit())
require.NoError(b, db.Flatten(1))
for {
err = db.RunValueLogGC(discardRatio)
if err == ErrNoRewrite {
break
} else {
require.NoError(b, err)
}
}
size, err := dirSize(dir)
require.NoError(b, err)
fmt.Printf("Badger DB Size = %dMB\n", size)
lastStart = start
start += numKeys
}
db.Close()
size, err := dirSize(dir)
require.NoError(b, err)
require.LessOrEqual(b, size, int64(16))
fmt.Printf("Badger DB Size = %dMB\n", size)
}
// Put a lot of data to move some data to disk.
// WARNING: This test might take a while but it should pass!
func TestGetMore(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
data := func(i int) []byte {
return []byte(fmt.Sprintf("%b", i))
}
// n := 500000
n := 10000
m := 45 // Increasing would cause ErrTxnTooBig
for i := 0; i < n; i += m {
txn := db.NewTransaction(true)
for j := i; j < i+m && j < n; j++ {
require.NoError(t, txn.SetEntry(NewEntry(data(j), data(j))))
}
require.NoError(t, txn.Commit())
}
require.NoError(t, db.validate())
for i := 0; i < n; i++ {
txn := db.NewTransaction(false)
item, err := txn.Get(data(i))
if err != nil {
t.Error(err)
}
require.EqualValues(t, string(data(i)), string(getItemValue(t, item)))
txn.Discard()
}
// Overwrite
for i := 0; i < n; i += m {
txn := db.NewTransaction(true)
for j := i; j < i+m && j < n; j++ {
require.NoError(t, txn.SetEntry(NewEntry(data(j),
// Use a long value that will certainly exceed value threshold.
[]byte(fmt.Sprintf("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz%9d", j)))))
}
require.NoError(t, txn.Commit())
}
require.NoError(t, db.validate())
for i := 0; i < n; i++ {
expectedValue := fmt.Sprintf("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz%9d", i)
k := data(i)
txn := db.NewTransaction(false)
item, err := txn.Get(k)
if err != nil {
t.Error(err)
}
got := string(getItemValue(t, item))
if expectedValue != got {
vs, err := db.get(y.KeyWithTs(k, math.MaxUint64))
require.NoError(t, err)
fmt.Printf("wanted=%q Item: %s\n", k, item)
fmt.Printf("on re-run, got version: %+v\n", vs)
txn := db.NewTransaction(false)
itr := txn.NewIterator(DefaultIteratorOptions)
for itr.Seek(k); itr.Valid(); itr.Next() {
item := itr.Item()
fmt.Printf("item=%s\n", item)
if !bytes.Equal(item.Key(), k) {
break
}
}
itr.Close()
txn.Discard()
}
require.EqualValues(t, expectedValue, string(getItemValue(t, item)), "wanted=%q Item: %s\n", k, item)
txn.Discard()
}
// "Delete" key.
for i := 0; i < n; i += m {
if (i % 10000) == 0 {
fmt.Printf("Deleting i=%d\n", i)
}
txn := db.NewTransaction(true)
for j := i; j < i+m && j < n; j++ {
require.NoError(t, txn.Delete(data(j)))
}
require.NoError(t, txn.Commit())
}
require.NoError(t, db.validate())
for i := 0; i < n; i++ {
if (i % 10000) == 0 {
// Display some progress. Right now, it's not very fast with no caching.
fmt.Printf("Testing i=%d\n", i)
}
k := data(i)
txn := db.NewTransaction(false)
_, err := txn.Get([]byte(k))
require.Equal(t, ErrKeyNotFound, err, "should not have found k: %q", k)
txn.Discard()
}
})
}
// Put a lot of data to move some data to disk.
// WARNING: This test might take a while but it should pass!
func TestExistsMore(t *testing.T) {
test := func(t *testing.T, db *DB) {
// n := 500000
n := 10000
m := 45
for i := 0; i < n; i += m {
if (i % 1000) == 0 {
t.Logf("Putting i=%d\n", i)
}
txn := db.NewTransaction(true)
for j := i; j < i+m && j < n; j++ {
require.NoError(t, txn.SetEntry(NewEntry([]byte(fmt.Sprintf("%09d", j)),
[]byte(fmt.Sprintf("%09d", j)))))
}
require.NoError(t, txn.Commit())
}
require.NoError(t, db.validate())
for i := 0; i < n; i++ {
if (i % 1000) == 0 {
fmt.Printf("Testing i=%d\n", i)
}
k := fmt.Sprintf("%09d", i)
require.NoError(t, db.View(func(txn *Txn) error {
_, err := txn.Get([]byte(k))
require.NoError(t, err)
return nil
}))
}
require.NoError(t, db.View(func(txn *Txn) error {
_, err := txn.Get([]byte("non-exists"))
require.Error(t, err)
return nil
}))
// "Delete" key.
for i := 0; i < n; i += m {
if (i % 1000) == 0 {
fmt.Printf("Deleting i=%d\n", i)
}
txn := db.NewTransaction(true)
for j := i; j < i+m && j < n; j++ {
require.NoError(t, txn.Delete([]byte(fmt.Sprintf("%09d", j))))
}
require.NoError(t, txn.Commit())
}
require.NoError(t, db.validate())
for i := 0; i < n; i++ {
if (i % 10000) == 0 {
// Display some progress. Right now, it's not very fast with no caching.
fmt.Printf("Testing i=%d\n", i)
}
k := fmt.Sprintf("%09d", i)
require.NoError(t, db.View(func(txn *Txn) error {
_, err := txn.Get([]byte(k))
require.Error(t, err)
return nil
}))
}
fmt.Println("Done and closing")
}
t.Run("disk mode", func(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
test(t, db)
})
})
t.Run("InMemory mode", func(t *testing.T) {
opt := DefaultOptions("").WithInMemory(true)
db, err := Open(opt)
require.NoError(t, err)
test(t, db)
require.NoError(t, db.Close())
})
}
func TestIterate2Basic(t *testing.T) {
test := func(t *testing.T, db *DB) {
bkey := func(i int) []byte {
return []byte(fmt.Sprintf("%09d", i))
}
bval := func(i int) []byte {
return []byte(fmt.Sprintf("%025d", i))
}
// n := 500000
n := 10000
for i := 0; i < n; i++ {
if (i % 1000) == 0 {
t.Logf("Put i=%d\n", i)
}
txnSet(t, db, bkey(i), bval(i), byte(i%127))
}
opt := IteratorOptions{}
opt.PrefetchValues = true
opt.PrefetchSize = 10
txn := db.NewTransaction(false)
it := txn.NewIterator(opt)
{
var count int
rewind := true
t.Log("Starting first basic iteration")
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
key := item.Key()
if rewind && count == 5000 {
// Rewind would skip /head/ key, and it.Next() would skip 0.
count = 1
it.Rewind()
t.Log("Rewinding from 5000 to zero.")
rewind = false
continue
}
require.EqualValues(t, bkey(count), string(key))
val := getItemValue(t, item)
require.EqualValues(t, bval(count), string(val))
require.Equal(t, byte(count%127), item.UserMeta())
count++
}
require.EqualValues(t, n, count)
}
{
t.Log("Starting second basic iteration")
idx := 5030
for it.Seek(bkey(idx)); it.Valid(); it.Next() {
item := it.Item()
require.EqualValues(t, bkey(idx), string(item.Key()))
require.EqualValues(t, bval(idx), string(getItemValue(t, item)))
idx++
}
}
it.Close()
}
t.Run("disk mode", func(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
test(t, db)
})
})
t.Run("InMemory mode", func(t *testing.T) {
opt := DefaultOptions("").WithInMemory(true)
db, err := Open(opt)
require.NoError(t, err)
test(t, db)
require.NoError(t, db.Close())
})
}
func TestLoad(t *testing.T) {
testLoad := func(t *testing.T, opt Options) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opt.Dir = dir
opt.ValueDir = dir
n := 10000
{
kv, err := Open(opt)
require.NoError(t, err)
for i := 0; i < n; i++ {
if (i % 10000) == 0 {
fmt.Printf("Putting i=%d\n", i)
}
k := []byte(fmt.Sprintf("%09d", i))
txnSet(t, kv, k, k, 0x00)
}
kv.Close()
}
kv, err := Open(opt)
require.NoError(t, err)
require.Equal(t, uint64(10001), kv.orc.readTs())
for i := 0; i < n; i++ {
if (i % 10000) == 0 {
fmt.Printf("Testing i=%d\n", i)
}
k := fmt.Sprintf("%09d", i)
require.NoError(t, kv.View(func(txn *Txn) error {
item, err := txn.Get([]byte(k))
require.NoError(t, err)
require.EqualValues(t, k, string(getItemValue(t, item)))
return nil
}))
}
kv.Close()
summary := kv.lc.getSummary()
// Check that files are garbage collected.
idMap := getIDMap(dir)
for fileID := range idMap {
// Check that name is in summary.filenames.
require.True(t, summary.fileIDs[fileID], "%d", fileID)
}
require.EqualValues(t, len(idMap), len(summary.fileIDs))
var fileIDs []uint64
for k := range summary.fileIDs { // Map to array.
fileIDs = append(fileIDs, k)
}
sort.Slice(fileIDs, func(i, j int) bool { return fileIDs[i] < fileIDs[j] })
fmt.Printf("FileIDs: %v\n", fileIDs)
}
t.Run("TestLoad Without Encryption/Compression", func(t *testing.T) {
opt := getTestOptions("")
opt.Compression = options.None
testLoad(t, opt)
})
t.Run("TestLoad With Encryption and no compression", func(t *testing.T) {
key := make([]byte, 32)
_, err := rand.Read(key)
require.NoError(t, err)
opt := getTestOptions("")
opt.EncryptionKey = key
opt.Compression = options.None
testLoad(t, opt)
})
t.Run("TestLoad With Encryption and compression", func(t *testing.T) {
key := make([]byte, 32)
_, err := rand.Read(key)
require.NoError(t, err)
opt := getTestOptions("")
opt.EncryptionKey = key
opt.Compression = options.ZSTD
testLoad(t, opt)
})
t.Run("TestLoad without Encryption and with compression", func(t *testing.T) {
opt := getTestOptions("")
opt.Compression = options.ZSTD
testLoad(t, opt)
})
}
func TestIterateDeleted(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
txnSet(t, db, []byte("Key1"), []byte("Value1"), 0x00)
txnSet(t, db, []byte("Key2"), []byte("Value2"), 0x00)
iterOpt := DefaultIteratorOptions
iterOpt.PrefetchValues = false
txn := db.NewTransaction(false)
idxIt := txn.NewIterator(iterOpt)
defer idxIt.Close()
count := 0
txn2 := db.NewTransaction(true)
prefix := []byte("Key")
for idxIt.Seek(prefix); idxIt.ValidForPrefix(prefix); idxIt.Next() {
key := idxIt.Item().Key()
count++
newKey := make([]byte, len(key))
copy(newKey, key)
require.NoError(t, txn2.Delete(newKey))
}
require.Equal(t, 2, count)
require.NoError(t, txn2.Commit())
for _, prefetch := range [...]bool{true, false} {
t.Run(fmt.Sprintf("Prefetch=%t", prefetch), func(t *testing.T) {
txn := db.NewTransaction(false)
iterOpt = DefaultIteratorOptions
iterOpt.PrefetchValues = prefetch
idxIt = txn.NewIterator(iterOpt)
var estSize int64
var idxKeys []string
for idxIt.Seek(prefix); idxIt.Valid(); idxIt.Next() {
item := idxIt.Item()
key := item.Key()
estSize += item.EstimatedSize()
if !bytes.HasPrefix(key, prefix) {
break
}
idxKeys = append(idxKeys, string(key))
t.Logf("%+v\n", idxIt.Item())
}
require.Equal(t, 0, len(idxKeys))
require.Equal(t, int64(0), estSize)
})
}
})
}
func TestIterateParallel(t *testing.T) {
key := func(account int) []byte {
var b [4]byte
binary.BigEndian.PutUint32(b[:], uint32(account))
return append([]byte("account-"), b[:]...)
}
N := 100000
iterate := func(txn *Txn, wg *sync.WaitGroup) {
defer wg.Done()
itr := txn.NewIterator(DefaultIteratorOptions)
defer itr.Close()
var count int
for itr.Rewind(); itr.Valid(); itr.Next() {
count++
item := itr.Item()
require.Equal(t, "account-", string(item.Key()[0:8]))
err := item.Value(func(val []byte) error {
require.Equal(t, "1000", string(val))
return nil
})
require.NoError(t, err)
}
require.Equal(t, N, count)
itr.Close() // Double close.
}
opt := DefaultOptions("")
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
var wg sync.WaitGroup
var txns []*Txn
for i := 0; i < N; i++ {
wg.Add(1)
txn := db.NewTransaction(true)
require.NoError(t, txn.SetEntry(NewEntry(key(i), []byte("1000"))))
txns = append(txns, txn)
}
for _, txn := range txns {
txn.CommitWith(func(err error) {
y.Check(err)
wg.Done()
})
}
wg.Wait()
// Check that a RW txn can run multiple iterators.
txn := db.NewTransaction(true)
itr := txn.NewIterator(DefaultIteratorOptions)
require.NotPanics(t, func() {
// Now that multiple iterators are supported in read-write
// transactions, make sure this does not panic anymore. Then just
// close the iterator.
txn.NewIterator(DefaultIteratorOptions).Close()
})
// The transaction should still panic since there is still one pending
// iterator that is open.
require.Panics(t, txn.Discard)
itr.Close()
txn.Discard()
// (Regression) Make sure that creating multiple concurrent iterators
// within a read only transaction continues to work.
t.Run("multiple read-only iterators", func(t *testing.T) {
// Run multiple iterators for a RO txn.
txn = db.NewTransaction(false)
defer txn.Discard()
wg.Add(3)
go iterate(txn, &wg)
go iterate(txn, &wg)
go iterate(txn, &wg)
wg.Wait()
})
// Make sure that when we create multiple concurrent iterators within a
// read-write transaction that it actually iterates successfully.
t.Run("multiple read-write iterators", func(t *testing.T) {
// Run multiple iterators for a RW txn.
txn = db.NewTransaction(true)
defer txn.Discard()
wg.Add(3)
go iterate(txn, &wg)
go iterate(txn, &wg)
go iterate(txn, &wg)
wg.Wait()
})
})
}
func TestDeleteWithoutSyncWrite(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
kv, err := Open(DefaultOptions(dir))
if err != nil {
t.Error(err)
t.Fail()
}
key := []byte("k1")
// Set a value with size > value threshold so that its written to value log.
txnSet(t, kv, key, []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789FOOBARZOGZOG"), 0x00)
txnDelete(t, kv, key)
kv.Close()
// Reopen KV
kv, err = Open(DefaultOptions(dir))
if err != nil {
t.Error(err)
t.Fail()
}
defer kv.Close()
require.NoError(t, kv.View(func(txn *Txn) error {
_, err := txn.Get(key)
require.Equal(t, ErrKeyNotFound, err)
return nil
}))
}
func TestPidFile(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// Reopen database
_, err := Open(getTestOptions(db.opt.Dir))
require.Error(t, err)
require.Contains(t, err.Error(), "Another process is using this Badger database")
})
}
func TestInvalidKey(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
err := db.Update(func(txn *Txn) error {
err := txn.SetEntry(NewEntry([]byte("!badger!head"), nil))
require.Equal(t, ErrInvalidKey, err)
err = txn.SetEntry(NewEntry([]byte("!badger!"), nil))
require.Equal(t, ErrInvalidKey, err)
err = txn.SetEntry(NewEntry([]byte("!badger"), []byte("BadgerDB")))
require.NoError(t, err)
return err
})
require.NoError(t, err)
require.NoError(t, db.View(func(txn *Txn) error {
item, err := txn.Get([]byte("!badger"))
if err != nil {
return err
}
require.NoError(t, item.Value(func(val []byte) error {
require.Equal(t, []byte("BadgerDB"), val)
return nil
}))
return nil
}))
})
}
func TestIteratorPrefetchSize(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
bkey := func(i int) []byte {
return []byte(fmt.Sprintf("%09d", i))
}
bval := func(i int) []byte {
return []byte(fmt.Sprintf("%025d", i))
}
n := 100
for i := 0; i < n; i++ {
// if (i % 10) == 0 {
// t.Logf("Put i=%d\n", i)
// }
txnSet(t, db, bkey(i), bval(i), byte(i%127))
}
getIteratorCount := func(prefetchSize int) int {
opt := IteratorOptions{}
opt.PrefetchValues = true
opt.PrefetchSize = prefetchSize
var count int
txn := db.NewTransaction(false)
it := txn.NewIterator(opt)
{
t.Log("Starting first basic iteration")
for it.Rewind(); it.Valid(); it.Next() {
count++
}
require.EqualValues(t, n, count)
}
return count
}
var sizes = []int{-10, 0, 1, 10}
for _, size := range sizes {
c := getIteratorCount(size)
require.Equal(t, 100, c)
}
})
}
func TestSetIfAbsentAsync(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
kv, _ := Open(getTestOptions(dir))
bkey := func(i int) []byte {
return []byte(fmt.Sprintf("%09d", i))
}
f := func(err error) {}
n := 1000
for i := 0; i < n; i++ {
// if (i % 10) == 0 {
// t.Logf("Put i=%d\n", i)
// }
txn := kv.NewTransaction(true)
_, err = txn.Get(bkey(i))
require.Equal(t, ErrKeyNotFound, err)
require.NoError(t, txn.SetEntry(NewEntry(bkey(i), nil).WithMeta(byte(i%127))))
txn.CommitWith(f)
}
require.NoError(t, kv.Close())
kv, err = Open(getTestOptions(dir))
require.NoError(t, err)
opt := DefaultIteratorOptions
txn := kv.NewTransaction(false)
var count int
it := txn.NewIterator(opt)
{
t.Log("Starting first basic iteration")
for it.Rewind(); it.Valid(); it.Next() {
count++
}
require.EqualValues(t, n, count)
}
require.Equal(t, n, count)
require.NoError(t, kv.Close())
}
func TestGetSetRace(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
data := make([]byte, 4096)
_, err := rand.Read(data)
require.NoError(t, err)
var (
numOp = 100
wg sync.WaitGroup
keyCh = make(chan string)
)
// writer
wg.Add(1)
go func() {
defer func() {
wg.Done()
close(keyCh)
}()
for i := 0; i < numOp; i++ {
key := fmt.Sprintf("%d", i)
txnSet(t, db, []byte(key), data, 0x00)
keyCh <- key
}
}()
// reader
wg.Add(1)
go func() {
defer wg.Done()
for key := range keyCh {
require.NoError(t, db.View(func(txn *Txn) error {
item, err := txn.Get([]byte(key))
require.NoError(t, err)
err = item.Value(nil)
require.NoError(t, err)
return nil
}))
}
}()
wg.Wait()
})
}
func TestDiscardVersionsBelow(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// Write 4 versions of the same key
for i := 0; i < 4; i++ {
err := db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte("answer"), []byte(fmt.Sprintf("%d", i))))
})
require.NoError(t, err)
}
opts := DefaultIteratorOptions
opts.AllVersions = true
opts.PrefetchValues = false
// Verify that there are 4 versions, and record 3rd version (2nd from top in iteration)
db.View(func(txn *Txn) error {
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
count++
item := it.Item()
require.Equal(t, []byte("answer"), item.Key())
if item.DiscardEarlierVersions() {
break
}
}
require.Equal(t, 4, count)
return nil
})
// Set new version and discard older ones.
err := db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte("answer"), []byte("5")).WithDiscard())
})
require.NoError(t, err)
// Verify that there are only 2 versions left, and versions
// below ts have been deleted.
db.View(func(txn *Txn) error {
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
count++
item := it.Item()
require.Equal(t, []byte("answer"), item.Key())
if item.DiscardEarlierVersions() {
break
}
}
require.Equal(t, 1, count)
return nil
})
})
}
func TestExpiry(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// Write two keys, one with a TTL
err := db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte("answer1"), []byte("42")))
})
require.NoError(t, err)
err = db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte("answer2"), []byte("43")).WithTTL(1 * time.Second))
})
require.NoError(t, err)
time.Sleep(2 * time.Second)
// Verify that only the unexpired key can still be read
err = db.View(func(txn *Txn) error {
_, err := txn.Get([]byte("answer1"))
require.NoError(t, err)
_, err = txn.Get([]byte("answer2"))
require.Equal(t, ErrKeyNotFound, err)
return nil
})
require.NoError(t, err)
// Verify that only one key is found during iteration
opts := DefaultIteratorOptions
opts.PrefetchValues = false
err = db.View(func(txn *Txn) error {
it := txn.NewIterator(opts)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
count++
item := it.Item()
require.Equal(t, []byte("answer1"), item.Key())
}
require.Equal(t, 1, count)
return nil
})
require.NoError(t, err)
})
}
func TestExpiryImproperDBClose(t *testing.T) {
testReplay := func(opt Options) {
// L0 compaction doesn't affect the test in any way. It is set to allow
// graceful shutdown of db0.
db0, err := Open(opt.WithCompactL0OnClose(false))
require.NoError(t, err)
dur := 1 * time.Hour
expiryTime := uint64(time.Now().Add(dur).Unix())
err = db0.Update(func(txn *Txn) error {
err = txn.SetEntry(NewEntry([]byte("test_key"), []byte("test_value")).WithTTL(dur))
require.NoError(t, err)
return nil
})
require.NoError(t, err)
// Simulate a crash by not closing db0, but releasing the locks.
if db0.dirLockGuard != nil {
require.NoError(t, db0.dirLockGuard.release())
db0.dirLockGuard = nil
}
if db0.valueDirGuard != nil {
require.NoError(t, db0.valueDirGuard.release())
db0.valueDirGuard = nil
}
require.NoError(t, db0.Close())
db1, err := Open(opt)
require.NoError(t, err)
err = db1.View(func(txn *Txn) error {
itm, err := txn.Get([]byte("test_key"))
require.NoError(t, err)
require.True(t, expiryTime <= itm.ExpiresAt() && itm.ExpiresAt() <= uint64(time.Now().Add(dur).Unix()),
"expiry time of entry is invalid")
return nil
})
require.NoError(t, err)
require.NoError(t, db1.Close())
}
t.Run("Test plain text", func(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opt := getTestOptions(dir)
testReplay(opt)
})
t.Run("Test encryption", func(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opt := getTestOptions(dir)
key := make([]byte, 32)
_, err = rand.Read(key)
require.NoError(t, err)
opt.EncryptionKey = key
testReplay(opt)
})
}
func randBytes(n int) []byte {
recv := make([]byte, n)
in, err := rand.Read(recv)
if err != nil {
panic(err)
}
return recv[:in]
}
var benchmarkData = []struct {
key, value []byte
success bool // represents whether the KV should be inserted successfully or not
}{
{randBytes(100), nil, true},
{randBytes(1000), []byte("foo"), true},
{[]byte("foo"), randBytes(1000), true},
{[]byte(""), randBytes(1000), false},
{nil, randBytes(1000000), false},
{randBytes(100000), nil, false},
{randBytes(1000000), nil, false},
}
func TestLargeKeys(t *testing.T) {
test := func(t *testing.T, opt Options) {
db, err := Open(opt)
require.NoError(t, err)
for i := 0; i < 1000; i++ {
tx := db.NewTransaction(true)
for _, kv := range benchmarkData {
k := make([]byte, len(kv.key))
copy(k, kv.key)
v := make([]byte, len(kv.value))
copy(v, kv.value)
if err := tx.SetEntry(NewEntry(k, v)); err != nil {
// SetEntry failed; that is only acceptable when success is false.
if kv.success {
t.Fatalf("failed with: %s", err)
}
} else if !kv.success {
t.Fatal("insertion should fail")
}
}
if err := tx.Commit(); err != nil {
t.Fatalf("#%d: batchSet err: %v", i, err)
}
}
require.NoError(t, db.Close())
}
t.Run("disk mode", func(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opt := DefaultOptions(dir).WithValueLogFileSize(1024 * 1024 * 1024)
test(t, opt)
})
t.Run("InMemory mode", func(t *testing.T) {
opt := DefaultOptions("").WithValueLogFileSize(1024 * 1024 * 1024)
opt.InMemory = true
test(t, opt)
})
}
func TestCreateDirs(t *testing.T) {
dir, err := ioutil.TempDir("", "parent")
require.NoError(t, err)
defer removeDir(dir)
db, err := Open(DefaultOptions(filepath.Join(dir, "badger")))
require.NoError(t, err)
require.NoError(t, db.Close())
_, err = os.Stat(dir)
require.NoError(t, err)
}
func TestGetSetDeadlock(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
fmt.Println(dir)
require.NoError(t, err)
defer removeDir(dir)
db, err := Open(DefaultOptions(dir).WithValueLogFileSize(1 << 20))
require.NoError(t, err)
defer db.Close()
val := make([]byte, 1<<19)
key := []byte("key1")
require.NoError(t, db.Update(func(txn *Txn) error {
rand.Read(val)
require.NoError(t, txn.SetEntry(NewEntry(key, val)))
return nil
}))
timeout, done := time.After(10*time.Second), make(chan bool)
go func() {
db.Update(func(txn *Txn) error {
item, err := txn.Get(key)
require.NoError(t, err)
err = item.Value(nil) // This takes an RLock on the file
require.NoError(t, err)
rand.Read(val)
require.NoError(t, txn.SetEntry(NewEntry(key, val)))
require.NoError(t, txn.SetEntry(NewEntry([]byte("key2"), val)))
return nil
})
done <- true
}()
select {
case <-timeout:
t.Fatal("db.Update did not finish within 10s, assuming deadlock.")
case <-done:
t.Log("db.Update finished.")
}
}
func TestWriteDeadlock(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
db, err := Open(DefaultOptions(dir).WithValueLogFileSize(10 << 20))
require.NoError(t, err)
defer db.Close()
print := func(count *int) {
*count++
if *count%100 == 0 {
fmt.Printf("%05d\r", *count)
}
}
var count int
val := make([]byte, 10000)
require.NoError(t, db.Update(func(txn *Txn) error {
for i := 0; i < 1500; i++ {
key := fmt.Sprintf("%d", i)
rand.Read(val)
require.NoError(t, txn.SetEntry(NewEntry([]byte(key), val)))
print(&count)
}
return nil
}))
count = 0
fmt.Println("\nWrites done. Iteration and updates starting...")
err = db.Update(func(txn *Txn) error {
opt := DefaultIteratorOptions
opt.PrefetchValues = false
it := txn.NewIterator(opt)
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
// Using Value() would cause deadlock.
// item.Value()
out, err := item.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, len(val), len(out))
key := y.Copy(item.Key())
rand.Read(val)
require.NoError(t, txn.SetEntry(NewEntry(key, val)))
print(&count)
}
return nil
})
require.NoError(t, err)
}
func TestSequence(t *testing.T) {
key0 := []byte("seq0")
key1 := []byte("seq1")
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
seq0, err := db.GetSequence(key0, 10)
require.NoError(t, err)
seq1, err := db.GetSequence(key1, 100)
require.NoError(t, err)
for i := uint64(0); i < uint64(105); i++ {
num, err := seq0.Next()
require.NoError(t, err)
require.Equal(t, i, num)
num, err = seq1.Next()
require.NoError(t, err)
require.Equal(t, i, num)
}
err = db.View(func(txn *Txn) error {
item, err := txn.Get(key0)
if err != nil {
return err
}
var num0 uint64
if err := item.Value(func(val []byte) error {
num0 = binary.BigEndian.Uint64(val)
return nil
}); err != nil {
return err
}
require.Equal(t, uint64(110), num0)
item, err = txn.Get(key1)
if err != nil {
return err
}
var num1 uint64
if err := item.Value(func(val []byte) error {
num1 = binary.BigEndian.Uint64(val)
return nil
}); err != nil {
return err
}
require.Equal(t, uint64(200), num1)
return nil
})
require.NoError(t, err)
})
}
func TestSequence_Release(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
// get sequence, use once and release
key := []byte("key")
seq, err := db.GetSequence(key, 1000)
require.NoError(t, err)
num, err := seq.Next()
require.NoError(t, err)
require.Equal(t, uint64(0), num)
require.NoError(t, seq.Release())
// We used up 0, so 1 should be stored now.
err = db.View(func(txn *Txn) error {
item, err := txn.Get(key)
if err != nil {
return err
}
val, err := item.ValueCopy(nil)
if err != nil {
return err
}
require.Equal(t, num+1, binary.BigEndian.Uint64(val))
return nil
})
require.NoError(t, err)
// Using it again leases a new band, so 1+1000 will be stored.
num, err = seq.Next()
require.NoError(t, err)
require.Equal(t, uint64(1), num)
err = db.View(func(txn *Txn) error {
item, err := txn.Get(key)
if err != nil {
return err
}
val, err := item.ValueCopy(nil)
if err != nil {
return err
}
require.Equal(t, uint64(1001), binary.BigEndian.Uint64(val))
return nil
})
require.NoError(t, err)
})
}
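// exampleSequenceUsage is an illustrative sketch (not called by any test) of the
// lease/Next/Release pattern the sequence tests above exercise. The key and
// bandwidth below are arbitrary values chosen for the example.
func exampleSequenceUsage(db *DB) error {
// Lease a band of 1000 ids under this key.
seq, err := db.GetSequence([]byte("example-seq"), 1000)
if err != nil {
return err
}
// Release hands back the unused part of the lease on shutdown.
defer func() { _ = seq.Release() }()
for i := 0; i < 5; i++ {
num, err := seq.Next()
if err != nil {
return err
}
_ = num // monotonically increasing id
}
return nil
}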
func TestTestSequence2(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
key := []byte("key")
seq1, err := db.GetSequence(key, 2)
require.NoError(t, err)
seq2, err := db.GetSequence(key, 2)
require.NoError(t, err)
num, err := seq2.Next()
require.NoError(t, err)
require.Equal(t, uint64(2), num)
require.NoError(t, seq2.Release())
require.NoError(t, seq1.Release())
seq3, err := db.GetSequence(key, 2)
require.NoError(t, err)
for i := 0; i < 5; i++ {
num2, err := seq3.Next()
require.NoError(t, err)
require.Equal(t, uint64(i)+3, num2)
}
require.NoError(t, seq3.Release())
})
}
func TestReadOnly(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opts := getTestOptions(dir)
// Create the DB
db, err := Open(opts)
require.NoError(t, err)
for i := 0; i < 10000; i++ {
txnSet(t, db, []byte(fmt.Sprintf("key%d", i)), []byte(fmt.Sprintf("value%d", i)), 0x00)
}
// Attempt a read-only open while it's open read-write.
opts.ReadOnly = true
_, err = Open(opts)
require.Error(t, err)
if err == ErrWindowsNotSupported {
require.NoError(t, db.Close())
return
}
require.Contains(t, err.Error(), "Another process is using this Badger database")
db.Close()
// Open one read-only
opts.ReadOnly = true
kv1, err := Open(opts)
require.NoError(t, err)
defer kv1.Close()
// Open another read-only
kv2, err := Open(opts)
require.NoError(t, err)
defer kv2.Close()
// Attempt a read-write open while it's open for read-only
opts.ReadOnly = false
_, err = Open(opts)
require.Error(t, err)
require.Contains(t, err.Error(), "Another process is using this Badger database")
// Get a thing from the DB
txn1 := kv1.NewTransaction(true)
v1, err := txn1.Get([]byte("key1"))
require.NoError(t, err)
b1, err := v1.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, b1, []byte("value1"))
err = txn1.Commit()
require.NoError(t, err)
// Get a thing from the DB via the other connection
txn2 := kv2.NewTransaction(true)
v2, err := txn2.Get([]byte("key2000"))
require.NoError(t, err)
b2, err := v2.ValueCopy(nil)
require.NoError(t, err)
require.Equal(t, b2, []byte("value2000"))
err = txn2.Commit()
require.NoError(t, err)
// Attempt to set a value on a read-only connection
txn := kv1.NewTransaction(true)
err = txn.SetEntry(NewEntry([]byte("key"), []byte("value")))
require.Error(t, err)
require.Contains(t, err.Error(), "No sets or deletes are allowed in a read-only transaction")
err = txn.Commit()
require.NoError(t, err)
}
func TestLSMOnly(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
opts := LSMOnlyOptions(dir)
dopts := DefaultOptions(dir)
require.NotEqual(t, dopts.ValueThreshold, opts.ValueThreshold)
dopts.ValueThreshold = 1 << 21
_, err = Open(dopts)
require.Contains(t, err.Error(), "Invalid ValueThreshold")
// Also test for an error when ValueThreshold is greater than maxBatchSize.
dopts.ValueThreshold = LSMOnlyOptions(dir).ValueThreshold
// maxBatchSize is calculated from MaxTableSize.
dopts.MaxTableSize = int64(LSMOnlyOptions(dir).ValueThreshold)
_, err = Open(dopts)
require.Error(t, err, "db creation should have been failed")
require.Contains(t, err.Error(), "Valuethreshold greater than max batch size")
opts.ValueLogMaxEntries = 100
db, err := Open(opts)
require.NoError(t, err)
value := make([]byte, 128)
_, err = rand.Read(value)
for i := 0; i < 500; i++ {
require.NoError(t, err)
txnSet(t, db, []byte(fmt.Sprintf("key%d", i)), value, 0x00)
}
require.NoError(t, db.Close()) // Close to force compactions, so Value log GC would run.
db, err = Open(opts)
require.NoError(t, err)
defer db.Close()
require.NoError(t, db.RunValueLogGC(0.2))
}
// This test function is doing some intricate sorcery.
func TestMinReadTs(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
for i := 0; i < 10; i++ {
require.NoError(t, db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte("x"), []byte("y")))
}))
}
time.Sleep(time.Millisecond)
readTxn0 := db.NewTransaction(false)
require.Equal(t, uint64(10), readTxn0.readTs)
min := db.orc.readMark.DoneUntil()
require.Equal(t, uint64(9), min)
readTxn := db.NewTransaction(false)
for i := 0; i < 10; i++ {
require.NoError(t, db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte("x"), []byte("y")))
}))
}
require.Equal(t, uint64(20), db.orc.readTs())
time.Sleep(time.Millisecond)
require.Equal(t, min, db.orc.readMark.DoneUntil())
readTxn0.Discard()
readTxn.Discard()
time.Sleep(time.Millisecond)
require.Equal(t, uint64(19), db.orc.readMark.DoneUntil())
db.orc.readMark.Done(uint64(20)) // Because we called readTs.
for i := 0; i < 10; i++ {
db.View(func(txn *Txn) error {
return nil
})
}
time.Sleep(time.Millisecond)
require.Equal(t, uint64(20), db.orc.readMark.DoneUntil())
})
}
func TestGoroutineLeak(t *testing.T) {
test := func(t *testing.T, opt *Options) {
time.Sleep(1 * time.Second)
before := runtime.NumGoroutine()
t.Logf("Num go: %d", before)
for i := 0; i < 12; i++ {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
updated := false
ctx, cancel := context.WithCancel(context.Background())
var wg sync.WaitGroup
wg.Add(1)
go func() {
err := db.Subscribe(ctx, func(kvs *pb.KVList) error {
require.Equal(t, []byte("value"), kvs.Kv[0].GetValue())
updated = true
wg.Done()
return nil
}, []byte("key"))
if err != nil {
require.Equal(t, err.Error(), context.Canceled.Error())
}
}()
// Wait for the goroutine to be scheduled.
time.Sleep(time.Second)
err := db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte("key"), []byte("value")))
})
require.NoError(t, err)
wg.Wait()
cancel()
require.Equal(t, true, updated)
})
}
time.Sleep(2 * time.Second)
require.Equal(t, before, runtime.NumGoroutine())
}
t.Run("disk mode", func(t *testing.T) {
test(t, nil)
})
t.Run("InMemory mode", func(t *testing.T) {
opt := DefaultOptions("").WithInMemory(true)
test(t, &opt)
})
}
func ExampleOpen() {
dir, err := ioutil.TempDir("", "badger-test")
if err != nil {
panic(err)
}
defer removeDir(dir)
db, err := Open(DefaultOptions(dir))
if err != nil {
panic(err)
}
defer db.Close()
err = db.View(func(txn *Txn) error {
_, err := txn.Get([]byte("key"))
// We expect ErrKeyNotFound
fmt.Println(err)
return nil
})
if err != nil {
panic(err)
}
txn := db.NewTransaction(true) // Read-write txn
err = txn.SetEntry(NewEntry([]byte("key"), []byte("value")))
if err != nil {
panic(err)
}
err = txn.Commit()
if err != nil {
panic(err)
}
err = db.View(func(txn *Txn) error {
item, err := txn.Get([]byte("key"))
if err != nil {
return err
}
val, err := item.ValueCopy(nil)
if err != nil {
return err
}
fmt.Printf("%s\n", string(val))
return nil
})
if err != nil {
panic(err)
}
// Output:
// Key not found
// value
}
func ExampleTxn_NewIterator() {
dir, err := ioutil.TempDir("", "badger-test")
if err != nil {
panic(err)
}
defer removeDir(dir)
db, err := Open(DefaultOptions(dir))
if err != nil {
panic(err)
}
defer db.Close()
bkey := func(i int) []byte {
return []byte(fmt.Sprintf("%09d", i))
}
bval := func(i int) []byte {
return []byte(fmt.Sprintf("%025d", i))
}
txn := db.NewTransaction(true)
// Fill in 1000 items
n := 1000
for i := 0; i < n; i++ {
err := txn.SetEntry(NewEntry(bkey(i), bval(i)))
if err != nil {
panic(err)
}
}
err = txn.Commit()
if err != nil {
panic(err)
}
opt := DefaultIteratorOptions
opt.PrefetchSize = 10
// Iterate over 1000 items
var count int
err = db.View(func(txn *Txn) error {
it := txn.NewIterator(opt)
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
count++
}
return nil
})
if err != nil {
panic(err)
}
fmt.Printf("Counted %d elements", count)
// Output:
// Counted 1000 elements
}
func TestSyncForRace(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer removeDir(dir)
db, err := Open(DefaultOptions(dir).WithSyncWrites(false))
require.NoError(t, err)
defer db.Close()
closeChan := make(chan struct{})
doneChan := make(chan struct{})
go func() {
ticker := time.NewTicker(100 * time.Microsecond)
for {
select {
case <-ticker.C:
if err := db.Sync(); err != nil {
require.NoError(t, err)
}
db.opt.Debugf("Sync Iteration completed")
case <-closeChan:
close(doneChan)
return
}
}
}()
sz := 128 << 10 // 5 entries per value log file.
v := make([]byte, sz)
rand.Read(v[:rand.Intn(sz)])
txn := db.NewTransaction(true)
for i := 0; i < 10000; i++ {
require.NoError(t, txn.SetEntry(NewEntry([]byte(fmt.Sprintf("key%d", i)), v)))
if i%3 == 0 {
require.NoError(t, txn.Commit())
txn = db.NewTransaction(true)
}
if i%100 == 0 {
db.opt.Debugf("next 100 entries added to DB")
}
}
require.NoError(t, txn.Commit())
close(closeChan)
<-doneChan
}
// Earlier, if the head was not pointing to the latest vlog file, badger used to crash during
// replay with an index out of range panic. After the fix in this commit, it should not.
func TestNoCrash(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err, "cannot create badger dir")
defer removeDir(dir)
ops := getTestOptions(dir)
ops.ValueLogMaxEntries = 1
ops.ValueThreshold = 32
db, err := Open(ops)
require.NoError(t, err, "unable to open db")
// Inserting 100 entries will generate 100 vlog files, since ValueLogMaxEntries is 1.
for i := 0; i < 100; i++ {
err := db.Update(func(txn *Txn) error {
entry := NewEntry([]byte(fmt.Sprintf("key-%d", i)), []byte(fmt.Sprintf("val-%d", i)))
return txn.SetEntry(entry)
})
require.NoError(t, err, "update to db failed")
}
db.Lock()
// Make head point to the second file. We cannot make it point to the first
// vlog file because we cannot push a zero head pointer.
db.vhead = valuePointer{1, 0, 0}
db.Unlock()
db.Close()
// reduce size of SSTable to flush early
ops.MaxTableSize = 1 << 10
db, err = Open(ops)
require.Nil(t, err, "error while opening db")
require.NoError(t, db.Close())
}
func TestForceFlushMemtable(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err, "temp dir for badger count not be created")
ops := getTestOptions(dir)
ops.ValueLogMaxEntries = 1
ops.LogRotatesToFlush = 1
db, err := Open(ops)
require.NoError(t, err, "error while openning db")
defer db.Close()
for i := 0; i < 3; i++ {
err = db.Update(func(txn *Txn) error {
return txn.SetEntry(NewEntry([]byte(fmt.Sprintf("key-%d", i)),
[]byte(fmt.Sprintf("value-%d", i))))
})
require.NoError(t, err, "unable to set key and value")
}
time.Sleep(1 * time.Second)
// We want to make sure that the memtable is flushed to disk. While flushing the memtable to
// disk, the latest head is also stored in it. Hence we will try to read the head from disk.
// To make sure of this, we truncate all memtables.
db.Lock()
db.mt.DecrRef()
for _, mt := range db.imm {
mt.DecrRef()
}
db.imm = db.imm[:0]
db.mt = skl.NewSkiplist(arenaSize(db.opt)) // Set it up for future writes.
db.Unlock()
// get latest value of value log head
headKey := y.KeyWithTs(head, math.MaxUint64)
vs, err := db.get(headKey)
require.NoError(t, err)
var vptr valuePointer
vptr.Decode(vs.Value)
// Since we are inserting 3 entries and ValueLogMaxEntries is 1, there will be 3 rotations. For
// the 1st and 2nd rotations, the head flushed with the memtable will have fid 0; the last time
// it will be 1.
require.True(t, vptr.Fid == 1, fmt.Sprintf("expected fid: %d, actual fid: %d", 1, vptr.Fid))
}
func TestVerifyChecksum(t *testing.T) {
testVerifyCheckSum := func(t *testing.T, opt Options) {
path, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer os.Remove(path)
opt.ValueDir = path
opt.Dir = path
// use stream write for writing.
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
value := make([]byte, 32)
y.Check2(rand.Read(value))
l := &pb.KVList{}
st := 0
for i := 0; i < 1000; i++ {
key := make([]byte, 8)
binary.BigEndian.PutUint64(key, uint64(i))
l.Kv = append(l.Kv, &pb.KV{
Key: key,
Value: value,
StreamId: uint32(st),
Version: 1,
})
if i%100 == 0 {
st++
}
}
sw := db.NewStreamWriter()
require.NoError(t, sw.Prepare(), "sw.Prepare() failed")
require.NoError(t, sw.Write(l), "sw.Write() failed")
require.NoError(t, sw.Flush(), "sw.Flush() failed")
require.NoError(t, db.VerifyChecksum(), "checksum verification failed for DB")
})
}
t.Run("Testing Verify Checksum without encryption", func(t *testing.T) {
testVerfiyCheckSum(t, getTestOptions(""))
})
t.Run("Testing Verify Checksum with Encryption", func(t *testing.T) {
key := make([]byte, 32)
_, err := rand.Read(key)
require.NoError(t, err)
opt := getTestOptions("")
opt.EncryptionKey = key
testVerifyCheckSum(t, opt)
})
}
func TestMain(m *testing.M) {
flag.Parse()
os.Exit(m.Run())
}
func removeDir(dir string) {
if err := os.RemoveAll(dir); err != nil {
panic(err)
}
}
func TestWriteInMemory(t *testing.T) {
opt := DefaultOptions("").WithInMemory(true)
db, err := Open(opt)
require.NoError(t, err)
defer func() {
require.NoError(t, db.Close())
}()
for i := 0; i < 100; i++ {
txnSet(t, db, []byte(fmt.Sprintf("key%d", i)), []byte(fmt.Sprintf("val%d", i)), 0x00)
}
err = db.View(func(txn *Txn) error {
for j := 0; j < 100; j++ {
item, err := txn.Get([]byte(fmt.Sprintf("key%d", j)))
require.NoError(t, err)
expected := []byte(fmt.Sprintf("val%d", j))
item.Value(func(val []byte) error {
require.Equal(t, expected, val,
"Invalid value for key %q. expected: %q, actual: %q",
item.Key(), expected, val)
return nil
})
}
return nil
})
require.NoError(t, err)
}
badger-2.2007.2/dir_unix.go 0000664 0000000 0000000 00000007016 13721731165 0015352 0 ustar 00root root 0000000 0000000 // +build !windows
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// directoryLockGuard holds a lock on a directory and a pid file inside. The pid file isn't part
// of the locking mechanism, it's just advisory.
type directoryLockGuard struct {
// File handle on the directory, which we've flocked.
f *os.File
// The absolute path to our pid file.
path string
// Was this a shared lock for a read-only database?
readOnly bool
}
// acquireDirectoryLock gets a lock on the directory (using flock). If
// this is not read-only, it will also write our pid to
// dirPath/pidFileName for convenience.
func acquireDirectoryLock(dirPath string, pidFileName string, readOnly bool) (
*directoryLockGuard, error) {
// Convert to absolute path so that Release still works even if we do an unbalanced
// chdir in the meantime.
absPidFilePath, err := filepath.Abs(filepath.Join(dirPath, pidFileName))
if err != nil {
return nil, errors.Wrap(err, "cannot get absolute path for pid lock file")
}
f, err := os.Open(dirPath)
if err != nil {
return nil, errors.Wrapf(err, "cannot open directory %q", dirPath)
}
opts := unix.LOCK_EX | unix.LOCK_NB
if readOnly {
opts = unix.LOCK_SH | unix.LOCK_NB
}
err = unix.Flock(int(f.Fd()), opts)
if err != nil {
f.Close()
return nil, errors.Wrapf(err,
"Cannot acquire directory lock on %q. Another process is using this Badger database.",
dirPath)
}
if !readOnly {
// Yes, we happily overwrite a pre-existing pid file. We're the
// only read-write badger process using this directory.
err = ioutil.WriteFile(absPidFilePath, []byte(fmt.Sprintf("%d\n", os.Getpid())), 0666)
if err != nil {
f.Close()
return nil, errors.Wrapf(err,
"Cannot write pid file %q", absPidFilePath)
}
}
return &directoryLockGuard{f, absPidFilePath, readOnly}, nil
}
// Release deletes the pid file and releases our lock on the directory.
func (guard *directoryLockGuard) release() error {
var err error
if !guard.readOnly {
// It's important that we remove the pid file first.
err = os.Remove(guard.path)
}
if closeErr := guard.f.Close(); err == nil {
err = closeErr
}
guard.path = ""
guard.f = nil
return err
}
// openDir opens a directory for syncing.
func openDir(path string) (*os.File, error) { return os.Open(path) }
// When you create or delete a file, you have to ensure the directory entry for the file is synced
// in order to guarantee the file is visible (if the system crashes). (See the man page for fsync,
// or see https://github.com/coreos/etcd/issues/6368 for an example.)
func syncDir(dir string) error {
f, err := openDir(dir)
if err != nil {
return errors.Wrapf(err, "While opening directory: %s.", dir)
}
err = f.Sync()
closeErr := f.Close()
if err != nil {
return errors.Wrapf(err, "While syncing directory: %s.", dir)
}
return errors.Wrapf(closeErr, "While closing directory: %s.", dir)
}
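// exampleCreateAndSync is an illustrative sketch (not part of the original source) of the
// create-then-sync pattern described above: after a new file is created and its contents are
// synced, the parent directory is synced as well so the directory entry survives a crash. The
// file name used here is hypothetical.
func exampleCreateAndSync(dir string) error {
    f, err := os.Create(filepath.Join(dir, "example.dat"))
    if err != nil {
        return err
    }
    if err := f.Sync(); err != nil { // persist the file's contents first
        _ = f.Close()
        return err
    }
    if err := f.Close(); err != nil {
        return err
    }
    // Now sync the directory so the new directory entry itself is durable.
    return syncDir(dir)
}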
badger-2.2007.2/dir_windows.go 0000664 0000000 0000000 00000007503 13721731165 0016062 0 ustar 00root root 0000000 0000000 // +build windows
/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
// openDir opens a directory on Windows with write access for syncing.
import (
"os"
"path/filepath"
"syscall"
"github.com/pkg/errors"
)
// FILE_ATTRIBUTE_TEMPORARY - A file that is being used for temporary storage.
// FILE_FLAG_DELETE_ON_CLOSE - The file is to be deleted immediately after all of its handles are
// closed, which includes the specified handle and any other open or duplicated handles.
// See: https://docs.microsoft.com/en-us/windows/desktop/FileIO/file-attribute-constants
// NOTE: Added here to avoid importing golang.org/x/sys/windows
const (
FILE_ATTRIBUTE_TEMPORARY = 0x00000100
FILE_FLAG_DELETE_ON_CLOSE = 0x04000000
)
func openDir(path string) (*os.File, error) {
fd, err := openDirWin(path)
if err != nil {
return nil, err
}
return os.NewFile(uintptr(fd), path), nil
}
func openDirWin(path string) (fd syscall.Handle, err error) {
if len(path) == 0 {
return syscall.InvalidHandle, syscall.ERROR_FILE_NOT_FOUND
}
pathp, err := syscall.UTF16PtrFromString(path)
if err != nil {
return syscall.InvalidHandle, err
}
access := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE)
sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE)
createmode := uint32(syscall.OPEN_EXISTING)
fl := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS)
return syscall.CreateFile(pathp, access, sharemode, nil, createmode, fl, 0)
}
// DirectoryLockGuard holds a lock on the directory.
type directoryLockGuard struct {
h syscall.Handle
path string
}
// AcquireDirectoryLock acquires exclusive access to a directory.
func acquireDirectoryLock(dirPath string, pidFileName string, readOnly bool) (*directoryLockGuard, error) {
if readOnly {
return nil, ErrWindowsNotSupported
}
// Convert to absolute path so that Release still works even if we do an unbalanced
// chdir in the meantime.
absLockFilePath, err := filepath.Abs(filepath.Join(dirPath, pidFileName))
if err != nil {
return nil, errors.Wrap(err, "Cannot get absolute path for pid lock file")
}
// This call creates a file handle in memory that only one process can use at a time. When
// that process ends, the file is deleted by the system.
// FILE_ATTRIBUTE_TEMPORARY is used to tell Windows to try to create the handle in memory.
// FILE_FLAG_DELETE_ON_CLOSE is not specified in syscall_windows.go but tells Windows to delete
// the file once all handles to it are closed.
// XXX: this works but it's a bit clunky. I'd prefer to use LockFileEx, but it needs the
// unsafe package.
h, err := syscall.CreateFile(
syscall.StringToUTF16Ptr(absLockFilePath), 0, 0, nil,
syscall.OPEN_ALWAYS,
uint32(FILE_ATTRIBUTE_TEMPORARY|FILE_FLAG_DELETE_ON_CLOSE),
0)
if err != nil {
return nil, errors.Wrapf(err,
"Cannot create lock file %q. Another process is using this Badger database",
absLockFilePath)
}
return &directoryLockGuard{h: h, path: absLockFilePath}, nil
}
// Release removes the directory lock.
func (g *directoryLockGuard) release() error {
g.path = ""
return syscall.CloseHandle(g.h)
}
// Windows doesn't support syncing directories to the file system. See
// https://github.com/dgraph-io/badger/issues/699#issuecomment-504133587 for more details.
func syncDir(dir string) error { return nil }
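// exampleDirectoryLock is an illustrative sketch (not part of the original source) of how the
// guard above is typically used: acquire the lock while opening the database directory and
// release it when done. The directory path and pid file name are hypothetical; readOnly must be
// false here because read-only mode is not supported on Windows.
func exampleDirectoryLock() error {
    guard, err := acquireDirectoryLock(`C:\badger-data`, "badger.pid", false)
    if err != nil {
        return err // most likely another process already holds the lock
    }
    // ... open and use the database while the lock is held ...
    return guard.release()
}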
badger-2.2007.2/doc.go 0000664 0000000 0000000 00000002302 13721731165 0014267 0 ustar 00root root 0000000 0000000 /*
Package badger implements an embeddable, simple and fast key-value database,
written in pure Go. It is designed to be highly performant for both reads and
writes simultaneously. Badger uses Multi-Version Concurrency Control (MVCC), and
supports transactions. It runs transactions concurrently, with serializable
snapshot isolation guarantees.
Badger uses an LSM tree along with a value log to separate keys from values,
hence reducing both write amplification and the size of the LSM tree. This
allows the LSM tree to be served entirely from RAM, while the values are served
from SSD.
Usage
Badger has the following main types: DB, Txn, Item and Iterator. DB contains
keys that are associated with values. It must be opened with the appropriate
options before it can be accessed.
All operations happen inside a Txn. Txn represents a transaction, which can
be read-only or read-write. Read-only transactions can read values for a
given key (which are returned inside an Item), or iterate over a set of
key-value pairs using an Iterator (which are returned as Item type values as
well). Read-write transactions can also update and delete keys from the DB.
See the examples for more usage details.
*/
package badger
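// exampleUsage is an illustrative sketch (not part of the original source) of the flow described
// above: open a DB, write a key inside a read-write transaction, then read it back inside a
// read-only transaction through an Item. The directory path, key and value are hypothetical.
func exampleUsage() error {
    db, err := Open(DefaultOptions("/tmp/badger-example"))
    if err != nil {
        return err
    }
    defer func() { _ = db.Close() }()
    // Read-write transaction: set a key.
    if err := db.Update(func(txn *Txn) error {
        return txn.Set([]byte("answer"), []byte("42"))
    }); err != nil {
        return err
    }
    // Read-only transaction: read the value back via an Item.
    return db.View(func(txn *Txn) error {
        item, err := txn.Get([]byte("answer"))
        if err != nil {
            return err
        }
        return item.Value(func(val []byte) error {
            _ = val // the value is only valid inside this callback
            return nil
        })
    })
}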
badger-2.2007.2/errors.go 0000664 0000000 0000000 00000013067 13721731165 0015050 0 ustar 00root root 0000000 0000000 /*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"math"
"github.com/pkg/errors"
)
const (
// ValueThresholdLimit is the maximum permissible value of opt.ValueThreshold.
ValueThresholdLimit = math.MaxUint16 - 16 + 1
)
var (
// ErrValueLogSize is returned when opt.ValueLogFileSize option is not within the valid
// range.
ErrValueLogSize = errors.New("Invalid ValueLogFileSize, must be between 1MB and 2GB")
// ErrKeyNotFound is returned when key isn't found on a txn.Get.
ErrKeyNotFound = errors.New("Key not found")
// ErrTxnTooBig is returned if too many writes are packed into a single transaction.
ErrTxnTooBig = errors.New("Txn is too big to fit into one request")
// ErrConflict is returned when a transaction conflicts with another transaction. This can
// happen if the read rows had been updated concurrently by another transaction.
ErrConflict = errors.New("Transaction Conflict. Please retry")
// ErrReadOnlyTxn is returned if an update function is called on a read-only transaction.
ErrReadOnlyTxn = errors.New("No sets or deletes are allowed in a read-only transaction")
// ErrDiscardedTxn is returned if a previously discarded transaction is re-used.
ErrDiscardedTxn = errors.New("This transaction has been discarded. Create a new one")
// ErrEmptyKey is returned if an empty key is passed on an update function.
ErrEmptyKey = errors.New("Key cannot be empty")
// ErrInvalidKey is returned if the key has a special !badger! prefix,
// reserved for internal usage.
ErrInvalidKey = errors.New("Key is using a reserved !badger! prefix")
// ErrRetry is returned when a log file containing the value is not found.
// This usually indicates that it may have been garbage collected, and the
// operation needs to be retried.
ErrRetry = errors.New("Unable to find log file. Please retry")
// ErrThresholdZero is returned if threshold is set to zero, and value log GC is called.
// In such a case, GC can't be run.
ErrThresholdZero = errors.New(
"Value log GC can't run because threshold is set to zero")
// ErrNoRewrite is returned if a call for value log GC doesn't result in a log file rewrite.
ErrNoRewrite = errors.New(
"Value log GC attempt didn't result in any cleanup")
// ErrRejected is returned if a value log GC is called either while another GC is running, or
// after DB::Close has been called.
ErrRejected = errors.New("Value log GC request rejected")
// ErrInvalidRequest is returned if the user request is invalid.
ErrInvalidRequest = errors.New("Invalid request")
// ErrManagedTxn is returned if the user tries to use an API which isn't
// allowed due to external management of transactions, when using ManagedDB.
ErrManagedTxn = errors.New(
"Invalid API request. Not allowed to perform this action using ManagedDB")
// ErrInvalidDump is returned if a data dump made previously cannot be loaded into the database.
ErrInvalidDump = errors.New("Data dump cannot be read")
// ErrZeroBandwidth is returned if the user passes in zero bandwidth for sequence.
ErrZeroBandwidth = errors.New("Bandwidth must be greater than zero")
// ErrInvalidLoadingMode is returned when opt.ValueLogLoadingMode option is not
// within the valid range
ErrInvalidLoadingMode = errors.New("Invalid ValueLogLoadingMode, must be FileIO or MemoryMap")
// ErrReplayNeeded is returned when opt.ReadOnly is set but the
// database requires a value log replay.
ErrReplayNeeded = errors.New("Database was not properly closed, cannot open read-only")
// ErrWindowsNotSupported is returned when opt.ReadOnly is used on Windows
ErrWindowsNotSupported = errors.New("Read-only mode is not supported on Windows")
// ErrTruncateNeeded is returned when the value log gets corrupt, and requires truncation of
// corrupt data to allow Badger to run properly.
ErrTruncateNeeded = errors.New(
"Value log truncate required to run DB. This might result in data loss")
// ErrBlockedWrites is returned if the user called DropAll. During the process of dropping all
// data from Badger, we stop accepting new writes, by returning this error.
ErrBlockedWrites = errors.New("Writes are blocked, possibly due to DropAll or Close")
// ErrNilCallback is returned when subscriber's callback is nil.
ErrNilCallback = errors.New("Callback cannot be nil")
// ErrEncryptionKeyMismatch is returned when the storage key is not
// matched with the key previously given.
ErrEncryptionKeyMismatch = errors.New("Encryption key mismatch")
// ErrInvalidDataKeyID is returned if the datakey id is invalid.
ErrInvalidDataKeyID = errors.New("Invalid datakey id")
// ErrInvalidEncryptionKey is returned if the length of the encryption key is invalid.
ErrInvalidEncryptionKey = errors.New("Encryption key's length should be " +
"either 16, 24, or 32 bytes")
// ErrGCInMemoryMode is returned when db.RunValueLogGC is called in in-memory mode.
ErrGCInMemoryMode = errors.New("Cannot run value log GC when DB is opened in InMemory mode")
// ErrDBClosed is returned when a get operation is performed after closing the DB.
ErrDBClosed = errors.New("DB Closed")
)
badger-2.2007.2/go.mod 0000664 0000000 0000000 00000001323 13721731165 0014303 0 ustar 00root root 0000000 0000000 module github.com/dgraph-io/badger/v2
go 1.12
require (
github.com/DataDog/zstd v1.4.1
github.com/cespare/xxhash v1.1.0
github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2
github.com/dustin/go-humanize v1.0.0
github.com/golang/protobuf v1.3.1
github.com/golang/snappy v0.0.1
github.com/kr/pretty v0.1.0 // indirect
github.com/pkg/errors v0.8.1
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/cobra v0.0.5
github.com/stretchr/testify v1.4.0
golang.org/x/net v0.0.0-20190620200207-3b0461eec859
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
)
badger-2.2007.2/go.sum 0000664 0000000 0000000 00000015322 13721731165 0014334 0 ustar 00root root 0000000 0000000 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM=
github.com/DataDog/zstd v1.4.1/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de h1:t0UHb5vdojIDUqktM6+xJAfScFBsVpXZmqC9dsgJmeA=
github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb h1:fgwFCsaw9buMuxNd6+DQfAuSFqbNiQZpcgJQAgJsK6k=
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
badger-2.2007.2/histogram.go 0000664 0000000 0000000 00000011631 13721731165 0015524 0 ustar 00root root 0000000 0000000 /*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"fmt"
"math"
)
// PrintHistogram builds and displays the key-value size histogram.
// When keyPrefix is set, only the keys that have prefix "keyPrefix" are
// considered for creating the histogram
func (db *DB) PrintHistogram(keyPrefix []byte) {
if db == nil {
fmt.Println("\nCannot build histogram: DB is nil.")
return
}
histogram := db.buildHistogram(keyPrefix)
fmt.Printf("Histogram of key sizes (in bytes)\n")
histogram.keySizeHistogram.printHistogram()
fmt.Printf("Histogram of value sizes (in bytes)\n")
histogram.valueSizeHistogram.printHistogram()
}
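// examplePrintHistogram is an illustrative sketch (not part of the original source) of calling
// the method above. The prefix is hypothetical; passing nil considers every key in the DB.
func examplePrintHistogram(db *DB) {
    db.PrintHistogram([]byte("user/")) // only keys starting with "user/"
    db.PrintHistogram(nil)             // all keys
}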
// histogramData stores information about a histogram
type histogramData struct {
bins []int64
countPerBin []int64
totalCount int64
min int64
max int64
sum int64
}
// sizeHistogram contains keySize histogram and valueSize histogram
type sizeHistogram struct {
keySizeHistogram, valueSizeHistogram histogramData
}
// newSizeHistogram returns a new instance of sizeHistogram with
// properly initialized fields.
func newSizeHistogram() *sizeHistogram {
// TODO(ibrahim): find appropriate bin size.
keyBins := createHistogramBins(1, 16)
valueBins := createHistogramBins(1, 30)
return &sizeHistogram{
keySizeHistogram: histogramData{
bins: keyBins,
countPerBin: make([]int64, len(keyBins)+1),
max: math.MinInt64,
min: math.MaxInt64,
sum: 0,
},
valueSizeHistogram: histogramData{
bins: valueBins,
countPerBin: make([]int64, len(valueBins)+1),
max: math.MinInt64,
min: math.MaxInt64,
sum: 0,
},
}
}
// createHistogramBins creates bins for a histogram. The bin sizes are powers
// of two of the form [2^min_exponent, ..., 2^max_exponent].
func createHistogramBins(minExponent, maxExponent uint32) []int64 {
var bins []int64
for i := minExponent; i <= maxExponent; i++ {
bins = append(bins, int64(1)<<i)
}
return bins
}
// Update adds value to the histogram: it updates the min, max, sum and total count, and
// increments the count of the bin that the value falls into.
func (histogram *histogramData) Update(value int64) {
if value > histogram.max {
histogram.max = value
}
if value < histogram.min {
histogram.min = value
}
histogram.sum += value
histogram.totalCount++
for index := 0; index <= len(histogram.bins); index++ {
// Allocate the value to the last bucket if we've reached the end of the bins array.
if index == len(histogram.bins) {
histogram.countPerBin[index]++
break
}
// Check if the value should be added to the "index" bin
if value < int64(histogram.bins[index]) {
histogram.countPerBin[index]++
break
}
}
}
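// exampleBinning is an illustrative sketch (not part of the original source) of how values fall
// into the bins created above. createHistogramBins(1, 4) yields bounds [2, 4, 8, 16]; a value of
// 5 is counted in bin index 2 (the [4, 8) range), while 100 exceeds every bound and lands in the
// extra overflow bucket at the end of countPerBin.
func exampleBinning() []int64 {
    bins := createHistogramBins(1, 4) // [2, 4, 8, 16]
    h := histogramData{
        bins:        bins,
        countPerBin: make([]int64, len(bins)+1), // one extra overflow bucket
        max:         math.MinInt64,
        min:         math.MaxInt64,
    }
    h.Update(5)   // 5 < 8, counted in bin index 2
    h.Update(100) // larger than every bound, counted in the overflow bucket
    return h.countPerBin // [0 0 1 0 1]
}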
// buildHistogram builds the key-value size histogram.
// When keyPrefix is set, only the keys that have prefix "keyPrefix" are
// considered for creating the histogram
func (db *DB) buildHistogram(keyPrefix []byte) *sizeHistogram {
txn := db.NewTransaction(false)
defer txn.Discard()
itr := txn.NewIterator(DefaultIteratorOptions)
defer itr.Close()
badgerHistogram := newSizeHistogram()
// Collect key and value sizes.
for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() {
item := itr.Item()
badgerHistogram.keySizeHistogram.Update(item.KeySize())
badgerHistogram.valueSizeHistogram.Update(item.ValueSize())
}
return badgerHistogram
}
// printHistogram prints the histogram data in a human-readable format.
func (histogram histogramData) printHistogram() {
fmt.Printf("Total count: %d\n", histogram.totalCount)
fmt.Printf("Min value: %d\n", histogram.min)
fmt.Printf("Max value: %d\n", histogram.max)
fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount))
fmt.Printf("%24s %9s\n", "Range", "Count")
numBins := len(histogram.bins)
for index, count := range histogram.countPerBin {
if count == 0 {
continue
}
// The last bin covers the range from the last bound up to infinity, so it's
// processed differently from the other bins.
if index == len(histogram.countPerBin)-1 {
lowerBound := int(histogram.bins[numBins-1])
fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count)
continue
}
upperBound := int(histogram.bins[index])
lowerBound := 0
if index > 0 {
lowerBound = int(histogram.bins[index-1])
}
fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count)
}
fmt.Println()
}
badger-2.2007.2/histogram_test.go 0000664 0000000 0000000 00000006554 13721731165 0016573 0 ustar 00root root 0000000 0000000 /*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package badger
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestBuildKeyValueSizeHistogram(t *testing.T) {
t.Run("All same size key-values", func(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
entries := int64(40)
err := db.Update(func(txn *Txn) error {
for i := rune(0); i < rune(entries); i++ {
err := txn.SetEntry(NewEntry([]byte(string(i)), []byte("B")))
if err != nil {
return err
}
}
return nil
})
require.NoError(t, err)
histogram := db.buildHistogram(nil)
keyHistogram := histogram.keySizeHistogram
valueHistogram := histogram.valueSizeHistogram
require.Equal(t, entries, keyHistogram.totalCount)
require.Equal(t, entries, valueHistogram.totalCount)
// Each entry is of size one, so the sum of sizes should equal the number of entries.
require.Equal(t, entries, valueHistogram.sum)
require.Equal(t, entries, keyHistogram.sum)
// All value sizes are same. The first bin should have all the values.
require.Equal(t, entries, valueHistogram.countPerBin[0])
require.Equal(t, entries, keyHistogram.countPerBin[0])
require.Equal(t, int64(1), keyHistogram.max)
require.Equal(t, int64(1), keyHistogram.min)
require.Equal(t, int64(1), valueHistogram.max)
require.Equal(t, int64(1), valueHistogram.min)
})
})
t.Run("different size key-values", func(t *testing.T) {
runBadgerTest(t, nil, func(t *testing.T, db *DB) {
entries := int64(3)
err := db.Update(func(txn *Txn) error {
if err := txn.SetEntry(NewEntry([]byte("A"), []byte("B"))); err != nil {
return err
}
if err := txn.SetEntry(NewEntry([]byte("AA"), []byte("BB"))); err != nil {
return err
}
return txn.SetEntry(NewEntry([]byte("AAA"), []byte("BBB")))
})
require.NoError(t, err)
histogram := db.buildHistogram(nil)
keyHistogram := histogram.keySizeHistogram
valueHistogram := histogram.valueSizeHistogram
require.Equal(t, entries, keyHistogram.totalCount)
require.Equal(t, entries, valueHistogram.totalCount)
// Key and value sizes are 1, 2 and 3 bytes, so each sum should be 6.
require.Equal(t, int64(6), valueHistogram.sum)
require.Equal(t, int64(6), keyHistogram.sum)
// The length-1 key falls in the first bucket; the length-2 and length-3 keys fall in the
// second bucket.
require.Equal(t, int64(1), valueHistogram.countPerBin[0])
require.Equal(t, int64(2), valueHistogram.countPerBin[1])
require.Equal(t, int64(1), keyHistogram.countPerBin[0])
require.Equal(t, int64(2), keyHistogram.countPerBin[1])
require.Equal(t, int64(3), keyHistogram.max)
require.Equal(t, int64(1), keyHistogram.min)
require.Equal(t, int64(3), valueHistogram.max)
require.Equal(t, int64(1), valueHistogram.min)
})
})
}
} badger-2.2007.2/images/ 0000775 0000000 0000000 00000000000 13721731165 0014443 5 ustar 00root root 0000000 0000000 badger-2.2007.2/images/benchmarks-rocksdb.png 0000664 0000000 0000000 00000202572 13721731165 0020723 0 ustar 00root root 0000000 0000000