==> golang-github-valyala-gozstd-1.14.2+ds/.gitignore <==
tags

==> golang-github-valyala-gozstd-1.14.2+ds/.travis.yml <==
language: go

os:
- linux
- osx
- freebsd

go:
- 1.14

script:
  # build test for supported platforms
  - GOARCH=amd64 go build

  # run tests on a standard platform
  - go test -v -coverprofile=coverage.txt -covermode=atomic
  - go test -v -race

after_success:
  # Upload coverage results to codecov.io
  - bash <(curl -s https://codecov.io/bash)

==> golang-github-valyala-gozstd-1.14.2+ds/LICENSE <==
The MIT License (MIT)

Copyright (c) 2018 Aliaksandr Valialkin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==> golang-github-valyala-gozstd-1.14.2+ds/Makefile <==
GOOS ?= $(shell go env GOOS)
GOARCH ?= $(shell go env GOARCH)
GOOS_GOARCH := $(GOOS)_$(GOARCH)
GOOS_GOARCH_NATIVE := $(shell go env GOHOSTOS)_$(shell go env GOHOSTARCH)
LIBZSTD_NAME := libzstd_$(GOOS_GOARCH).a
ZSTD_VERSION ?= v1.5.0
MUSL_BUILDER_IMAGE=golang:1.17.1-alpine
BUILDER_IMAGE := local/builder_musl:2.0.0-$(shell echo $(MUSL_BUILDER_IMAGE) | tr : _)

.PHONY: libzstd.a

libzstd.a: $(LIBZSTD_NAME)

$(LIBZSTD_NAME):
ifeq ($(GOOS_GOARCH),$(GOOS_GOARCH_NATIVE))
	cd zstd/lib && ZSTD_LEGACY_SUPPORT=0 MOREFLAGS=$(MOREFLAGS) $(MAKE) clean libzstd.a
	mv zstd/lib/libzstd.a $(LIBZSTD_NAME)
else
ifeq ($(GOOS_GOARCH),linux_arm)
	cd zstd/lib && CC=arm-linux-gnueabi-gcc ZSTD_LEGACY_SUPPORT=0 MOREFLAGS=$(MOREFLAGS) $(MAKE) clean libzstd.a
	mv zstd/lib/libzstd.a libzstd_linux_arm.a
endif
ifeq ($(GOOS_GOARCH),linux_arm64)
	cd zstd/lib && CC=aarch64-linux-gnu-gcc ZSTD_LEGACY_SUPPORT=0 MOREFLAGS=$(MOREFLAGS) $(MAKE) clean libzstd.a
	mv zstd/lib/libzstd.a libzstd_linux_arm64.a
endif
ifeq ($(GOOS_GOARCH),linux_musl_amd64)
	cd zstd/lib && ZSTD_LEGACY_SUPPORT=0 MOREFLAGS=$(MOREFLAGS) $(MAKE) clean libzstd.a
	mv zstd/lib/libzstd.a libzstd_linux_musl_amd64.a
endif
endif

package-builder:
	(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(BUILDER_IMAGE)$$') \
		|| docker build \
			--build-arg builder_image=$(MUSL_BUILDER_IMAGE) \
			--tag $(BUILDER_IMAGE) \
			builder

package-musl: package-builder
	docker run --rm \
		--user $(shell id -u):$(shell id -g) \
		--mount type=bind,src="$(shell pwd)",dst=/zstd \
		-w /zstd \
		$(DOCKER_OPTS) \
		$(BUILDER_IMAGE) \
		sh -c "GOOS=linux_musl make clean libzstd.a"

clean:
	rm -f $(LIBZSTD_NAME)
	cd zstd && $(MAKE) clean

update-zstd:
	rm -rf zstd-tmp
	git clone --branch $(ZSTD_VERSION) --depth 1 https://github.com/facebook/zstd zstd-tmp
	rm -rf zstd-tmp/.git
	rm -rf zstd
	mv zstd-tmp zstd
	$(MAKE) clean libzstd.a
	cp zstd/lib/zstd.h .
	cp zstd/lib/zdict.h .
	cp zstd/lib/zstd_errors.h .

test:
	CGO_ENABLED=1 GODEBUG=cgocheck=2 go test -v

bench:
	CGO_ENABLED=1 go test -bench=.

==> golang-github-valyala-gozstd-1.14.2+ds/README.md <==
[![Build Status](https://travis-ci.org/valyala/gozstd.svg)](https://travis-ci.org/valyala/gozstd)
[![GoDoc](https://godoc.org/github.com/valyala/gozstd?status.svg)](http://godoc.org/github.com/valyala/gozstd)
[![Go Report](https://goreportcard.com/badge/github.com/valyala/gozstd)](https://goreportcard.com/report/github.com/valyala/gozstd)
[![codecov](https://codecov.io/gh/valyala/gozstd/branch/master/graph/badge.svg)](https://codecov.io/gh/valyala/gozstd)

# gozstd - go wrapper for [zstd](http://facebook.github.io/zstd/)

## Features

* Vendors upstream [zstd](https://github.com/facebook/zstd) without any modifications.
* [Simple API](https://godoc.org/github.com/valyala/gozstd).
* Optimized for speed. The API may be easily used in zero-allocation mode.
* `Compress*` and `Decompress*` functions are optimized for high concurrency.
* Proper [Writer.Flush](https://godoc.org/github.com/valyala/gozstd#Writer.Flush) for network apps.
* Supports the following features from upstream [zstd](https://facebook.github.io/zstd/):
  * Block / stream compression / decompression with all the supported compression levels
    and with dictionary support.
  * [Dictionary](https://github.com/facebook/zstd#the-case-for-small-data-compression) building
    from a sample set. The created dictionary may be saved to persistent storage / transferred
    over the network.
  * Dictionary loading for compression / decompression.

Pull requests for missing upstream `zstd` features are welcome.

## Quick start

### How to install `gozstd`?

```
go get -u github.com/valyala/gozstd
```

### How to compress data?

The easiest way is just to use [Compress](https://godoc.org/github.com/valyala/gozstd#Compress):

```go
compressedData := Compress(nil, data)
```

There is also [StreamCompress](https://godoc.org/github.com/valyala/gozstd#StreamCompress)
and [Writer](https://godoc.org/github.com/valyala/gozstd#Writer) for stream compression.

### How to decompress data?

The easiest way is just to use [Decompress](https://godoc.org/github.com/valyala/gozstd#Decompress):

```go
data, err := Decompress(nil, compressedData)
```

There is also [StreamDecompress](https://godoc.org/github.com/valyala/gozstd#StreamDecompress)
and [Reader](https://godoc.org/github.com/valyala/gozstd#Reader) for stream decompression.
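### How to compress data with a dictionary?

Below is a minimal sketch of dictionary-based compression, adapted from
`dict_example_test.go` in this repo; the `samples` contents and the `data`
variable are placeholders for your own payloads:

```go
// Build a dictionary from samples resembling the real data.
var samples [][]byte
for i := 0; i < 1000; i++ {
	samples = append(samples, []byte(fmt.Sprintf("sample line %d", i)))
}
dict := BuildDict(samples, 8*1024)

// Create reusable CDict/DDict objects from the dictionary.
cd, err := NewCDict(dict)
if err != nil {
	log.Fatalf("cannot create CDict: %s", err)
}
defer cd.Release()

dd, err := NewDDict(dict)
if err != nil {
	log.Fatalf("cannot create DDict: %s", err)
}
defer dd.Release()

// Compress and decompress data using the dictionary.
compressedData := CompressDict(nil, data, cd)
plainData, err := DecompressDict(nil, compressedData, dd)
```

Note that data compressed with a dictionary can be decompressed only with the same dictionary.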
### How to cross-compile gozstd?

If you're cross-compiling some code that uses gozstd and you stumble upon the following error:

```
# github.com/valyala/gozstd
/go/pkg/mod/github.com/valyala/gozstd@v1.6.2/stream.go:31:59: undefined: CDict
/go/pkg/mod/github.com/valyala/gozstd@v1.6.2/stream.go:35:64: undefined: CDict
/go/pkg/mod/github.com/valyala/gozstd@v1.6.2/stream.go:47:20: undefined: Writer
```

You can easily fix it by enabling [CGO](https://golang.org/cmd/cgo/) and using a cross-compiler (e.g. `arm-linux-gnueabi-gcc`):

```bash
env CC=arm-linux-gnueabi-gcc GOOS=linux GOARCH=arm CGO_ENABLED=1 go build ./main.go
```

**NOTE**: Check [#21](https://github.com/valyala/gozstd/issues/21) for more info.

### Who uses gozstd?

* [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)

## FAQ

* Q: _Which go version is supported?_

  A: `go1.10` and newer. Pull requests for older go versions are accepted.

* Q: _Which platforms/architectures are supported?_

  A: `linux/amd64`, `linux/arm`, `linux/arm64`, `freebsd/amd64`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`.
  Pull requests for other platforms/architectures are accepted.

* Q: _I don't trust `libzstd*.a` binary files from the repo or these files don't work on my OS/ARCH. How to rebuild them?_

  A: Just run `make clean libzstd.a` if your OS/ARCH is supported.

* Q: _How do I specify custom build flags when recompiling `libzstd*.a`?_

  A: You can specify the MOREFLAGS=... variable when running `make` like this: `MOREFLAGS=-fPIC make clean libzstd.a`.

* Q: _Why does the repo contain `libzstd*.a` binary files?_

  A: This simplifies package installation to the usual `go get` without additional steps for building the `libzstd*.a` files.

==> golang-github-valyala-gozstd-1.14.2+ds/builder/Dockerfile <==
ARG builder_image
FROM $builder_image
RUN apk add gcc musl-dev make git --no-cache

==> golang-github-valyala-gozstd-1.14.2+ds/dict.go <==
package gozstd

/*
#cgo CFLAGS: -O3

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#define ZDICT_STATIC_LINKING_ONLY
#include "zdict.h"

#include <stdint.h>  // for uintptr_t

// The following *_wrapper functions allow avoiding memory allocations
// during calls from Go.
// See https://github.com/golang/go/issues/24450 .
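//
// Passing the dict buffer as uintptr_t hides it from cgo's pointer checks,
// so no per-call checks or allocations are performed. The Go callers below
// keep the buffer alive across the call via runtime.KeepAlive.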
static ZSTD_CDict* ZSTD_createCDict_wrapper(uintptr_t dictBuffer, size_t dictSize, int compressionLevel) {
    return ZSTD_createCDict((const void *)dictBuffer, dictSize, compressionLevel);
}

static ZSTD_DDict* ZSTD_createDDict_wrapper(uintptr_t dictBuffer, size_t dictSize) {
    return ZSTD_createDDict((const void *)dictBuffer, dictSize);
}
*/
import "C"

import (
	"fmt"
	"runtime"
	"sync"
	"unsafe"
)

const minDictLen = C.ZDICT_DICTSIZE_MIN

// BuildDict returns dictionary built from the given samples.
//
// The resulting dictionary size will be close to desiredDictLen.
//
// The returned dictionary may be passed to NewCDict* and NewDDict.
func BuildDict(samples [][]byte, desiredDictLen int) []byte {
	if desiredDictLen < minDictLen {
		desiredDictLen = minDictLen
	}
	dict := make([]byte, desiredDictLen)

	// Calculate the total samples size.
	samplesBufLen := 0
	for _, sample := range samples {
		if len(sample) == 0 {
			// Skip empty samples.
			continue
		}
		samplesBufLen += len(sample)
	}

	// Construct flat samplesBuf and samplesSizes.
	samplesBuf := make([]byte, 0, samplesBufLen)
	samplesSizes := make([]C.size_t, 0, len(samples))
	for _, sample := range samples {
		samplesBuf = append(samplesBuf, sample...)
		samplesSizes = append(samplesSizes, C.size_t(len(sample)))
	}

	// Add fake samples if the original samples are too small.
	minSamplesBufLen := int(C.ZDICT_CONTENTSIZE_MIN)
	if minSamplesBufLen < minDictLen {
		minSamplesBufLen = minDictLen
	}
	for samplesBufLen < minSamplesBufLen {
		fakeSample := []byte(fmt.Sprintf("this is a fake sample %d", samplesBufLen))
		samplesBuf = append(samplesBuf, fakeSample...)
		samplesSizes = append(samplesSizes, C.size_t(len(fakeSample)))
		samplesBufLen += len(fakeSample)
	}

	// Run ZDICT_trainFromBuffer under lock, since it looks like it
	// is unsafe for concurrent usage (it just randomly crashes).
	// TODO: remove this restriction.
	buildDictLock.Lock()
	result := C.ZDICT_trainFromBuffer(
		unsafe.Pointer(&dict[0]),
		C.size_t(len(dict)),
		unsafe.Pointer(&samplesBuf[0]),
		&samplesSizes[0],
		C.unsigned(len(samplesSizes)))
	buildDictLock.Unlock()
	if C.ZDICT_isError(result) != 0 {
		// Return empty dictionary, since the original samples are too small.
		return nil
	}

	dictLen := int(result)
	return dict[:dictLen]
}

var buildDictLock sync.Mutex

// CDict is a dictionary used for compression.
//
// A single CDict may be re-used in concurrently running goroutines.
type CDict struct {
	p                *C.ZSTD_CDict
	compressionLevel int
}

// NewCDict creates new CDict from the given dict.
//
// Call Release when the returned dict is no longer used.
func NewCDict(dict []byte) (*CDict, error) {
	return NewCDictLevel(dict, DefaultCompressionLevel)
}

// NewCDictLevel creates new CDict from the given dict
// using the given compressionLevel.
//
// Call Release when the returned dict is no longer used.
func NewCDictLevel(dict []byte, compressionLevel int) (*CDict, error) {
	if len(dict) == 0 {
		return nil, fmt.Errorf("dict cannot be empty")
	}

	cd := &CDict{
		p: C.ZSTD_createCDict_wrapper(
			C.uintptr_t(uintptr(unsafe.Pointer(&dict[0]))),
			C.size_t(len(dict)),
			C.int(compressionLevel)),
		compressionLevel: compressionLevel,
	}
	// Prevent dict from being GC'ed during the CGO call above.
	runtime.KeepAlive(dict)
	runtime.SetFinalizer(cd, freeCDict)
	return cd, nil
}

// Release releases resources occupied by cd.
//
// cd cannot be used after the release.
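//
// Calling Release multiple times is safe - the second and subsequent calls
// are no-ops, since cd.p is set to nil on the first call.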
func (cd *CDict) Release() {
	if cd.p == nil {
		return
	}

	result := C.ZSTD_freeCDict(cd.p)
	ensureNoError("ZSTD_freeCDict", result)
	cd.p = nil
}

func freeCDict(v interface{}) {
	v.(*CDict).Release()
}

// DDict is a dictionary used for decompression.
//
// A single DDict may be re-used in concurrently running goroutines.
type DDict struct {
	p *C.ZSTD_DDict
}

// NewDDict creates new DDict from the given dict.
//
// Call Release when the returned dict is no longer needed.
func NewDDict(dict []byte) (*DDict, error) {
	if len(dict) == 0 {
		return nil, fmt.Errorf("dict cannot be empty")
	}

	dd := &DDict{
		p: C.ZSTD_createDDict_wrapper(
			C.uintptr_t(uintptr(unsafe.Pointer(&dict[0]))),
			C.size_t(len(dict))),
	}
	// Prevent dict from being GC'ed during the CGO call above.
	runtime.KeepAlive(dict)
	runtime.SetFinalizer(dd, freeDDict)
	return dd, nil
}

// Release releases resources occupied by dd.
//
// dd cannot be used after the release.
func (dd *DDict) Release() {
	if dd.p == nil {
		return
	}

	result := C.ZSTD_freeDDict(dd.p)
	ensureNoError("ZSTD_freeDDict", result)
	dd.p = nil
}

func freeDDict(v interface{}) {
	v.(*DDict).Release()
}

==> golang-github-valyala-gozstd-1.14.2+ds/dict_example_test.go <==
package gozstd

import (
	"fmt"
	"log"
)

func ExampleBuildDict() {
	// Collect samples for the dictionary.
	var samples [][]byte
	for i := 0; i < 1000; i++ {
		sample := fmt.Sprintf("this is a dict sample number %d", i)
		samples = append(samples, []byte(sample))
	}

	// Build a dictionary with the desired size of 8Kb.
	dict := BuildDict(samples, 8*1024)

	// Now the dict may be used for compression/decompression.

	// Create CDict from the dict.
	cd, err := NewCDict(dict)
	if err != nil {
		log.Fatalf("cannot create CDict: %s", err)
	}
	defer cd.Release()

	// Compress multiple blocks with the same CDict.
	var compressedBlocks [][]byte
	for i := 0; i < 3; i++ {
		plainData := fmt.Sprintf("this is line %d for dict compression", i)
		compressedData := CompressDict(nil, []byte(plainData), cd)
		compressedBlocks = append(compressedBlocks, compressedData)
	}

	// The compressedData must be decompressed with the same dict.

	// Create DDict from the dict.
	dd, err := NewDDict(dict)
	if err != nil {
		log.Fatalf("cannot create DDict: %s", err)
	}
	defer dd.Release()

	// Decompress multiple blocks with the same DDict.
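	// A DDict holds the dictionary in digested form, so sharing it
	// across calls avoids re-parsing the dictionary for every block.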
	for _, compressedData := range compressedBlocks {
		decompressedData, err := DecompressDict(nil, compressedData, dd)
		if err != nil {
			log.Fatalf("cannot decompress data: %s", err)
		}
		fmt.Printf("%s\n", decompressedData)
	}

	// Output:
	// this is line 0 for dict compression
	// this is line 1 for dict compression
	// this is line 2 for dict compression
}

==> golang-github-valyala-gozstd-1.14.2+ds/dict_test.go <==
package gozstd

import (
	"fmt"
	"math/rand"
	"testing"
	"time"
)

func TestCDictEmpty(t *testing.T) {
	cd, err := NewCDict(nil)
	if err == nil {
		t.Fatalf("expecting non-nil error")
	}
	if cd != nil {
		t.Fatalf("expecting nil cd")
	}
}

func TestDDictEmpty(t *testing.T) {
	dd, err := NewDDict(nil)
	if err == nil {
		t.Fatalf("expecting non-nil error")
	}
	if dd != nil {
		t.Fatalf("expecting nil dd")
	}
}

func TestCDictCreateRelease(t *testing.T) {
	var samples [][]byte
	for i := 0; i < 1000; i++ {
		samples = append(samples, []byte(fmt.Sprintf("sample %d", i)))
	}
	dict := BuildDict(samples, 64*1024)

	for i := 0; i < 10; i++ {
		cd, err := NewCDict(dict)
		if err != nil {
			t.Fatalf("cannot create dict: %s", err)
		}
		cd.Release()
	}
}

func TestDDictCreateRelease(t *testing.T) {
	var samples [][]byte
	for i := 0; i < 1000; i++ {
		samples = append(samples, []byte(fmt.Sprintf("sample %d", i)))
	}
	dict := BuildDict(samples, 64*1024)

	for i := 0; i < 10; i++ {
		dd, err := NewDDict(dict)
		if err != nil {
			t.Fatalf("cannot create dict: %s", err)
		}
		dd.Release()
	}
}

func TestBuildDict(t *testing.T) {
	for _, samplesCount := range []int{0, 1, 10, 100, 1000} {
		t.Run(fmt.Sprintf("samples_%d", samplesCount), func(t *testing.T) {
			var samples [][]byte
			for i := 0; i < samplesCount; i++ {
				sample := []byte(fmt.Sprintf("sample %d, rand num %d, other num %X", i, rand.Intn(100), rand.Intn(100000)))
				samples = append(samples, sample)
				samples = append(samples, nil) // add empty sample
			}
			for _, desiredDictLen := range []int{20, 256, 1000, 10000} {
				t.Run(fmt.Sprintf("desiredDictLen_%d", desiredDictLen), func(t *testing.T) {
					testBuildDict(t, samples, desiredDictLen)
				})
			}
		})
	}
}

func testBuildDict(t *testing.T, samples [][]byte, desiredDictLen int) {
	t.Helper()

	// Serial test.
	dictOrig := BuildDict(samples, desiredDictLen)

	// Concurrent test.
	ch := make(chan error, 3)
	for i := 0; i < cap(ch); i++ {
		go func() {
			dict := BuildDict(samples, desiredDictLen)
			if string(dict) != string(dictOrig) {
				ch <- fmt.Errorf("unexpected dict; got\n%X; want\n%X", dict, dictOrig)
			}
			ch <- nil
		}()
	}
	for i := 0; i < cap(ch); i++ {
		select {
		case err := <-ch:
			if err != nil {
				t.Fatalf("error in concurrent test: %s", err)
			}
		case <-time.After(time.Second):
			t.Fatalf("timeout in concurrent test")
		}
	}
}

==> golang-github-valyala-gozstd-1.14.2+ds/doc.go <==
// Package gozstd is a Go wrapper for zstd.
//
// Gozstd is used in https://github.com/VictoriaMetrics/VictoriaMetrics .
package gozstd

==> golang-github-valyala-gozstd-1.14.2+ds/go.mod <==
module github.com/valyala/gozstd

go 1.12

==> golang-github-valyala-gozstd-1.14.2+ds/gozstd.go <==
package gozstd

/*
#cgo CFLAGS: -O3

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"

#include <stdint.h>  // for uintptr_t

// The following *_wrapper functions allow avoiding memory allocations
// during calls from Go.
// See https://github.com/golang/go/issues/24450 .

static size_t ZSTD_compressCCtx_wrapper(uintptr_t ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, int compressionLevel) {
    return ZSTD_compressCCtx((ZSTD_CCtx*)ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, compressionLevel);
}

static size_t ZSTD_compress_usingCDict_wrapper(uintptr_t ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, uintptr_t cdict) {
    return ZSTD_compress_usingCDict((ZSTD_CCtx*)ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, (const ZSTD_CDict*)cdict);
}

static size_t ZSTD_decompressDCtx_wrapper(uintptr_t ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize) {
    return ZSTD_decompressDCtx((ZSTD_DCtx*)ctx, (void*)dst, dstCapacity, (const void*)src, srcSize);
}

static size_t ZSTD_decompress_usingDDict_wrapper(uintptr_t ctx, uintptr_t dst, size_t dstCapacity, uintptr_t src, size_t srcSize, uintptr_t ddict) {
    return ZSTD_decompress_usingDDict((ZSTD_DCtx*)ctx, (void*)dst, dstCapacity, (const void*)src, srcSize, (const ZSTD_DDict*)ddict);
}

static unsigned long long ZSTD_getFrameContentSize_wrapper(uintptr_t src, size_t srcSize) {
    return ZSTD_getFrameContentSize((const void*)src, srcSize);
}
*/
import "C"

import (
	"fmt"
	"io"
	"runtime"
	"sync"
	"unsafe"
)

// DefaultCompressionLevel is the default compression level.
const DefaultCompressionLevel = 3 // Obtained from ZSTD_CLEVEL_DEFAULT.

// Compress appends compressed src to dst and returns the result.
func Compress(dst, src []byte) []byte {
	return compressDictLevel(dst, src, nil, DefaultCompressionLevel)
}

// CompressLevel appends compressed src to dst and returns the result.
//
// The given compressionLevel is used for the compression.
func CompressLevel(dst, src []byte, compressionLevel int) []byte {
	return compressDictLevel(dst, src, nil, compressionLevel)
}

// CompressDict appends compressed src to dst and returns the result.
//
// The given dictionary is used for the compression.
func CompressDict(dst, src []byte, cd *CDict) []byte {
	return compressDictLevel(dst, src, cd, 0)
}

func compressDictLevel(dst, src []byte, cd *CDict, compressionLevel int) []byte {
	var cctx, cctxDict *cctxWrapper
	if cd == nil {
		cctx = cctxPool.Get().(*cctxWrapper)
	} else {
		cctxDict = cctxDictPool.Get().(*cctxWrapper)
	}

	dst = compress(cctx, cctxDict, dst, src, cd, compressionLevel)

	if cd == nil {
		cctxPool.Put(cctx)
	} else {
		cctxDictPool.Put(cctxDict)
	}
	return dst
}

var cctxPool = &sync.Pool{
	New: newCCtx,
}

var cctxDictPool = &sync.Pool{
	New: newCCtx,
}

func newCCtx() interface{} {
	cctx := C.ZSTD_createCCtx()
	cw := &cctxWrapper{
		cctx: cctx,
	}
	runtime.SetFinalizer(cw, freeCCtx)
	return cw
}

func freeCCtx(cw *cctxWrapper) {
	C.ZSTD_freeCCtx(cw.cctx)
	cw.cctx = nil
}

type cctxWrapper struct {
	cctx *C.ZSTD_CCtx
}

func compress(cctx, cctxDict *cctxWrapper, dst, src []byte, cd *CDict, compressionLevel int) []byte {
	if len(src) == 0 {
		return dst
	}

	dstLen := len(dst)
	if cap(dst) > dstLen {
		// Fast path - try compressing without dst resize.
		result := compressInternal(cctx, cctxDict, dst[dstLen:cap(dst)], src, cd, compressionLevel, false)
		compressedSize := int(result)
		if compressedSize >= 0 {
			// All OK.
			return dst[:dstLen+compressedSize]
		}
		if C.ZSTD_getErrorCode(result) != C.ZSTD_error_dstSize_tooSmall {
			// Unexpected error.
			panic(fmt.Errorf("BUG: unexpected error during compression with cd=%p: %s", cd, errStr(result)))
		}
	}

	// Slow path - resize dst to fit compressed data.
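	// ZSTD_compressBound returns the worst-case compressed size
	// for the given src size; one extra byte of headroom is added on top.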
	compressBound := int(C.ZSTD_compressBound(C.size_t(len(src)))) + 1
	if n := dstLen + compressBound - cap(dst); n > 0 {
		// This should be optimized since go 1.11 - see https://golang.org/doc/go1.11#performance-compiler.
		dst = append(dst[:cap(dst)], make([]byte, n)...)
	}

	result := compressInternal(cctx, cctxDict, dst[dstLen:dstLen+compressBound], src, cd, compressionLevel, true)
	compressedSize := int(result)
	dst = dst[:dstLen+compressedSize]
	if cap(dst)-len(dst) > 4096 {
		// Re-allocate dst in order to remove superfluous capacity and reduce memory usage.
		dst = append([]byte{}, dst...)
	}
	return dst
}

func compressInternal(cctx, cctxDict *cctxWrapper, dst, src []byte, cd *CDict, compressionLevel int, mustSucceed bool) C.size_t {
	if cd != nil {
		result := C.ZSTD_compress_usingCDict_wrapper(
			C.uintptr_t(uintptr(unsafe.Pointer(cctxDict.cctx))),
			C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
			C.size_t(cap(dst)),
			C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
			C.size_t(len(src)),
			C.uintptr_t(uintptr(unsafe.Pointer(cd.p))))
		// Prevent dst and src from being GC'ed during the CGO call above.
		runtime.KeepAlive(dst)
		runtime.KeepAlive(src)
		if mustSucceed {
			ensureNoError("ZSTD_compress_usingCDict_wrapper", result)
		}
		return result
	}
	result := C.ZSTD_compressCCtx_wrapper(
		C.uintptr_t(uintptr(unsafe.Pointer(cctx.cctx))),
		C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
		C.size_t(cap(dst)),
		C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
		C.size_t(len(src)),
		C.int(compressionLevel))
	// Prevent dst and src from being GC'ed during the CGO call above.
	runtime.KeepAlive(dst)
	runtime.KeepAlive(src)
	if mustSucceed {
		ensureNoError("ZSTD_compressCCtx_wrapper", result)
	}
	return result
}

// Decompress appends decompressed src to dst and returns the result.
func Decompress(dst, src []byte) ([]byte, error) {
	return DecompressDict(dst, src, nil)
}

// DecompressDict appends decompressed src to dst and returns the result.
//
// The given dictionary dd is used for the decompression.
func DecompressDict(dst, src []byte, dd *DDict) ([]byte, error) {
	var dctx, dctxDict *dctxWrapper
	if dd == nil {
		dctx = dctxPool.Get().(*dctxWrapper)
	} else {
		dctxDict = dctxDictPool.Get().(*dctxWrapper)
	}

	var err error
	dst, err = decompress(dctx, dctxDict, dst, src, dd)

	if dd == nil {
		dctxPool.Put(dctx)
	} else {
		dctxDictPool.Put(dctxDict)
	}
	return dst, err
}

var dctxPool = &sync.Pool{
	New: newDCtx,
}

var dctxDictPool = &sync.Pool{
	New: newDCtx,
}

func newDCtx() interface{} {
	dctx := C.ZSTD_createDCtx()
	dw := &dctxWrapper{
		dctx: dctx,
	}
	runtime.SetFinalizer(dw, freeDCtx)
	return dw
}

func freeDCtx(dw *dctxWrapper) {
	C.ZSTD_freeDCtx(dw.dctx)
	dw.dctx = nil
}

type dctxWrapper struct {
	dctx *C.ZSTD_DCtx
}

func decompress(dctx, dctxDict *dctxWrapper, dst, src []byte, dd *DDict) ([]byte, error) {
	if len(src) == 0 {
		return dst, nil
	}

	dstLen := len(dst)
	if cap(dst) > dstLen {
		// Fast path - try decompressing without dst resize.
		result := decompressInternal(dctx, dctxDict, dst[dstLen:cap(dst)], src, dd)
		decompressedSize := int(result)
		if decompressedSize >= 0 {
			// All OK.
			return dst[:dstLen+decompressedSize], nil
		}

		if C.ZSTD_getErrorCode(result) != C.ZSTD_error_dstSize_tooSmall {
			// Error during decompression.
			return dst[:dstLen], fmt.Errorf("decompression error: %s", errStr(result))
		}
	}

	// Slow path - resize dst to fit decompressed data.
	decompressBound := int(C.ZSTD_getFrameContentSize_wrapper(
		C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))), C.size_t(len(src))))
	// Prevent src from being GC'ed during the CGO call above.
	runtime.KeepAlive(src)
	switch uint64(decompressBound) {
	case uint64(C.ZSTD_CONTENTSIZE_UNKNOWN):
		return streamDecompress(dst, src, dd)
	case uint64(C.ZSTD_CONTENTSIZE_ERROR):
		return dst, fmt.Errorf("cannot decompress invalid src")
	}
	decompressBound++

	if n := dstLen + decompressBound - cap(dst); n > 0 {
		// This should be optimized since go 1.11 - see https://golang.org/doc/go1.11#performance-compiler.
		dst = append(dst[:cap(dst)], make([]byte, n)...)
	}

	result := decompressInternal(dctx, dctxDict, dst[dstLen:dstLen+decompressBound], src, dd)
	decompressedSize := int(result)
	if decompressedSize >= 0 {
		dst = dst[:dstLen+decompressedSize]
		if cap(dst)-len(dst) > 4096 {
			// Re-allocate dst in order to remove superfluous capacity and reduce memory usage.
			dst = append([]byte{}, dst...)
		}
		return dst, nil
	}

	// Error during decompression.
	return dst[:dstLen], fmt.Errorf("decompression error: %s", errStr(result))
}

func decompressInternal(dctx, dctxDict *dctxWrapper, dst, src []byte, dd *DDict) C.size_t {
	var n C.size_t
	if dd != nil {
		n = C.ZSTD_decompress_usingDDict_wrapper(
			C.uintptr_t(uintptr(unsafe.Pointer(dctxDict.dctx))),
			C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
			C.size_t(cap(dst)),
			C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
			C.size_t(len(src)),
			C.uintptr_t(uintptr(unsafe.Pointer(dd.p))))
	} else {
		n = C.ZSTD_decompressDCtx_wrapper(
			C.uintptr_t(uintptr(unsafe.Pointer(dctx.dctx))),
			C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
			C.size_t(cap(dst)),
			C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
			C.size_t(len(src)))
	}
	// Prevent dst and src from being GC'ed during the CGO calls above.
	runtime.KeepAlive(dst)
	runtime.KeepAlive(src)
	return n
}

func errStr(result C.size_t) string {
	errCode := C.ZSTD_getErrorCode(result)
	errCStr := C.ZSTD_getErrorString(errCode)
	return C.GoString(errCStr)
}

func ensureNoError(funcName string, result C.size_t) {
	if int(result) >= 0 {
		// Fast path - avoid calling C function.
		return
	}
	if C.ZSTD_getErrorCode(result) != 0 {
		panic(fmt.Errorf("BUG: unexpected error in %s: %s", funcName, errStr(result)))
	}
}

func streamDecompress(dst, src []byte, dd *DDict) ([]byte, error) {
	sd := getStreamDecompressor(dd)
	sd.dst = dst
	sd.src = src
	_, err := sd.zr.WriteTo(sd)
	dst = sd.dst
	putStreamDecompressor(sd)
	return dst, err
}

type streamDecompressor struct {
	dst       []byte
	src       []byte
	srcOffset int

	zr *Reader
}

type srcReader streamDecompressor

func (sr *srcReader) Read(p []byte) (int, error) {
	sd := (*streamDecompressor)(sr)
	n := copy(p, sd.src[sd.srcOffset:])
	sd.srcOffset += n
	if n < len(p) {
		return n, io.EOF
	}
	return n, nil
}

func (sd *streamDecompressor) Write(p []byte) (int, error) {
	sd.dst = append(sd.dst, p...)
	return len(p), nil
}

func getStreamDecompressor(dd *DDict) *streamDecompressor {
	v := streamDecompressorPool.Get()
	if v == nil {
		sd := &streamDecompressor{
			zr: NewReader(nil),
		}
		v = sd
	}
	sd := v.(*streamDecompressor)
	sd.zr.Reset((*srcReader)(sd), dd)
	return sd
}

func putStreamDecompressor(sd *streamDecompressor) {
	sd.dst = nil
	sd.src = nil
	sd.srcOffset = 0
	sd.zr.Reset(nil, nil)
	streamDecompressorPool.Put(sd)
}

var streamDecompressorPool sync.Pool

==> golang-github-valyala-gozstd-1.14.2+ds/gozstd_example_test.go <==
package gozstd

import (
	"fmt"
	"log"
)

func ExampleCompress_simple() {
	data := []byte("foo bar baz")

	// Compress and decompress data into new buffers.
	compressedData := Compress(nil, data)
	decompressedData, err := Decompress(nil, compressedData)
	if err != nil {
		log.Fatalf("cannot decompress data: %s", err)
	}
	fmt.Printf("%s", decompressedData)

	// Output:
	// foo bar baz
}

func ExampleDecompress_simple() {
	data := []byte("foo bar baz")

	// Compress and decompress data into new buffers.
	compressedData := Compress(nil, data)
	decompressedData, err := Decompress(nil, compressedData)
	if err != nil {
		log.Fatalf("cannot decompress data: %s", err)
	}
	fmt.Printf("%s", decompressedData)

	// Output:
	// foo bar baz
}

func ExampleCompress_noAllocs() {
	data := []byte("foo bar baz")

	// Compressed data will be put into cbuf.
	var cbuf []byte

	for i := 0; i < 5; i++ {
		// Compress re-uses cbuf for the compressed data.
		cbuf = Compress(cbuf[:0], data)

		decompressedData, err := Decompress(nil, cbuf)
		if err != nil {
			log.Fatalf("cannot decompress data: %s", err)
		}
		fmt.Printf("%d. %s\n", i, decompressedData)
	}

	// Output:
	// 0. foo bar baz
	// 1. foo bar baz
	// 2. foo bar baz
	// 3. foo bar baz
	// 4. foo bar baz
}

func ExampleDecompress_noAllocs() {
	data := []byte("foo bar baz")

	compressedData := Compress(nil, data)

	// Decompressed data will be put into dbuf.
	var dbuf []byte

	for i := 0; i < 5; i++ {
		// Decompress re-uses dbuf for the decompressed data.
		var err error
		dbuf, err = Decompress(dbuf[:0], compressedData)
		if err != nil {
			log.Fatalf("cannot decompress data: %s", err)
		}
		fmt.Printf("%d. %s\n", i, dbuf)
	}

	// Output:
	// 0. foo bar baz
	// 1. foo bar baz
	// 2. foo bar baz
	// 3. foo bar baz
	// 4. foo bar baz
}

==> golang-github-valyala-gozstd-1.14.2+ds/gozstd_test.go <==
package gozstd

import (
	"bytes"
	"encoding/hex"
	"fmt"
	"math/rand"
	"runtime"
	"strings"
	"testing"
	"time"
)

func TestDecompressSmallBlockWithoutSingleSegmentFlag(t *testing.T) {
	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/281 for details.
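	// The frame header below carries no content size, so Decompress cannot
	// pre-size dst and has to fall back to the streaming decompression path.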
	cblockHex := "28B52FFD00007D000038C0A907DFD40300015407022B0E02"
	dblockHexExpected := "C0A907DFD4030000000000000000000000000000000000000000000000" +
		"00000000000000000000000000000000000000000000000000000000000000000000000" +
		"00000000000000000000000000000000000000000000000000000000000000000000000" +
		"00000000000000000000000000000000000000000000000000000000000000000000000" +
		"000000000000000000000000000000000"

	cblock := mustUnhex(cblockHex)
	dblockExpected := mustUnhex(dblockHexExpected)

	t.Run("empty-dst-buf", func(t *testing.T) {
		dblock, err := Decompress(nil, cblock)
		if err != nil {
			t.Fatalf("unexpected error when decompressing with empty initial buffer: %s", err)
		}
		if string(dblock) != string(dblockExpected) {
			t.Fatalf("unexpected decompressed block;\ngot\n%X\nwant\n%X", dblock, dblockExpected)
		}
	})

	t.Run("small-dst-buf", func(t *testing.T) {
		buf := make([]byte, len(dblockExpected)/2)
		dblock, err := Decompress(buf[:0], cblock)
		if err != nil {
			t.Fatalf("unexpected error when decompressing with small initial buffer: %s", err)
		}
		if string(dblock) != string(dblockExpected) {
			t.Fatalf("unexpected decompressed block;\ngot\n%X\nwant\n%X", dblock, dblockExpected)
		}
	})

	t.Run("enough-dst-buf", func(t *testing.T) {
		buf := make([]byte, len(dblockExpected))
		dblock, err := Decompress(buf[:0], cblock)
		if err != nil {
			t.Fatalf("unexpected error when decompressing with big enough initial buffer: %s", err)
		}
		if string(dblock) != string(dblockExpected) {
			t.Fatalf("unexpected decompressed block;\ngot\n%X\nwant\n%X", dblock, dblockExpected)
		}
	})
}

func mustUnhex(dataHex string) []byte {
	data, err := hex.DecodeString(dataHex)
	if err != nil {
		panic(fmt.Errorf("BUG: cannot unhex %q: %s", dataHex, err))
	}
	return data
}

func TestCompressDecompressDistinctConcurrentDicts(t *testing.T) {
	// Build multiple distinct dicts.
	var cdicts []*CDict
	var ddicts []*DDict
	defer func() {
		for _, cd := range cdicts {
			cd.Release()
		}
		for _, dd := range ddicts {
			dd.Release()
		}
	}()
	for i := 0; i < 4; i++ {
		var samples [][]byte
		for j := 0; j < 1000; j++ {
			sample := fmt.Sprintf("this is %d,%d sample", j, i)
			samples = append(samples, []byte(sample))
		}
		dict := BuildDict(samples, 4*1024)
		cd, err := NewCDict(dict)
		if err != nil {
			t.Fatalf("cannot create CDict: %s", err)
		}
		cdicts = append(cdicts, cd)
		dd, err := NewDDict(dict)
		if err != nil {
			t.Fatalf("cannot create DDict: %s", err)
		}
		ddicts = append(ddicts, dd)
	}

	// Build data for the compression.
	var bb bytes.Buffer
	i := 0
	for bb.Len() < 1e4 {
		fmt.Fprintf(&bb, "%d sample line this is %d", bb.Len(), i)
		i++
	}
	data := bb.Bytes()

	// Run concurrent goroutines compressing/decompressing with distinct dicts.
	ch := make(chan error, len(cdicts))
	for i := 0; i < cap(ch); i++ {
		go func(cd *CDict, dd *DDict) {
			ch <- testCompressDecompressDistinctConcurrentDicts(cd, dd, data)
		}(cdicts[i], ddicts[i])
	}

	// Wait for goroutines to finish.
	for i := 0; i < cap(ch); i++ {
		select {
		case err := <-ch:
			if err != nil {
				t.Fatalf("unexpected error: %s", err)
			}
		case <-time.After(time.Second):
			t.Fatalf("timeout")
		}
	}
}

func testCompressDecompressDistinctConcurrentDicts(cd *CDict, dd *DDict, data []byte) error {
	var compressedData, decompressedData []byte
	for j := 0; j < 10; j++ {
		compressedData = CompressDict(compressedData[:0], data, cd)

		var err error
		decompressedData, err = DecompressDict(decompressedData[:0], compressedData, dd)
		if err != nil {
			return fmt.Errorf("cannot decompress data: %s", err)
		}
		if !bytes.Equal(decompressedData, data) {
			return fmt.Errorf("unexpected decompressed data; got\n%q; want\n%q", decompressedData, data)
		}
	}
	return nil
}

func TestCompressDecompressDict(t *testing.T) {
	var samples [][]byte
	for i := 0; i < 1000; i++ {
		sample := fmt.Sprintf("%d this is line %d", i, i)
		samples = append(samples, []byte(sample))
	}
	dict := BuildDict(samples, 16*1024)

	cd, err := NewCDict(dict)
	if err != nil {
		t.Fatalf("cannot create CDict: %s", err)
	}
	defer cd.Release()
	dd, err := NewDDict(dict)
	if err != nil {
		t.Fatalf("cannot create DDict: %s", err)
	}
	defer dd.Release()

	// Run serial test.
	if err := testCompressDecompressDictSerial(cd, dd); err != nil {
		t.Fatalf("error in serial test: %s", err)
	}

	// Run concurrent test.
	ch := make(chan error, 5)
	for i := 0; i < cap(ch); i++ {
		go func() {
			ch <- testCompressDecompressDictSerial(cd, dd)
		}()
	}
	for i := 0; i < cap(ch); i++ {
		select {
		case err := <-ch:
			if err != nil {
				t.Fatalf("error in concurrent test: %s", err)
			}
		case <-time.After(time.Second):
			t.Fatalf("timeout in concurrent test")
		}
	}
}

func testCompressDecompressDictSerial(cd *CDict, dd *DDict) error {
	for i := 0; i < 30; i++ {
		var src []byte
		for j := 0; j < 100; j++ {
			src = append(src, []byte(fmt.Sprintf("line %d is this %d\n", j, i+j))...)
		}
		compressedData := CompressDict(nil, src, cd)
		plainData, err := DecompressDict(nil, compressedData, dd)
		if err != nil {
			return fmt.Errorf("unexpected error when decompressing %d bytes: %s", len(src), err)
		}
		if string(plainData) != string(src) {
			return fmt.Errorf("unexpected data after decompressing %d bytes; got\n%X; want\n%X", len(src), plainData, src)
		}

		// Try decompressing without dict.
		_, err = Decompress(nil, compressedData)
		if err == nil {
			return fmt.Errorf("expecting non-nil error when decompressing without dict")
		}
		if !strings.Contains(err.Error(), "Dictionary mismatch") {
			return fmt.Errorf("unexpected error when decompressing without dict: %q; must contain %q", err, "Dictionary mismatch")
		}
	}
	return nil
}

func TestDecompressInvalidData(t *testing.T) {
	// Try decompressing invalid data.
	src := []byte("invalid compressed data")
	buf := make([]byte, len(src))
	if _, err := Decompress(nil, src); err == nil {
		t.Fatalf("expecting error when decompressing invalid data")
	}
	if _, err := Decompress(buf[:0], src); err == nil {
		t.Fatalf("expecting error when decompressing invalid data into existing buffer")
	}

	// Try decompressing corrupted data.
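	// A single flipped byte at the end of a valid frame must be detected
	// as corruption during decompression.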
	s := newTestString(64*1024, 15)
	cd := Compress(nil, []byte(s))
	cd[len(cd)-1]++

	if _, err := Decompress(nil, cd); err == nil {
		t.Fatalf("expecting error when decompressing corrupted data")
	}
	if _, err := Decompress(buf[:0], cd); err == nil {
		t.Fatalf("expecting error when decompressing corrupted data into existing buffer")
	}
}

func TestCompressLevel(t *testing.T) {
	src := []byte("foobar baz")

	for compressLevel := 1; compressLevel < 22; compressLevel++ {
		testCompressLevel(t, src, compressLevel)
	}

	// Test invalid compression levels - they should clamp
	// to the closest valid levels.
	testCompressLevel(t, src, -123)
	testCompressLevel(t, src, 234324)
}

func testCompressLevel(t *testing.T, src []byte, compressionLevel int) {
	t.Helper()

	cd := CompressLevel(nil, src, compressionLevel)
	dd, err := Decompress(nil, cd)
	if err != nil {
		t.Fatalf("unexpected error during decompression: %s", err)
	}
	if string(dd) != string(src) {
		t.Fatalf("unexpected dd\n%X; want\n%X", dd, src)
	}
}

func TestCompressDecompress(t *testing.T) {
	testCompressDecompress(t, "")
	testCompressDecompress(t, "a")
	testCompressDecompress(t, "foo bar")

	for size := 1; size <= 1e6; size *= 10 {
		s := newTestString(size, 20)
		testCompressDecompress(t, s)
	}
}

func testCompressDecompress(t *testing.T, s string) {
	t.Helper()

	if err := testCompressDecompressSerial(s); err != nil {
		t.Fatalf("error in serial test: %s", err)
	}

	ch := make(chan error, runtime.GOMAXPROCS(-1)+2)
	for i := 0; i < cap(ch); i++ {
		go func() {
			ch <- testCompressDecompressSerial(s)
		}()
	}
	for i := 0; i < cap(ch); i++ {
		select {
		case err := <-ch:
			if err != nil {
				t.Fatalf("unexpected error in parallel test: %s", err)
			}
		case <-time.After(time.Second):
			t.Fatalf("timeout in parallel test")
		}
	}
}

func testCompressDecompressSerial(s string) error {
	cs := Compress(nil, []byte(s))
	ds, err := Decompress(nil, cs)
	if err != nil {
		return fmt.Errorf("cannot decompress: %s\ns=%X\ncs=%X", err, s, cs)
	}
	if string(ds) != s {
		return fmt.Errorf("unexpected ds (len=%d, sLen=%d, cslen=%d)\n%X; want\n%X", len(ds), len(s), len(cs), ds, s)
	}

	// Verify prefixed decompression.
	prefix := []byte("foobaraaa")
	ds, err = Decompress(prefix, cs)
	if err != nil {
		return fmt.Errorf("cannot decompress prefixed cs: %s\ns=%X\ncs=%X", err, s, cs)
	}
	if string(ds[:len(prefix)]) != string(prefix) {
		return fmt.Errorf("unexpected prefix in the decompressed result: %X; want %X", ds[:len(prefix)], prefix)
	}
	ds = ds[len(prefix):]
	if string(ds) != s {
		return fmt.Errorf("unexpected prefixed ds\n%X; want\n%X", ds, s)
	}

	// Verify prefixed compression.
	csp := Compress(prefix, []byte(s))
	if string(csp[:len(prefix)]) != string(prefix) {
		return fmt.Errorf("unexpected prefix in the compressed result: %X; want %X", csp[:len(prefix)], prefix)
	}
	csp = csp[len(prefix):]
	if string(csp) != string(cs) {
		return fmt.Errorf("unexpected prefixed cs\n%X; want\n%X", csp, cs)
	}
	return nil
}

func newTestString(size, randomness int) string {
	s := make([]byte, size)
	for i := 0; i < size; i++ {
		s[i] = byte(rand.Intn(randomness))
	}
	return string(s)
}

func TestCompressDecompressMultiFrames(t *testing.T) {
	var bb bytes.Buffer
	for bb.Len() < 3*128*1024 {
		fmt.Fprintf(&bb, "compress/decompress big data %d, ", bb.Len())
	}
	origData := append([]byte{}, bb.Bytes()...)
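	// Copy the accumulated bytes, since bb.Bytes() aliases bb's internal
	// buffer which is handed to Compress below.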
	cd := Compress(nil, bb.Bytes())
	plainData, err := Decompress(nil, cd)
	if err != nil {
		t.Fatalf("cannot decompress big data: %s", err)
	}
	if !bytes.Equal(plainData, origData) {
		t.Fatalf("unexpected data decompressed: got\n%q; want\n%q\nlen(data)=%d, len(orig)=%d", plainData, origData, len(plainData), len(origData))
	}
}

==> golang-github-valyala-gozstd-1.14.2+ds/gozstd_timing_test.go <==
package gozstd

import (
	"bytes"
	"fmt"
	"math/rand"
	"sync"
	"sync/atomic"
	"testing"
)

var Sink uint64

var benchBlockSizes = []int{1, 1e1, 1e2, 1e3, 1e4, 1e5, 3e5}
var benchCompressionLevels = []int{3, 5, 10}

func BenchmarkDecompressDict(b *testing.B) {
	for _, blockSize := range benchBlockSizes {
		b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) {
			for _, level := range benchCompressionLevels {
				b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) {
					benchmarkDecompressDict(b, blockSize, level)
				})
			}
		})
	}
}

func benchmarkDecompressDict(b *testing.B, blockSize, level int) {
	block := newBenchString(blockSize)
	bd := getBenchDicts(level)
	src := CompressDict(nil, block, bd.cd)
	b.Logf("compressionRatio: %f", float64(len(block))/float64(len(src)))
	b.ReportAllocs()
	b.SetBytes(int64(blockSize))
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		n := 0
		var dst []byte
		var err error
		for pb.Next() {
			dst, err = DecompressDict(dst[:0], src, bd.dd)
			if err != nil {
				panic(fmt.Errorf("BUG: cannot decompress with dict: %s", err))
			}
			n += len(dst)
		}
		atomic.AddUint64(&Sink, uint64(n))
	})
}

func BenchmarkCompressDict(b *testing.B) {
	for _, blockSize := range benchBlockSizes {
		b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) {
			for _, level := range benchCompressionLevels {
				b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) {
					benchmarkCompressDict(b, blockSize, level)
				})
			}
		})
	}
}

func benchmarkCompressDict(b *testing.B, blockSize, level int) {
	src := newBenchString(blockSize)
	bd := getBenchDicts(level)
	b.ReportAllocs()
	b.SetBytes(int64(len(src)))
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		n := 0
		var dst []byte
		for pb.Next() {
			dst = CompressDict(dst[:0], src, bd.cd)
			n += len(dst)
		}
		atomic.AddUint64(&Sink, uint64(n))
	})
}

func getBenchDicts(level int) *benchDicts {
	benchDictsLock.Lock()
	tmp := benchDictsMap[level]
	if tmp == nil {
		tmp = newBenchDicts(level)
		benchDictsMap[level] = tmp
	}
	benchDictsLock.Unlock()
	return tmp
}

type benchDicts struct {
	cd *CDict
	dd *DDict
}

var benchDictsMap = make(map[int]*benchDicts)
var benchDictsLock sync.Mutex

func newBenchDicts(level int) *benchDicts {
	var samples [][]byte
	for i := 0; i < 300; i++ {
		sampleLen := rand.Intn(300)
		sample := newBenchString(sampleLen)
		samples = append(samples, sample)
	}
	dict := BuildDict(samples, 32*1024)

	cd, err := NewCDictLevel(dict, level)
	if err != nil {
		panic(fmt.Errorf("cannot create CDict: %s", err))
	}
	dd, err := NewDDict(dict)
	if err != nil {
		panic(fmt.Errorf("cannot create DDict: %s", err))
	}
	return &benchDicts{
		cd: cd,
		dd: dd,
	}
}

func newBenchString(blockSize int) []byte {
	var bb bytes.Buffer
	line := 0
	for bb.Len() < blockSize {
		fmt.Fprintf(&bb, "line %d, size %d, hex %08X\n", line, bb.Len(), line)
		line++
	}
	return bb.Bytes()[:blockSize]
}

func BenchmarkCompress(b *testing.B) {
	for _, blockSize := range benchBlockSizes {
		b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) {
			for _, level := range benchCompressionLevels {
				b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) {
					benchmarkCompress(b, blockSize, level)
				})
			}
		})
	}
}
func benchmarkCompress(b *testing.B, blockSize, level int) {
	src := newBenchString(blockSize)
	b.ReportAllocs()
	b.SetBytes(int64(len(src)))
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		n := 0
		var dst []byte
		for pb.Next() {
			dst = CompressLevel(dst[:0], src, level)
			n += len(dst)
		}
		atomic.AddUint64(&Sink, uint64(n))
	})
}

func BenchmarkDecompress(b *testing.B) {
	for _, blockSize := range benchBlockSizes {
		b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) {
			for _, level := range benchCompressionLevels {
				b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) {
					benchmarkDecompress(b, blockSize, level)
				})
			}
		})
	}
}

func benchmarkDecompress(b *testing.B, blockSize, level int) {
	block := newBenchString(blockSize)
	src := CompressLevel(nil, block, level)
	b.Logf("compressionRatio: %f", float64(len(block))/float64(len(src)))
	b.ReportAllocs()
	b.SetBytes(int64(len(block)))
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		n := 0
		var dst []byte
		var err error
		for pb.Next() {
			dst, err = Decompress(dst[:0], src)
			if err != nil {
				panic(fmt.Errorf("unexpected error: %s", err))
			}
			n += len(dst)
		}
		atomic.AddUint64(&Sink, uint64(n))
	})
}

==> golang-github-valyala-gozstd-1.14.2+ds/reader.go <==
package gozstd

/*
#cgo CFLAGS: -O3

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"

#include <stdlib.h>  // for malloc/free
#include <stdint.h>  // for uintptr_t

// The following *_wrapper functions allow avoiding memory allocations
// during calls from Go.
// See https://github.com/golang/go/issues/24450 .

static size_t ZSTD_initDStream_usingDDict_wrapper(uintptr_t ds, uintptr_t dict) {
    return ZSTD_initDStream_usingDDict((ZSTD_DStream*)ds, (ZSTD_DDict*)dict);
}

static size_t ZSTD_freeDStream_wrapper(uintptr_t ds) {
    return ZSTD_freeDStream((ZSTD_DStream*)ds);
}

static size_t ZSTD_decompressStream_wrapper(uintptr_t ds, uintptr_t output, uintptr_t input) {
    return ZSTD_decompressStream((ZSTD_DStream*)ds, (ZSTD_outBuffer*)output, (ZSTD_inBuffer*)input);
}
*/
import "C"

import (
	"fmt"
	"io"
	"runtime"
	"unsafe"
)

var (
	dstreamInBufSize  = C.ZSTD_DStreamInSize()
	dstreamOutBufSize = C.ZSTD_DStreamOutSize()
)

// Reader implements zstd reader.
type Reader struct {
	r  io.Reader
	ds *C.ZSTD_DStream
	dd *DDict

	inBuf  *C.ZSTD_inBuffer
	outBuf *C.ZSTD_outBuffer

	inBufGo  cMemPtr
	outBufGo cMemPtr
}

// NewReader returns new zstd reader reading compressed data from r.
//
// Call Release when the Reader is no longer needed.
func NewReader(r io.Reader) *Reader {
	return NewReaderDict(r, nil)
}

// NewReaderDict returns new zstd reader reading compressed data from r
// using the given DDict.
//
// Call Release when the Reader is no longer needed.
func NewReaderDict(r io.Reader, dd *DDict) *Reader {
	ds := C.ZSTD_createDStream()
	initDStream(ds, dd)

	inBuf := (*C.ZSTD_inBuffer)(C.malloc(C.sizeof_ZSTD_inBuffer))
	inBuf.src = C.malloc(dstreamInBufSize)
	inBuf.size = 0
	inBuf.pos = 0

	outBuf := (*C.ZSTD_outBuffer)(C.malloc(C.sizeof_ZSTD_outBuffer))
	outBuf.dst = C.malloc(dstreamOutBufSize)
	outBuf.size = 0
	outBuf.pos = 0

	zr := &Reader{
		r:      r,
		ds:     ds,
		dd:     dd,
		inBuf:  inBuf,
		outBuf: outBuf,
	}

	zr.inBufGo = cMemPtr(zr.inBuf.src)
	zr.outBufGo = cMemPtr(zr.outBuf.dst)

	runtime.SetFinalizer(zr, freeDStream)
	return zr
}

// Reset resets zr to read from r using the given dictionary dd.
func (zr *Reader) Reset(r io.Reader, dd *DDict) {
	zr.inBuf.size = 0
	zr.inBuf.pos = 0
	zr.outBuf.size = 0
	zr.outBuf.pos = 0

	zr.dd = dd
	initDStream(zr.ds, zr.dd)

	zr.r = r
}

func initDStream(ds *C.ZSTD_DStream, dd *DDict) {
	var ddict *C.ZSTD_DDict
	if dd != nil {
		ddict = dd.p
	}
	result := C.ZSTD_initDStream_usingDDict_wrapper(
		C.uintptr_t(uintptr(unsafe.Pointer(ds))),
		C.uintptr_t(uintptr(unsafe.Pointer(ddict))))
	ensureNoError("ZSTD_initDStream_usingDDict", result)
}

func freeDStream(v interface{}) {
	v.(*Reader).Release()
}

// Release releases all the resources occupied by zr.
//
// zr cannot be used after the release.
func (zr *Reader) Release() {
	if zr.ds == nil {
		return
	}

	result := C.ZSTD_freeDStream_wrapper(
		C.uintptr_t(uintptr(unsafe.Pointer(zr.ds))))
	ensureNoError("ZSTD_freeDStream", result)
	zr.ds = nil

	C.free(zr.inBuf.src)
	C.free(unsafe.Pointer(zr.inBuf))
	zr.inBuf = nil

	C.free(zr.outBuf.dst)
	C.free(unsafe.Pointer(zr.outBuf))
	zr.outBuf = nil

	zr.r = nil
	zr.dd = nil
}

// WriteTo writes all the data from zr to w.
//
// It returns the number of bytes written to w.
func (zr *Reader) WriteTo(w io.Writer) (int64, error) {
	nn := int64(0)
	for {
		if zr.outBuf.pos == zr.outBuf.size {
			if err := zr.fillOutBuf(); err != nil {
				if err == io.EOF {
					return nn, nil
				}
				return nn, err
			}
		}
		n, err := w.Write(zr.outBufGo[zr.outBuf.pos:zr.outBuf.size])
		zr.outBuf.pos += C.size_t(n)
		nn += int64(n)
		if err != nil {
			return nn, err
		}
	}
}

// Read reads up to len(p) bytes from zr to p.
func (zr *Reader) Read(p []byte) (int, error) {
	if len(p) == 0 {
		return 0, nil
	}

	if zr.outBuf.pos == zr.outBuf.size {
		if err := zr.fillOutBuf(); err != nil {
			return 0, err
		}
	}

	n := copy(p, zr.outBufGo[zr.outBuf.pos:zr.outBuf.size])
	zr.outBuf.pos += C.size_t(n)
	return n, nil
}

func (zr *Reader) fillOutBuf() error {
	if zr.inBuf.pos == zr.inBuf.size && zr.outBuf.size < dstreamOutBufSize {
		// inBuf is empty and the previously decompressed data size
		// is smaller than the maximum possible zr.outBuf.size.
		// This means that the internal buffer in zr.ds doesn't contain
		// more data to decompress, so read new data into inBuf.
		if err := zr.fillInBuf(); err != nil {
			return err
		}
	}

tryDecompressAgain:
	// Try decompressing inBuf into outBuf.
	zr.outBuf.size = dstreamOutBufSize
	zr.outBuf.pos = 0
	prevInBufPos := zr.inBuf.pos
	result := C.ZSTD_decompressStream_wrapper(
		C.uintptr_t(uintptr(unsafe.Pointer(zr.ds))),
		C.uintptr_t(uintptr(unsafe.Pointer(zr.outBuf))),
		C.uintptr_t(uintptr(unsafe.Pointer(zr.inBuf))))
	zr.outBuf.size = zr.outBuf.pos
	zr.outBuf.pos = 0

	if C.ZSTD_getErrorCode(result) != 0 {
		return fmt.Errorf("cannot decompress data: %s", errStr(result))
	}

	if zr.outBuf.size > 0 {
		// Something has been decompressed to outBuf. Return it.
		return nil
	}

	// Nothing has been decompressed from inBuf.
	if zr.inBuf.pos != prevInBufPos && zr.inBuf.pos < zr.inBuf.size {
		// Data has been consumed from inBuf, but decompressed
		// into nothing. There is more data in inBuf, so try
		// decompressing it again.
		goto tryDecompressAgain
	}

	// Either nothing has been consumed from inBuf or it has been
	// decompressed into nothing and inBuf became empty.
	// Read more data into inBuf and try decompressing again.
	if err := zr.fillInBuf(); err != nil {
		return err
	}
	goto tryDecompressAgain
}

func (zr *Reader) fillInBuf() error {
	// Copy the remaining data to the start of inBuf.
	copy(zr.inBufGo[:dstreamInBufSize], zr.inBufGo[zr.inBuf.pos:zr.inBuf.size])
	zr.inBuf.size -= zr.inBuf.pos
	zr.inBuf.pos = 0

readAgain:
	// Read more data into inBuf.
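	// The io.Reader contract allows returning (0, nil), so keep reading
	// until at least one byte is read or an error is returned.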
	n, err := zr.r.Read(zr.inBufGo[zr.inBuf.size:dstreamInBufSize])
	zr.inBuf.size += C.size_t(n)
	if err == nil {
		if n == 0 {
			// Nothing has been read. Try reading data again.
			goto readAgain
		}
		return nil
	}
	if n > 0 {
		// Do not return error if at least a single byte read, i.e. forward progress is made.
		return nil
	}
	if err == io.EOF {
		// Do not wrap io.EOF, so the caller may notice the end of stream.
		return err
	}
	return fmt.Errorf("cannot read data from the underlying reader: %s", err)
}

==> golang-github-valyala-gozstd-1.14.2+ds/reader_example_test.go <==
package gozstd

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"log"
)

func ExampleReader() {
	// Compress the data.
	compressedData := Compress(nil, []byte("line 0\nline 1\nline 2"))

	// Read it via Reader.
	r := bytes.NewReader(compressedData)
	zr := NewReader(r)
	defer zr.Release()

	var a []int
	for i := 0; i < 3; i++ {
		var n int
		if _, err := fmt.Fscanf(zr, "line %d\n", &n); err != nil {
			log.Fatalf("cannot read line: %s", err)
		}
		a = append(a, n)
	}

	// Make sure there is no data left in zr.
	buf := make([]byte, 1)
	if _, err := zr.Read(buf); err != io.EOF {
		log.Fatalf("unexpected error; got %v; want %v", err, io.EOF)
	}

	fmt.Println(a)

	// Output:
	// [0 1 2]
}

func ExampleReader_Reset() {
	zr := NewReader(nil)
	defer zr.Release()

	// Read from different sources using the same Reader.
	for i := 0; i < 3; i++ {
		compressedData := Compress(nil, []byte(fmt.Sprintf("line %d", i)))
		r := bytes.NewReader(compressedData)
		zr.Reset(r, nil)

		data, err := ioutil.ReadAll(zr)
		if err != nil {
			log.Fatalf("unexpected error when reading compressed data: %s", err)
		}
		fmt.Printf("%s\n", data)
	}

	// Output:
	// line 0
	// line 1
	// line 2
}

==> golang-github-valyala-gozstd-1.14.2+ds/reader_test.go <==
package gozstd

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"math/rand"
	"strings"
	"testing"
	"time"
)

func TestReaderReadCompressBomb(t *testing.T) {
	// Compress easily compressible string with size greater
	// than the dstreamOutBufSize.
	// This string should be compressed into a short byte slice,
	// which should be then decompressed into a big buffer
	// with size greater than dstreamOutBufSize.
	// This means the Reader.outBuf capacity isn't enough to hold
	// all the decompressed data.
	var bb bytes.Buffer
	zw := NewWriter(&bb)
	s := newTestString(int(2*dstreamOutBufSize), 1)
	n, err := zw.Write([]byte(s))
	if err != nil {
		t.Fatalf("unexpected error in Writer.Write: %s", err)
	}
	if n != len(s) {
		t.Fatalf("unexpected number of bytes written; got %d; want %d", n, len(s))
	}
	if err := zw.Flush(); err != nil {
		t.Fatalf("cannot flush data: %s", err)
	}

	zr := NewReader(&bb)
	buf := make([]byte, len(s))
	n, err = io.ReadFull(zr, buf)
	if err != nil {
		t.Fatalf("unexpected error in io.ReadFull: %s", err)
	}
	if n != len(s) {
		t.Fatalf("unexpected number of bytes read; got %d; want %d", n, len(s))
	}
	if string(buf) != s {
		t.Fatalf("unexpected data read;\ngot\n%X\nwant\n%X", buf, s)
	}

	// Free resources.
	zw.Close()
	zw.Release()
	zr.Release()
}

func TestReaderWriteTo(t *testing.T) {
	data := newTestString(130*1024, 3)
	compressedData := Compress(nil, []byte(data))
	zr := NewReader(bytes.NewReader(compressedData))
	defer zr.Release()

	var bb bytes.Buffer
	n, err := zr.WriteTo(&bb)
	if err != nil {
		t.Fatalf("cannot write data from zr to bb: %s", err)
	}
	if n != int64(bb.Len()) {
		t.Fatalf("unexpected number of bytes written; got %d; want %d", n, bb.Len())
	}
	plainData := bb.Bytes()
	if string(plainData) != data {
		t.Fatalf("unexpected data decompressed; got\n%X; want\n%X", plainData, data)
	}
}

func TestReaderDict(t *testing.T) {
	var samples [][]byte
	for i := 0; i < 1e4; i++ {
		sample := []byte(fmt.Sprintf("this is a sample number %d", i))
		samples = append(samples, sample)
	}
	dict := BuildDict(samples, 8*1024)

	cd, err := NewCDict(dict)
	if err != nil {
		t.Fatalf("cannot create CDict: %s", err)
	}
	defer cd.Release()
	dd, err := NewDDict(dict)
	if err != nil {
		t.Fatalf("cannot create DDict: %s", err)
	}
	defer dd.Release()

	// Run serial test.
	if err := testReaderDictSerial(cd, dd); err != nil {
		t.Fatalf("error in serial test: %s", err)
	}

	// Run concurrent test.
	ch := make(chan error, 3)
	for i := 0; i < cap(ch); i++ {
		go func() {
			ch <- testReaderDictSerial(cd, dd)
		}()
	}
	for i := 0; i < cap(ch); i++ {
		select {
		case err := <-ch:
			if err != nil {
				t.Fatalf("error in concurrent test: %s", err)
			}
		case <-time.After(time.Second):
			t.Fatalf("timeout in concurrent test")
		}
	}
}

func testReaderDictSerial(cd *CDict, dd *DDict) error {
	var bb bytes.Buffer
	for i := 0; i < 8000; i++ {
		fmt.Fprintf(&bb, "This is number %d ", i)
	}
	origData := bb.Bytes()
	compressedData := CompressDict(nil, origData, cd)

	// Decompress via Reader.
	zr := NewReaderDict(bytes.NewReader(compressedData), dd)
	defer zr.Release()
	plainData, err := ioutil.ReadAll(zr)
	if err != nil {
		return fmt.Errorf("cannot stream decompress data with dict: %s", err)
	}
	if !bytes.Equal(plainData, origData) {
		return fmt.Errorf("unexpected stream uncompressed data; got\n%q; want\n%q\nlen(plainData)=%d, len(origData)=%d", plainData, origData, len(plainData), len(origData))
	}

	// Try decompressing without dict.
	zrNoDict := NewReader(bytes.NewReader(compressedData))
	defer zrNoDict.Release()
	_, err = ioutil.ReadAll(zrNoDict)
	if err == nil {
		return fmt.Errorf("expecting non-nil error when stream decompressing without dict")
	}
	if !strings.Contains(err.Error(), "Dictionary mismatch") {
		return fmt.Errorf("unexpected error when stream decompressing without dict; got %q; want %q", err, "Dictionary mismatch")
	}
	return nil
}

func TestReaderMultiFrames(t *testing.T) {
	var bb bytes.Buffer
	for bb.Len() < 3*128*1024 {
		fmt.Fprintf(&bb, "reader big data %d, ", bb.Len())
	}
	origData := append([]byte{}, bb.Bytes()...)
	cd := Compress(nil, bb.Bytes())
	r := bytes.NewReader(cd)
	zr := NewReader(r)
	defer zr.Release()
	plainData, err := ioutil.ReadAll(zr)
	if err != nil {
		t.Fatalf("cannot read big data: %s", err)
	}
	if !bytes.Equal(plainData, origData) {
		t.Fatalf("unexpected data read: got\n%q; want\n%q\nlen(data)=%d, len(orig)=%d", plainData, origData, len(plainData), len(origData))
	}
}

func TestReaderBadUnderlyingReader(t *testing.T) {
	r := &badReader{
		b: Compress(nil, []byte(newTestString(64*1024, 30))),
	}
	zr := NewReader(r)
	defer zr.Release()

	buf := make([]byte, 123)
	for {
		if _, err := zr.Read(buf); err != nil {
			if !strings.Contains(err.Error(), "badReader failed") {
				t.Fatalf("unexpected error: %s", err)
			}
			break
		}
	}
}

type badReader struct {
	b []byte
}

func (br *badReader) Read(p []byte) (int, error) {
	if len(p) == 0 {
		return 0, nil
	}
	if rand.Intn(5) == 0 || len(br.b) < 2 {
		return 0, fmt.Errorf("badReader failed")
	}
	n := copy(p[:1], br.b)
	br.b = br.b[n:]
	return n, nil
}

func TestReaderInvalidData(t *testing.T) {
	// Try decompressing invalid data.
	src := []byte("invalid compressed data")
	r := bytes.NewReader(src)
	zr := NewReader(r)
	defer zr.Release()
	if _, err := ioutil.ReadAll(zr); err == nil {
		t.Fatalf("expecting error when decompressing invalid data")
	}

	// Try decompressing corrupted data.
	s := newTestString(64*1024, 15)
	cd := Compress(nil, []byte(s))
	cd[len(cd)-1]++
	r = bytes.NewReader(cd)
	zr.Reset(r, nil)
	if _, err := ioutil.ReadAll(zr); err == nil {
		t.Fatalf("expecting error when decompressing corrupted data")
	}
}

func TestReader(t *testing.T) {
	testReader(t, "")
	testReader(t, "a")
	testReader(t, "foo bar")
	testReader(t, "aasdf sdfa dsa fdsaf dsa")

	for size := 1; size <= 4e5; size *= 2 {
		s := newTestString(size, 20)
		testReader(t, s)
	}
}

func testReader(t *testing.T, s string) {
	t.Helper()

	cd := Compress(nil, []byte(s))

	// Serial test
	if err := testReaderSerial(s, cd); err != nil {
		t.Fatalf("error in serial reader test: %s", err)
	}

	// Concurrent test
	ch := make(chan error, 10)
	for i := 0; i < cap(ch); i++ {
		go func() {
			ch <- testReaderSerial(s, cd)
		}()
	}
	for i := 0; i < cap(ch); i++ {
		select {
		case err := <-ch:
			if err != nil {
				t.Fatalf("unexpected error: %s", err)
			}
		case <-time.After(time.Second):
			t.Fatalf("timeout")
		}
	}
}

func testReaderSerial(s string, cd []byte) error {
	zr := NewReader(nil)
	defer zr.Release()
	for i := 0; i < 2; i++ {
		r := bytes.NewReader(cd)
		zr.Reset(r, nil)
		if err := testReaderExt(zr, s); err != nil {
			return err
		}
	}
	return nil
}

func testReaderExt(zr *Reader, s string) error {
	buf := make([]byte, len(s))

	// Verify reading zero bytes
	n, err := zr.Read(buf[:0])
	if err != nil {
		return fmt.Errorf("cannot read zero bytes: %s", err)
	}
	if n != 0 {
		return fmt.Errorf("unexpected number of bytes read; got %d; want %d", n, 0)
	}

	// Verify reading random number of bytes.
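	// Random chunk sizes exercise outBuf refills across buffer boundaries.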
for len(s) > 0 { nWant := rand.Intn(len(s))/7 + 1 n, err := io.ReadFull(zr, buf[:nWant]) if err != nil { return fmt.Errorf("unexpected error when reading data: %s", err) } if n != nWant { return fmt.Errorf("unexpected number of bytes read; got %d; want %d", n, nWant) } if string(buf[:n]) != s[:n] { return fmt.Errorf("unexpected data read: got\n%X; want\n%X", buf[:n], s[:n]) } s = s[n:] } return nil } golang-github-valyala-gozstd-1.14.2+ds/reader_timing_test.go000066400000000000000000000040701414322045000240620ustar00rootroot00000000000000package gozstd import ( "bytes" "fmt" "io" "testing" ) func BenchmarkReaderDict(b *testing.B) { for _, blockSize := range benchBlockSizes { b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) { for _, level := range benchCompressionLevels { b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) { benchmarkReaderDict(b, blockSize, level) }) } }) } } func benchmarkReaderDict(b *testing.B, blockSize, level int) { bd := getBenchDicts(level) block := newBenchString(blockSize * benchBlocksPerStream) cd := CompressDict(nil, block, bd.cd) b.Logf("compressionRatio: %f", float64(len(block))/float64(len(cd))) b.ReportAllocs() b.SetBytes(int64(len(block))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { r := bytes.NewReader(cd) zr := NewReaderDict(r, bd.dd) defer zr.Release() buf := make([]byte, blockSize) for pb.Next() { for { _, err := io.ReadFull(zr, buf) if err != nil { if err == io.EOF { break } panic(fmt.Errorf("unexpected error: %s", err)) } } r.Reset(cd) zr.Reset(r, bd.dd) } }) } func BenchmarkReader(b *testing.B) { for _, blockSize := range benchBlockSizes { b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) { for _, level := range benchCompressionLevels { b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) { benchmarkReader(b, blockSize, level) }) } }) } } func benchmarkReader(b *testing.B, blockSize, level int) { block := newBenchString(blockSize * benchBlocksPerStream) cd := CompressLevel(nil, block, level) b.Logf("compressionRatio: %f", float64(len(block))/float64(len(cd))) b.ReportAllocs() b.SetBytes(int64(len(block))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { r := bytes.NewReader(cd) zr := NewReader(r) defer zr.Release() buf := make([]byte, blockSize) for pb.Next() { for { _, err := io.ReadFull(zr, buf) if err != nil { if err == io.EOF { break } panic(fmt.Errorf("unexpected error: %s", err)) } } r.Reset(cd) zr.Reset(r, nil) } }) } golang-github-valyala-gozstd-1.14.2+ds/stream.go000066400000000000000000000071611414322045000215110ustar00rootroot00000000000000package gozstd import ( "io" "sync" ) // StreamCompress compresses src into dst. // // This function doesn't work with interactive network streams, since data read // from src may be buffered before passing to dst for performance reasons. // Use Writer.Flush for interactive network streams. func StreamCompress(dst io.Writer, src io.Reader) error { return streamCompressDictLevel(dst, src, nil, DefaultCompressionLevel) } // StreamCompressLevel compresses src into dst using the given compressionLevel. // // This function doesn't work with interactive network streams, since data read // from src may be buffered before passing to dst for performance reasons. // Use Writer.Flush for interactive network streams. func StreamCompressLevel(dst io.Writer, src io.Reader, compressionLevel int) error { return streamCompressDictLevel(dst, src, nil, compressionLevel) } // StreamCompressDict compresses src into dst using the given dict cd. 
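//
// A minimal usage sketch (illustrative only; it assumes cd was built
// beforehand via BuildDict and NewCDict, as the tests in this package do):
//
//	var bb bytes.Buffer
//	if err := StreamCompressDict(&bb, bytes.NewReader(data), cd); err != nil {
//		log.Fatalf("cannot compress: %s", err)
//	}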
// // This function doesn't work with interactive network streams, since data read // from src may be buffered before passing to dst for performance reasons. // Use Writer.Flush for interactive network streams. func StreamCompressDict(dst io.Writer, src io.Reader, cd *CDict) error { return streamCompressDictLevel(dst, src, cd, 0) } func streamCompressDictLevel(dst io.Writer, src io.Reader, cd *CDict, compressionLevel int) error { sc := getSCompressor(compressionLevel) sc.zw.Reset(dst, cd, compressionLevel) _, err := sc.zw.ReadFrom(src) if err == nil { err = sc.zw.Close() } putSCompressor(sc) return err } type sCompressor struct { zw *Writer compressionLevel int } func getSCompressor(compressionLevel int) *sCompressor { p := getSCompressorPool(compressionLevel) v := p.Get() if v == nil { return &sCompressor{ zw: NewWriterLevel(nil, compressionLevel), compressionLevel: compressionLevel, } } return v.(*sCompressor) } func putSCompressor(sc *sCompressor) { sc.zw.Reset(nil, nil, sc.compressionLevel) p := getSCompressorPool(sc.compressionLevel) p.Put(sc) } func getSCompressorPool(compressionLevel int) *sync.Pool { // Use per-level compressor pools, since Writer.Reset is expensive // between distinct compression levels. sCompressorPoolLock.Lock() p := sCompressorPool[compressionLevel] if p == nil { p = &sync.Pool{} sCompressorPool[compressionLevel] = p } sCompressorPoolLock.Unlock() return p } var ( sCompressorPoolLock sync.Mutex sCompressorPool = make(map[int]*sync.Pool) ) // StreamDecompress decompresses src into dst. // // This function doesn't work with interactive network streams, since data read // from src may be buffered before passing to dst for performance reasons. // Use Reader for interactive network streams. func StreamDecompress(dst io.Writer, src io.Reader) error { return StreamDecompressDict(dst, src, nil) } // StreamDecompressDict decompresses src into dst using the given dictionary dd. // // This function doesn't work with interactive network streams, since data read // from src may be buffered before passing to dst for performance reasons. // Use Reader for interactive network streams. func StreamDecompressDict(dst io.Writer, src io.Reader, dd *DDict) error { sd := getSDecompressor() sd.zr.Reset(src, dd) _, err := sd.zr.WriteTo(dst) putSDecompressor(sd) return err } type sDecompressor struct { zr *Reader } func getSDecompressor() *sDecompressor { v := sDecompressorPool.Get() if v == nil { return &sDecompressor{ zr: NewReader(nil), } } return v.(*sDecompressor) } func putSDecompressor(sd *sDecompressor) { sd.zr.Reset(nil, nil) sDecompressorPool.Put(sd) } var sDecompressorPool sync.Pool golang-github-valyala-gozstd-1.14.2+ds/stream_test.go000066400000000000000000000120411414322045000225410ustar00rootroot00000000000000package gozstd import ( "bytes" "fmt" "testing" "time" ) func TestStreamCompressDecompress(t *testing.T) { testStreamCompressDecompress(t, "") testStreamCompressDecompress(t, "a") testStreamCompressDecompress(t, "foo bar") for _, blockSize := range []int{11, 111, 1111, 11111, 111111, 211111} { data := newTestString(blockSize, 3) testStreamCompressDecompress(t, data) } } func testStreamCompressDecompress(t *testing.T, data string) { t.Helper() // Serial test. if err := testStreamCompressDecompressSerial(data); err != nil { t.Fatalf("error in serial test: %s", err) } // Concurrent test. 
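// Three goroutines run the same serial test concurrently in order to exercise the shared sCompressor/sDecompressor pools defined in stream.go; the one-second timeout guards against deadlocks in the pool reuse logic.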
ch := make(chan error, 3) for i := 0; i < cap(ch); i++ { go func() { ch <- testStreamCompressDecompressSerial(data) }() } for i := 0; i < cap(ch); i++ { select { case err := <-ch: if err != nil { t.Fatalf("error in concurrent test: %s", err) } case <-time.After(time.Second): t.Fatalf("timeout in concurrent test") } } } func testStreamCompressDecompressSerial(data string) error { var bbCompress bytes.Buffer if err := StreamCompress(&bbCompress, bytes.NewBufferString(data)); err != nil { return fmt.Errorf("cannot compress stream of size %d: %s", len(data), err) } var bbDecompress bytes.Buffer if err := StreamDecompress(&bbDecompress, &bbCompress); err != nil { return fmt.Errorf("cannot decompress stream of size %d: %s", len(data), err) } plainData := bbDecompress.Bytes() if string(plainData) != data { return fmt.Errorf("unexpected decompressed data; got\n%q; want\n%q", plainData, data) } return nil } func TestStreamCompressDecompressLevel(t *testing.T) { for level := 0; level < 20; level++ { t.Run(fmt.Sprintf("level_%d", level), func(t *testing.T) { testStreamCompressDecompressLevel(t, "", level) testStreamCompressDecompressLevel(t, "a", level) testStreamCompressDecompressLevel(t, "foo bar", level) for _, blockSize := range []int{11, 111, 1111, 11111, 143333} { data := newTestString(blockSize, 3) testStreamCompressDecompressLevel(t, data, level) } }) } } func testStreamCompressDecompressLevel(t *testing.T, data string, level int) { t.Helper() // Serial test. if err := testStreamCompressDecompressLevelSerial(data, level); err != nil { t.Fatalf("error in serial test: %s", err) } // Concurrent test. ch := make(chan error, 3) for i := 0; i < cap(ch); i++ { go func() { ch <- testStreamCompressDecompressLevelSerial(data, level) }() } for i := 0; i < cap(ch); i++ { select { case err := <-ch: if err != nil { t.Fatalf("error in concurrent test: %s", err) } case <-time.After(5 * time.Second): t.Fatalf("timeout in concurrent test") } } } func testStreamCompressDecompressLevelSerial(data string, level int) error { var bbCompress bytes.Buffer if err := StreamCompressLevel(&bbCompress, bytes.NewBufferString(data), level); err != nil { return fmt.Errorf("cannot compress stream of size %d: %s", len(data), err) } var bbDecompress bytes.Buffer if err := StreamDecompress(&bbDecompress, &bbCompress); err != nil { return fmt.Errorf("cannot decompress stream of size %d: %s", len(data), err) } plainData := bbDecompress.Bytes() if string(plainData) != data { return fmt.Errorf("unexpected decompressed data; got\n%q; want\n%q", plainData, data) } return nil } func TestStreamCompressDecompressDict(t *testing.T) { var samples [][]byte for i := 0; i < 1000; i++ { sample := fmt.Sprintf("this is a dict sample line %d", i) samples = append(samples, []byte(sample)) } dict := BuildDict(samples, 8*1024) cd, err := NewCDict(dict) if err != nil { t.Fatalf("cannot create CDict: %s", err) } defer cd.Release() dd, err := NewDDict(dict) if err != nil { t.Fatalf("cannot create DDict: %s", err) } defer dd.Release() // Create data for the compression. var bb bytes.Buffer for bb.Len() < 256*1024 { fmt.Fprintf(&bb, "dict sample line %d this is", bb.Len()) } data := bb.Bytes() // Serial test. if err := testStreamCompressDecompressDictSerial(cd, dd, data); err != nil { t.Fatalf("error in serial test: %s", err) } // Concurrent test. 
ch := make(chan error, 3) for i := 0; i < cap(ch); i++ { go func() { ch <- testStreamCompressDecompressDictSerial(cd, dd, data) }() } for i := 0; i < cap(ch); i++ { select { case err := <-ch: if err != nil { t.Fatalf("error in concurrent test: %s", err) } case <-time.After(time.Second): t.Fatalf("timeout in concurrent test") } } } func testStreamCompressDecompressDictSerial(cd *CDict, dd *DDict, data []byte) error { var bbCompress bytes.Buffer if err := StreamCompressDict(&bbCompress, bytes.NewReader(data), cd); err != nil { return fmt.Errorf("cannot compress stream of size %d: %s", len(data), err) } var bbDecompress bytes.Buffer if err := StreamDecompressDict(&bbDecompress, &bbCompress, dd); err != nil { return fmt.Errorf("cannot decompress stream of size %d: %s", len(data), err) } plainData := bbDecompress.Bytes() if !bytes.Equal(plainData, data) { return fmt.Errorf("unexpected decompressed data; got\n%q; want\n%q", plainData, data) } return nil } golang-github-valyala-gozstd-1.14.2+ds/stream_timing_test.go000066400000000000000000000032571414322045000241050ustar00rootroot00000000000000package gozstd import ( "bytes" "fmt" "io/ioutil" "testing" ) func BenchmarkStreamCompress(b *testing.B) { for _, blockSize := range benchBlockSizes { b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) { for _, level := range benchCompressionLevels { b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) { benchmarkStreamCompress(b, blockSize, level) }) } }) } } func benchmarkStreamCompress(b *testing.B, blockSize, level int) { block := newBenchString(blockSize * benchBlocksPerStream) b.ReportAllocs() b.SetBytes(int64(len(block))) b.RunParallel(func(pb *testing.PB) { r := bytes.NewReader(block) for pb.Next() { if err := StreamCompressLevel(ioutil.Discard, r, level); err != nil { panic(fmt.Errorf("unexpected error: %s", err)) } r.Reset(block) } }) } func BenchmarkStreamDecompress(b *testing.B) { for _, blockSize := range benchBlockSizes { b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) { for _, level := range benchCompressionLevels { b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) { benchmarkStreamDecompress(b, blockSize, level) }) } }) } } func benchmarkStreamDecompress(b *testing.B, blockSize, level int) { block := newBenchString(blockSize * benchBlocksPerStream) cd := CompressLevel(nil, block, level) b.Logf("compressionRatio: %f", float64(len(block))/float64(len(cd))) b.ReportAllocs() b.SetBytes(int64(len(block))) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { r := bytes.NewReader(cd) for pb.Next() { if err := StreamDecompress(ioutil.Discard, r); err != nil { panic(fmt.Errorf("unexpected error: %s", err)) } r.Reset(cd) } }) } golang-github-valyala-gozstd-1.14.2+ds/writer.go000066400000000000000000000256461414322045000215400ustar00rootroot00000000000000package gozstd /* #cgo CFLAGS: -O3 #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zstd_errors.h" #include <stdlib.h> // for malloc/free #include <stdint.h> // for uintptr_t // The following *_wrapper functions allow avoiding memory allocations // during calls from Go. // See https://github.com/golang/go/issues/24450 . 
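// Passing the C object pointers as plain uintptr_t integers keeps them opaque to cgo's pointer checks, so no Go-visible pointer crosses the call boundary and no per-call allocation is needed; each wrapper simply casts the integer back to the real pointer type before delegating to the corresponding zstd function.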
static size_t ZSTD_CCtx_setParameter_wrapper(uintptr_t cs, ZSTD_cParameter param, int value) { return ZSTD_CCtx_setParameter((ZSTD_CStream*)cs, param, value); } static size_t ZSTD_initCStream_wrapper(uintptr_t cs, int compressionLevel) { return ZSTD_initCStream((ZSTD_CStream*)cs, compressionLevel); } static size_t ZSTD_CCtx_refCDict_wrapper(uintptr_t cc, uintptr_t dict) { return ZSTD_CCtx_refCDict((ZSTD_CCtx*)cc, (ZSTD_CDict*)dict); } static size_t ZSTD_freeCStream_wrapper(uintptr_t cs) { return ZSTD_freeCStream((ZSTD_CStream*)cs); } static size_t ZSTD_compressStream_wrapper(uintptr_t cs, uintptr_t output, uintptr_t input) { return ZSTD_compressStream((ZSTD_CStream*)cs, (ZSTD_outBuffer*)output, (ZSTD_inBuffer*)input); } static size_t ZSTD_flushStream_wrapper(uintptr_t cs, uintptr_t output) { return ZSTD_flushStream((ZSTD_CStream*)cs, (ZSTD_outBuffer*)output); } static size_t ZSTD_endStream_wrapper(uintptr_t cs, uintptr_t output) { return ZSTD_endStream((ZSTD_CStream*)cs, (ZSTD_outBuffer*)output); } */ import "C" import ( "fmt" "io" "runtime" "unsafe" ) var ( cstreamInBufSize = C.ZSTD_CStreamInSize() cstreamOutBufSize = C.ZSTD_CStreamOutSize() ) type cMemPtr *[1 << 30]byte // Writer implements zstd writer. type Writer struct { w io.Writer compressionLevel int wlog int cs *C.ZSTD_CStream cd *CDict inBuf *C.ZSTD_inBuffer outBuf *C.ZSTD_outBuffer inBufGo cMemPtr outBufGo cMemPtr } // NewWriter returns new zstd writer writing compressed data to w. // // The returned writer must be closed with Close call in order // to finalize the compressed stream. // // Call Release when the Writer is no longer needed. func NewWriter(w io.Writer) *Writer { return NewWriterParams(w, nil) } // NewWriterLevel returns new zstd writer writing compressed data to w // at the given compression level. // // The returned writer must be closed with Close call in order // to finalize the compressed stream. // // Call Release when the Writer is no longer needed. func NewWriterLevel(w io.Writer, compressionLevel int) *Writer { params := &WriterParams{ CompressionLevel: compressionLevel, } return NewWriterParams(w, params) } // NewWriterDict returns new zstd writer writing compressed data to w // using the given cd. // // The returned writer must be closed with Close call in order // to finalize the compressed stream. // // Call Release when the Writer is no longer needed. func NewWriterDict(w io.Writer, cd *CDict) *Writer { params := &WriterParams{ Dict: cd, } return NewWriterParams(w, params) } const ( // WindowLogMin is the minimum value of the windowLog parameter. WindowLogMin = 10 // from zstd.h // WindowLogMax32 is the maximum value of the windowLog parameter on 32-bit architectures. WindowLogMax32 = 30 // from zstd.h // WindowLogMax64 is the maximum value of the windowLog parameter on 64-bit architectures. WindowLogMax64 = 31 // from zstd.h // DefaultWindowLog is the default value of the windowLog parameter. DefaultWindowLog = 0 ) // A WriterParams allows users to specify compression parameters by calling // NewWriterParams. // // Calling NewWriterParams with a nil WriterParams is equivalent to calling // NewWriter. type WriterParams struct { // Compression level. Special value 0 means 'default compression level'. CompressionLevel int // WindowLog. Must be clamped between WindowLogMin and WindowLogMax32/64. // Special value 0 means 'use default windowLog'. // // Note: enabling long distance matching increases memory usage for both // compressor and decompressor. 
When set to a value greater than 27, the // decompressor requires special treatment. WindowLog int // Dict is optional dictionary used for compression. Dict *CDict } // NewWriterParams returns new zstd writer writing compressed data to w // using the given set of parameters. // // The returned writer must be closed with Close call in order // to finalize the compressed stream. // // Call Release when the Writer is no longer needed. func NewWriterParams(w io.Writer, params *WriterParams) *Writer { if params == nil { params = &WriterParams{} } cs := C.ZSTD_createCStream() initCStream(cs, *params) inBuf := (*C.ZSTD_inBuffer)(C.malloc(C.sizeof_ZSTD_inBuffer)) inBuf.src = C.malloc(cstreamInBufSize) inBuf.size = 0 inBuf.pos = 0 outBuf := (*C.ZSTD_outBuffer)(C.malloc(C.sizeof_ZSTD_outBuffer)) outBuf.dst = C.malloc(cstreamOutBufSize) outBuf.size = cstreamOutBufSize outBuf.pos = 0 zw := &Writer{ w: w, compressionLevel: params.CompressionLevel, wlog: params.WindowLog, cs: cs, cd: params.Dict, inBuf: inBuf, outBuf: outBuf, } zw.inBufGo = cMemPtr(zw.inBuf.src) zw.outBufGo = cMemPtr(zw.outBuf.dst) runtime.SetFinalizer(zw, freeCStream) return zw } // Reset resets zw to write to w using the given dictionary cd and the given // compressionLevel. Use ResetWriterParams if you wish to change other // parameters that were set via WriterParams. func (zw *Writer) Reset(w io.Writer, cd *CDict, compressionLevel int) { params := WriterParams{ CompressionLevel: compressionLevel, WindowLog: zw.wlog, Dict: cd, } zw.ResetWriterParams(w, &params) } // ResetWriterParams resets zw to write to w using the given set of parameters. func (zw *Writer) ResetWriterParams(w io.Writer, params *WriterParams) { zw.inBuf.size = 0 zw.inBuf.pos = 0 zw.outBuf.size = cstreamOutBufSize zw.outBuf.pos = 0 zw.cd = params.Dict initCStream(zw.cs, *params) zw.w = w } func initCStream(cs *C.ZSTD_CStream, params WriterParams) { if params.Dict != nil { result := C.ZSTD_CCtx_refCDict_wrapper( C.uintptr_t(uintptr(unsafe.Pointer(cs))), C.uintptr_t(uintptr(unsafe.Pointer(params.Dict.p)))) ensureNoError("ZSTD_CCtx_refCDict", result) } else { result := C.ZSTD_initCStream_wrapper( C.uintptr_t(uintptr(unsafe.Pointer(cs))), C.int(params.CompressionLevel)) ensureNoError("ZSTD_initCStream", result) } result := C.ZSTD_CCtx_setParameter_wrapper( C.uintptr_t(uintptr(unsafe.Pointer(cs))), C.ZSTD_cParameter(C.ZSTD_c_windowLog), C.int(params.WindowLog)) ensureNoError("ZSTD_CCtx_setParameter", result) } func freeCStream(v interface{}) { v.(*Writer).Release() } // Release releases all the resources occupied by zw. // // zw cannot be used after the release. func (zw *Writer) Release() { if zw.cs == nil { return } result := C.ZSTD_freeCStream_wrapper( C.uintptr_t(uintptr(unsafe.Pointer(zw.cs)))) ensureNoError("ZSTD_freeCStream", result) zw.cs = nil C.free(unsafe.Pointer(zw.inBuf.src)) C.free(unsafe.Pointer(zw.inBuf)) zw.inBuf = nil C.free(unsafe.Pointer(zw.outBuf.dst)) C.free(unsafe.Pointer(zw.outBuf)) zw.outBuf = nil zw.w = nil zw.cd = nil } // ReadFrom reads all the data from r and writes it to zw. // // Returns the number of bytes read from r. // // ReadFrom may not flush the compressed data to the underlying writer // due to performance reasons. // Call Flush or Close when the compressed data must propagate // to the underlying writer. func (zw *Writer) ReadFrom(r io.Reader) (int64, error) { nn := int64(0) for { // Fill the inBuf. 
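// Each iteration has two phases: fill inBuf with up to cstreamInBufSize bytes from r, then hand the buffered block to the C compressor via flushInBuf; the loop exits from inside the fill phase once r reports io.EOF.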
for zw.inBuf.size < cstreamInBufSize { n, err := r.Read(zw.inBufGo[zw.inBuf.size:cstreamInBufSize]) // Sometimes n > 0 even when Read() returns an error. // This is true especially if the error is io.EOF. zw.inBuf.size += C.size_t(n) nn += int64(n) if err != nil { if err == io.EOF { return nn, nil } return nn, err } } // Flush the inBuf. if err := zw.flushInBuf(); err != nil { return nn, err } } } // Write writes p to zw. // // Write doesn't flush the compressed data to the underlying writer // due to performance reasons. // Call Flush or Close when the compressed data must propagate // to the underlying writer. func (zw *Writer) Write(p []byte) (int, error) { pLen := len(p) if pLen == 0 { return 0, nil } for { n := copy(zw.inBufGo[zw.inBuf.size:cstreamInBufSize], p) zw.inBuf.size += C.size_t(n) p = p[n:] if len(p) == 0 { // Fast path - just copy the data to input buffer. return pLen, nil } if err := zw.flushInBuf(); err != nil { return 0, err } } } func (zw *Writer) flushInBuf() error { prevInBufPos := zw.inBuf.pos result := C.ZSTD_compressStream_wrapper( C.uintptr_t(uintptr(unsafe.Pointer(zw.cs))), C.uintptr_t(uintptr(unsafe.Pointer(zw.outBuf))), C.uintptr_t(uintptr(unsafe.Pointer(zw.inBuf)))) ensureNoError("ZSTD_compressStream", result) // Move the remaining data to the start of inBuf. copy(zw.inBufGo[:cstreamInBufSize], zw.inBufGo[zw.inBuf.pos:zw.inBuf.size]) zw.inBuf.size -= zw.inBuf.pos zw.inBuf.pos = 0 if zw.outBuf.size-zw.outBuf.pos > zw.outBuf.pos && prevInBufPos != zw.inBuf.pos { // There is enough space in outBuf and the last compression // succeeded, so don't flush outBuf yet. return nil } // Flush outBuf, since there is low space in it or the last compression // attempt was unsuccessful. return zw.flushOutBuf() } func (zw *Writer) flushOutBuf() error { if zw.outBuf.pos == 0 { // Nothing to flush. return nil } outBuf := zw.outBufGo[:zw.outBuf.pos] n, err := zw.w.Write(outBuf) zw.outBuf.pos = 0 if err != nil { return fmt.Errorf("cannot flush internal buffer to the underlying writer: %s", err) } if n != len(outBuf) { panic(fmt.Errorf("BUG: the underlying writer violated io.Writer contract and didn't return error after writing incomplete data; written %d bytes; want %d bytes", n, len(outBuf))) } return nil } // Flush flushes the remaining data from zw to the underlying writer. func (zw *Writer) Flush() error { // Flush inBuf. for zw.inBuf.size > 0 { if err := zw.flushInBuf(); err != nil { return err } } // Flush the internal buffer to outBuf. for { result := C.ZSTD_flushStream_wrapper( C.uintptr_t(uintptr(unsafe.Pointer(zw.cs))), C.uintptr_t(uintptr(unsafe.Pointer(zw.outBuf)))) ensureNoError("ZSTD_flushStream", result) if err := zw.flushOutBuf(); err != nil { return err } if result == 0 { // No more data left in the internal buffer. return nil } } } // Close finalizes the compressed stream and flushes all the compressed data // to the underlying writer. // // It doesn't close the underlying writer passed to New* functions. 
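//
// A typical finalization sequence looks like this (sketch, mirroring
// ExampleWriter in writer_example_test.go):
//
//	if err := zw.Close(); err != nil {
//		log.Fatalf("cannot close writer: %s", err)
//	}
//	zw.Release()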
func (zw *Writer) Close() error { if err := zw.Flush(); err != nil { return err } for { result := C.ZSTD_endStream_wrapper( C.uintptr_t(uintptr(unsafe.Pointer(zw.cs))), C.uintptr_t(uintptr(unsafe.Pointer(zw.outBuf)))) ensureNoError("ZSTD_endStream", result) if err := zw.flushOutBuf(); err != nil { return err } if result == 0 { return nil } } } golang-github-valyala-gozstd-1.14.2+ds/writer_example_test.go000066400000000000000000000053111414322045000242770ustar00rootroot00000000000000package gozstd import ( "bytes" "fmt" "io" "log" ) func ExampleWriter() { // Compress data to bb. var bb bytes.Buffer zw := NewWriter(&bb) defer zw.Release() for i := 0; i < 3; i++ { fmt.Fprintf(zw, "line %d\n", i) } if err := zw.Close(); err != nil { log.Fatalf("cannot close writer: %s", err) } // Decompress the data and verify it is valid. plainData, err := Decompress(nil, bb.Bytes()) fmt.Printf("err: %v\n%s", err, plainData) // Output: // err: <nil> // line 0 // line 1 // line 2 } func ExampleWriter_Flush() { var bb bytes.Buffer zw := NewWriter(&bb) defer zw.Release() // Write some data to zw. data := []byte("some data\nto compress") if _, err := zw.Write(data); err != nil { log.Fatalf("cannot write data to zw: %s", err) } // Verify the data is cached in zw and isn't propagated to bb. if bb.Len() > 0 { log.Fatalf("%d bytes unexpectedly propagated to bb", bb.Len()) } // Flush the compressed data to bb. if err := zw.Flush(); err != nil { log.Fatalf("cannot flush compressed data: %s", err) } // Verify the compressed data is propagated to bb. if bb.Len() == 0 { log.Fatalf("the compressed data isn't propagated to bb") } // Try reading the compressed data with reader. zr := NewReader(&bb) defer zr.Release() buf := make([]byte, len(data)) if _, err := io.ReadFull(zr, buf); err != nil { log.Fatalf("cannot read the compressed data: %s", err) } fmt.Printf("%s", buf) // Output: // some data // to compress } func ExampleWriter_Reset() { zw := NewWriter(nil) defer zw.Release() // Write to different destinations using the same Writer. for i := 0; i < 3; i++ { var bb bytes.Buffer zw.Reset(&bb, nil, DefaultCompressionLevel) if _, err := zw.Write([]byte(fmt.Sprintf("line %d", i))); err != nil { log.Fatalf("unexpected error when writing data: %s", err) } if err := zw.Close(); err != nil { log.Fatalf("unexpected error when closing zw: %s", err) } // Decompress the compressed data. plainData, err := Decompress(nil, bb.Bytes()) if err != nil { log.Fatalf("unexpected error when decompressing data: %s", err) } fmt.Printf("%s\n", plainData) } // Output: // line 0 // line 1 // line 2 } func ExampleWriterParams() { // Compress data to bb. var bb bytes.Buffer zw := NewWriterParams(&bb, &WriterParams{ CompressionLevel: 10, WindowLog: 14, }) defer zw.Release() for i := 0; i < 3; i++ { fmt.Fprintf(zw, "line %d\n", i) } if err := zw.Close(); err != nil { log.Fatalf("cannot close writer: %s", err) } // Decompress the data and verify it is valid. 
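// Note: WindowLog 14 above caps the match window at 1<<14 = 16 KiB, trading a little compression ratio for lower memory usage.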
plainData, err := Decompress(nil, bb.Bytes()) fmt.Printf("err: %v\n%s", err, plainData) // Output: // err: <nil> // line 0 // line 1 // line 2 } golang-github-valyala-gozstd-1.14.2+ds/writer_test.go000066400000000000000000000312241414322045000225660ustar00rootroot00000000000000package gozstd import ( "bytes" "encoding/binary" "fmt" "io" "io/ioutil" "math/rand" "strings" "testing" "time" ) type EOFReader struct { b []byte } func (er EOFReader) Read(p []byte) (int, error) { if len(p) < len(er.b) { er.b = er.b[:len(p)] } return copy(p, er.b), io.EOF } func TestWriterReadFromWithEOF(t *testing.T) { var bb bytes.Buffer zw := NewWriter(&bb) defer zw.Release() data := []byte(newTestString(42, 3)) n, err := zw.ReadFrom(EOFReader{data}) if err != nil { t.Fatalf("cannot read data to zw: %s", err) } if n != int64(len(data)) { t.Fatalf("unexpected number of bytes read; got %d; want %d", n, len(data)) } } func TestWriterReadFrom(t *testing.T) { var bb bytes.Buffer zw := NewWriter(&bb) defer zw.Release() data := newTestString(132*1024, 3) n, err := zw.ReadFrom(bytes.NewBufferString(data)) if err != nil { t.Fatalf("cannot read data to zw: %s", err) } if n != int64(len(data)) { t.Fatalf("unexpected number of bytes read; got %d; want %d", n, len(data)) } if err := zw.Close(); err != nil { t.Fatalf("cannot close zw: %s", err) } plainData, err := Decompress(nil, bb.Bytes()) if err != nil { t.Fatalf("cannot decompress data: %s", err) } if string(plainData) != data { t.Fatalf("unexpected data decompressed; got\n%X; want\n%X", plainData, data) } } func TestNewWriterLevel(t *testing.T) { src := []byte(newTestString(512, 3)) for level := 0; level < 23; level++ { var bb bytes.Buffer zw := NewWriterLevel(&bb, level) _, err := io.Copy(zw, bytes.NewReader(src)) if err != nil { t.Fatalf("error when compressing on level %d: %s", level, err) } if err := zw.Close(); err != nil { t.Fatalf("error when closing zw on level %d: %s", level, err) } zw.Release() plainData, err := Decompress(nil, bb.Bytes()) if err != nil { t.Fatalf("cannot decompress data on level %d: %s", level, err) } if !bytes.Equal(plainData, src) { t.Fatalf("unexpected data obtained after decompression on level %d; got\n%X; want\n%X", level, plainData, src) } } } func TestWriterDict(t *testing.T) { var samples [][]byte for i := 0; i < 1e4; i++ { sample := []byte(fmt.Sprintf("this is a sample number %d", i)) samples = append(samples, sample) } dict := BuildDict(samples, 8*1024) cd, err := NewCDict(dict) if err != nil { t.Fatalf("cannot create CDict: %s", err) } defer cd.Release() dd, err := NewDDict(dict) if err != nil { t.Fatalf("cannot create DDict: %s", err) } defer dd.Release() // Run serial test. if err := testWriterDictSerial(cd, dd); err != nil { t.Fatalf("error in serial test: %s", err) } // Run concurrent test. 
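// The goroutines below share a single CDict/DDict pair: dictionaries are immutable once created, so concurrent use is expected to be safe, which is precisely what this concurrent test exercises.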
ch := make(chan error, 3) for i := 0; i < cap(ch); i++ { go func() { ch <- testWriterDictSerial(cd, dd) }() } for i := 0; i < cap(ch); i++ { select { case err := <-ch: if err != nil { t.Fatalf("error in concurrent test: %s", err) } case <-time.After(time.Second): t.Fatalf("timeout in concurrent test") } } } func testWriterDictSerial(cd *CDict, dd *DDict) error { var bb bytes.Buffer var bbOrig bytes.Buffer zw := NewWriterDict(&bb, cd) defer zw.Release() w := io.MultiWriter(zw, &bbOrig) for i := 0; i < 8000; i++ { if _, err := fmt.Fprintf(w, "This is number %d ", i); err != nil { return fmt.Errorf("error when writing data to zw: %s", err) } } if err := zw.Close(); err != nil { return fmt.Errorf("cannot close zw: %s", err) } // Decompress via Decompress. compressedData := bb.Bytes() plainData, err := DecompressDict(nil, compressedData, dd) if err != nil { return fmt.Errorf("cannot decompress data with dict: %s", err) } if !bytes.Equal(plainData, bbOrig.Bytes()) { return fmt.Errorf("unexpected uncompressed data; got\n%q; want\n%q\nlen(plainData)=%d, len(origData)=%d", plainData, bbOrig.Bytes(), len(plainData), bbOrig.Len()) } // Decompress via Reader. zr := NewReaderDict(&bb, dd) defer zr.Release() plainData, err = ioutil.ReadAll(zr) if err != nil { return fmt.Errorf("cannot stream decompress data with dict: %s", err) } if !bytes.Equal(plainData, bbOrig.Bytes()) { return fmt.Errorf("unexpected stream uncompressed data; got\n%q; want\n%q\nlen(plainData)=%d, len(origData)=%d", plainData, bbOrig.Bytes(), len(plainData), bbOrig.Len()) } // Try decompressing without dict. _, err = Decompress(nil, compressedData) if err == nil { return fmt.Errorf("expecting non-nil error when decompressing without dict") } if !strings.Contains(err.Error(), "Dictionary mismatch") { return fmt.Errorf("unexpected error when decompressing without dict; got %q; want %q", err, "Dictionary mismatch") } zrNoDict := NewReader(bytes.NewReader(compressedData)) defer zrNoDict.Release() _, err = ioutil.ReadAll(zrNoDict) if err == nil { return fmt.Errorf("expecting non-nil error when stream decompressing without dict") } if !strings.Contains(err.Error(), "Dictionary mismatch") { return fmt.Errorf("unexpected error when stream decompressing without dict; got %q; want %q", err, "Dictionary mismatch") } return nil } func TestWriterWindowLog(t *testing.T) { // Do not exceed 27 as decompressing data would require special treatment // out of the scope of this library. For instance, using the command-line // `zstd` would require passing the --long=28 option. 
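// A frame produced with windowLog=N may force the decompressor to buffer up to 1<<N bytes, which is why zstd caps the default decompression window at 2^27 (128 MiB) and requires an explicit opt-in above that.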
const wlogMax = 27 src := []byte(newTestString(512, 3)) for level := 0; level < 23; level++ { for wlog := WindowLogMin; wlog <= wlogMax; wlog++ { params := &WriterParams{ CompressionLevel: level, WindowLog: wlog, } var bb bytes.Buffer zw := NewWriterParams(&bb, params) _, err := io.Copy(zw, bytes.NewReader(src)) if err != nil { t.Fatalf("error when compressing on level %d wlog %d: %s", level, wlog, err) } if err := zw.Close(); err != nil { t.Fatalf("error when closing zw on level %d wlog %d: %s", level, wlog, err) } zw.Release() zr := NewReader(bytes.NewReader(bb.Bytes())) plainData, err := ioutil.ReadAll(zr) if err != nil { t.Fatalf("cannot decompress data on level %d wlog %d: %s", level, wlog, err) } if !bytes.Equal(plainData, src) { t.Fatalf("unexpected data obtained after decompression on level %d wlog %d; got\n%X; want\n%X", level, wlog, plainData, src) } zr.Release() } } } func TestWriterResetWriterParams(t *testing.T) { var bbOrig bytes.Buffer zw := NewWriter(ioutil.Discard) defer zw.Release() for j := 0; j < 1e4; j++ { if _, err := fmt.Fprintf(&bbOrig, "This is number %d ", j); err != nil { t.Fatalf("error when writing data to bbOrig: %s", err) } } const wlogMax = 27 for i := 0; i < 100; i++ { var bb bytes.Buffer params := WriterParams{ // loop WindowLog from WindowLogMin to 27 WindowLog: WindowLogMin + i%(wlogMax-WindowLogMin), CompressionLevel: i % 10, } zw.ResetWriterParams(&bb, ¶ms) io.Copy(zw, bytes.NewReader(bbOrig.Bytes())) if err := zw.Close(); err != nil { t.Fatalf("error when closing zw: %s", err) } plainData, err := Decompress(nil, bb.Bytes()) if err != nil { t.Fatalf("cannot decompress data written with %+v: %s", params, err) } origData := bbOrig.Bytes() if !bytes.Equal(plainData, origData) { t.Fatalf("unexpected data decompressed: got\n%q; want\n%q\nlen(data)=%d, len(orig)=%d", plainData, origData, len(plainData), len(origData)) } } } func TestWriterMultiFrames(t *testing.T) { var bb bytes.Buffer var bbOrig bytes.Buffer zw := NewWriter(&bb) defer zw.Release() w := io.MultiWriter(zw, &bbOrig) for bbOrig.Len() < 3*128*1024 { if _, err := fmt.Fprintf(w, "writer big data %d, ", bbOrig.Len()); err != nil { t.Fatalf("unexpected error when writing to zw: %s", err) } } if err := zw.Close(); err != nil { t.Fatalf("unexpected error when closing zw: %s", err) } plainData, err := Decompress(nil, bb.Bytes()) if err != nil { t.Fatalf("cannot decompress big data: %s", err) } origData := bbOrig.Bytes() if !bytes.Equal(plainData, origData) { t.Fatalf("unexpected data decompressed: got\n%q; want\n%q\nlen(data)=%d, len(orig)=%d", plainData, origData, len(plainData), len(origData)) } } func TestWriterBadUnderlyingWriter(t *testing.T) { zw := NewWriter(&badWriter{}) defer zw.Release() data := []byte(newTestString(123, 20)) for { if _, err := zw.Write(data); err != nil { if !strings.Contains(err.Error(), "badWriter failed") { t.Fatalf("unexpected error: %s", err) } break } } } type badWriter struct{} func (*badWriter) Write(p []byte) (int, error) { if len(p) == 0 { return 0, nil } if rand.Intn(10) == 0 { return 0, fmt.Errorf("badWriter failed") } return len(p), nil } func TestWriter(t *testing.T) { testWriter(t, "") testWriter(t, "a") testWriter(t, "foo bar") testWriter(t, "aasdf sdfa dsa fdsaf dsa") for size := 1; size <= 4e5; size *= 2 { s := newTestString(size, 20) testWriter(t, s) } } func testWriter(t *testing.T, s string) { t.Helper() // Serial test if err := testWriterSerial(s); err != nil { t.Fatalf("error in serial writer test: %s", err) } // Concurrent test ch := make(chan error, 
10) for i := 0; i < cap(ch); i++ { go func() { ch <- testWriterSerial(s) }() } for i := 0; i < cap(ch); i++ { select { case err := <-ch: if err != nil { t.Fatalf("unexpected error: %s", err) } case <-time.After(time.Second): t.Fatalf("timeout") } } } func testWriterSerial(s string) error { zw := NewWriter(nil) defer zw.Release() for i := 0; i < 2; i++ { var bb bytes.Buffer zw.Reset(&bb, nil, DefaultCompressionLevel) if err := testWriterExt(zw, s); err != nil { return err } cd := bb.Bytes() // Use Decompress. dd, err := Decompress(nil, cd) if err != nil { return fmt.Errorf("unexpected error when decompressing data: %s", err) } if string(dd) != s { return fmt.Errorf("unexpected data after the decompression; got\n%X; want\n%X", dd, s) } // Use Reader zr := NewReader(&bb) dd, err = ioutil.ReadAll(zr) if err != nil { return fmt.Errorf("unexpected error when reading compressed data: %s", err) } if string(dd) != s { return fmt.Errorf("unexpected data after reading compressed data; got\n%X; want\n%X", dd, s) } } return nil } func testWriterExt(zw *Writer, s string) error { bs := []byte(s) // Verify writing zero bytes. n, err := zw.Write(bs[:0]) if err != nil { return fmt.Errorf("cannot write zero-byte value: %s", err) } if n != 0 { return fmt.Errorf("unexpected number of bytes written; got %d; want %d", n, 0) } // Verify writing random number of bytes. i := 0 for i < len(bs) { nWant := rand.Intn(len(bs)-i)/7 + 1 n, err := zw.Write(bs[i : i+nWant]) if err != nil { return fmt.Errorf("unexpected error when writing data: %s", err) } if n != nWant { return fmt.Errorf("unexpected number of bytes written; got %d; want %d", n, nWant) } i += nWant } if err := zw.Flush(); err != nil { return fmt.Errorf("unexpected error when flushing data: %s", err) } if err := zw.Close(); err != nil { return fmt.Errorf("unexpected error when closing zw: %s", err) } return nil } func TestWriterBig(t *testing.T) { pr, pw := io.Pipe() zw := NewWriter(pw) defer zw.Release() zr := NewReader(pr) defer zr.Release() doneCh := make(chan error) var writtenBB bytes.Buffer go func() { sizeBuf := make([]byte, 8) for writtenBB.Len() < 3e6 { packetSize := rand.Intn(1000) + 1 binary.BigEndian.PutUint64(sizeBuf, uint64(packetSize)) if _, err := zw.Write(sizeBuf); err != nil { panic(fmt.Errorf("cannot write sizeBuf: %s", err)) } s := newTestString(packetSize, 10) if _, err := zw.Write([]byte(s)); err != nil { panic(fmt.Errorf("cannot write packet with size %d: %s", packetSize, err)) } writtenBB.WriteString(s) } binary.BigEndian.PutUint64(sizeBuf, 0) if _, err := zw.Write(sizeBuf); err != nil { panic(fmt.Errorf("cannot write `end of stream` packet: %s", err)) } if err := zw.Flush(); err != nil { panic(fmt.Errorf("cannot flush data: %s", err)) } doneCh <- nil }() var readBB bytes.Buffer sizeBuf := make([]byte, 8) for { if _, err := io.ReadFull(zr, sizeBuf); err != nil { t.Fatalf("cannot read sizeBuf: %s", err) } packetSize := binary.BigEndian.Uint64(sizeBuf) if packetSize == 0 { // end of stream. 
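// A zero-length packet is this test's end-of-stream sentinel: the framing protocol is an 8-byte big-endian length prefix followed by that many payload bytes, and the writer goroutine emits the zero length last, right before its final Flush.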
break } packetBuf := make([]byte, packetSize) if _, err := io.ReadFull(zr, packetBuf); err != nil { t.Fatalf("cannot read packetBuf: %s", err) } readBB.Write(packetBuf) } select { case <-doneCh: case <-time.After(5 * time.Second): t.Fatalf("timeout") } if writtenBB.Len() != readBB.Len() { t.Fatalf("non-equal lens for writtenBB and readBB: %d vs %d", writtenBB.Len(), readBB.Len()) } if !bytes.Equal(writtenBB.Bytes(), readBB.Bytes()) { t.Fatalf("unequal writtenBB and readBB\nwrittenBB=\n%X\nreadBB=\n%X", writtenBB.Bytes(), readBB.Bytes()) } } golang-github-valyala-gozstd-1.14.2+ds/writer_timing_test.go000066400000000000000000000044021414322045000241330ustar00rootroot00000000000000package gozstd import ( "fmt" "io/ioutil" "testing" ) const benchBlocksPerStream = 10 func BenchmarkWriterDict(b *testing.B) { for _, blockSize := range benchBlockSizes { b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) { for _, level := range benchCompressionLevels { b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) { benchmarkWriterDict(b, blockSize, level) }) } }) } } func benchmarkWriterDict(b *testing.B, blockSize, level int) { bd := getBenchDicts(level) block := newBenchString(blockSize * benchBlocksPerStream) b.ReportAllocs() b.SetBytes(int64(len(block))) b.RunParallel(func(pb *testing.PB) { zw := NewWriterDict(ioutil.Discard, bd.cd) defer zw.Release() for pb.Next() { for i := 0; i < benchBlocksPerStream; i++ { _, err := zw.Write(block[i*blockSize : (i+1)*blockSize]) if err != nil { panic(fmt.Errorf("unexpected error: %s", err)) } } if err := zw.Close(); err != nil { panic(fmt.Errorf("unexpected error: %s", err)) } zw.Reset(ioutil.Discard, bd.cd, level) } }) } func BenchmarkWriter(b *testing.B) { for _, blockSize := range benchBlockSizes { b.Run(fmt.Sprintf("blockSize_%d", blockSize), func(b *testing.B) { for _, level := range benchCompressionLevels { b.Run(fmt.Sprintf("level_%d", level), func(b *testing.B) { benchmarkWriter(b, blockSize, level) }) } }) } } func benchmarkWriter(b *testing.B, blockSize, level int) { block := newBenchString(blockSize * benchBlocksPerStream) b.ReportAllocs() b.SetBytes(int64(len(block))) b.RunParallel(func(pb *testing.PB) { zw := NewWriterLevel(ioutil.Discard, level) defer zw.Release() for pb.Next() { for i := 0; i < benchBlocksPerStream; i++ { _, err := zw.Write(block[i*blockSize : (i+1)*blockSize]) if err != nil { panic(fmt.Errorf("unexpected error: %s", err)) } } if err := zw.Close(); err != nil { panic(fmt.Errorf("unexpected error: %s", err)) } zw.Reset(ioutil.Discard, nil, level) } }) } func BenchmarkWriterResetAlloc(b *testing.B) { b.ReportAllocs() params := &WriterParams{} zw := NewWriter(ioutil.Discard) defer zw.Release() for n := 0; n < b.N; n++ { zw.Reset(ioutil.Discard, nil, 0) zw.ResetWriterParams(ioutil.Discard, params) } }
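// benchmarkWriterReuse is an illustrative sketch (not part of the original
// suite; the helper name is hypothetical) distilling the reuse pattern the
// benchmarks above rely on: allocate one Writer, then recycle it across
// streams via Reset instead of paying for NewWriterLevel per iteration.
func benchmarkWriterReuse(b *testing.B, block []byte, level int) {
	zw := NewWriterLevel(ioutil.Discard, level) // one-time allocation
	defer zw.Release()
	for n := 0; n < b.N; n++ {
		if _, err := zw.Write(block); err != nil {
			panic(fmt.Errorf("unexpected error: %s", err))
		}
		// Close finalizes the current compressed frame...
		if err := zw.Close(); err != nil {
			panic(fmt.Errorf("unexpected error: %s", err))
		}
		// ...and Reset prepares zw for the next stream without reallocating
		// the underlying ZSTD_CStream and its buffers.
		zw.Reset(ioutil.Discard, nil, level)
	}
}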