pax_global_header00006660000000000000000000000064140736414270014522gustar00rootroot0000000000000052 comment=28dc167a7824cd876a4cf3ce6f1bdaa1dda7c9c2 ksuid-1.0.4/000077500000000000000000000000001407364142700126435ustar00rootroot00000000000000ksuid-1.0.4/.circleci/000077500000000000000000000000001407364142700144765ustar00rootroot00000000000000ksuid-1.0.4/.circleci/config.yml000066400000000000000000000005051407364142700164660ustar00rootroot00000000000000version: 2 jobs: build: working_directory: /go/src/github.com/segmentio/ksuid docker: - image: circleci/golang steps: - checkout - setup_remote_docker: { reusable: true, docker_layer_caching: true } - run: go get -v -t ./... - run: go vet ./... - run: go test -v -race ./... ksuid-1.0.4/.gitignore000066400000000000000000000004641407364142700146370ustar00rootroot00000000000000# Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.prof /ksuid # Emacs *~ # govendor /vendor/*/ ksuid-1.0.4/LICENSE.md000066400000000000000000000020531407364142700142470ustar00rootroot00000000000000MIT License Copyright (c) 2017 Segment.io Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ksuid-1.0.4/README.md000066400000000000000000000214251407364142700141260ustar00rootroot00000000000000# ksuid [![Go Report Card](https://goreportcard.com/badge/github.com/segmentio/ksuid)](https://goreportcard.com/report/github.com/segmentio/ksuid) [![GoDoc](https://godoc.org/github.com/segmentio/ksuid?status.svg)](https://godoc.org/github.com/segmentio/ksuid) [![Circle CI](https://circleci.com/gh/segmentio/ksuid.svg?style=shield)](https://circleci.com/gh/segmentio/ksuid.svg?style=shield) ksuid is an efficient, comprehensive, battle-tested Go library for generating and parsing a specific kind of globally unique identifier called a *KSUID*. This library serves as its reference implementation. ## Install ```sh go get -u github.com/segmentio/ksuid ``` ## What is a KSUID? KSUID is for K-Sortable Unique IDentifier. It is a kind of globally unique identifier similar to a [RFC 4122 UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier), built from the ground-up to be "naturally" sorted by generation timestamp without any special type-aware logic. In short, running a set of KSUIDs through the UNIX `sort` command will result in a list ordered by generation time. ## Why use KSUIDs? There are numerous methods for generating unique identifiers, so why KSUID? 1. Naturally ordered by generation time 2. Collision-free, coordination-free, dependency-free 3. Highly portable representations Even if only one of these properties are important to you, KSUID is a great choice! 
:) Many projects chose to use KSUIDs *just* because the text representation is copy-and-paste friendly. ### 1. Naturally Ordered By Generation Time Unlike the more ubiquitous UUIDv4, a KSUID contains a timestamp component that allows them to be loosely sorted by generation time. This is not a strong guarantee (an invariant) as it depends on wall clocks, but is still incredibly useful in practice. Both the binary and text representations will sort by creation time without any special sorting logic. ### 2. Collision-free, Coordination-free, Dependency-free While RFC 4122 UUIDv1s *do* include a time component, there aren't enough bytes of randomness to provide strong protection against collisions (duplicates). With such a low amount of entropy, it is feasible for a malicious party to guess generated IDs, creating a problem for systems whose security is, implicitly or explicitly, sensitive to an adversary guessing identifiers. To fit into a 64-bit number space, [Snowflake IDs](https://blog.twitter.com/2010/announcing-snowflake) and its derivatives require coordination to avoid collisions, which significantly increases the deployment complexity and operational burden. A KSUID includes 128 bits of pseudorandom data ("entropy"). This number space is 64 times larger than the 122 bits used by the well-accepted RFC 4122 UUIDv4 standard. The additional timestamp component can be considered "bonus entropy" which further decreases the probability of collisions, to the point of physical infeasibility in any practical implementation. ### Highly Portable Representations The text *and* binary representations are lexicographically sortable, which allows them to be dropped into systems which do not natively support KSUIDs and retain their time-ordered property. The text representation is an alphanumeric base62 encoding, so it "fits" anywhere alphanumeric strings are accepted. 
No delimiters are used, so stringified KSUIDs won't be inadvertently truncated or tokenized when interpreted by software that is designed for human-readable text, a common problem for the text representation of RFC 4122 UUIDs. ## How do KSUIDs work? Binary KSUIDs are 20-bytes: a 32-bit unsigned integer UTC timestamp and a 128-bit randomly generated payload. The timestamp uses big-endian encoding, to support lexicographic sorting. The timestamp epoch is adjusted to March 5th, 2014, providing over 100 years of life. The payload is generated by a cryptographically-strong pseudorandom number generator. The text representation is always 27 characters, encoded in alphanumeric base62 that will lexicographically sort by timestamp. ## High Performance This library is designed to be used in code paths that are performance critical. Its code has been tuned to eliminate all non-essential overhead. The `KSUID` type is derived from a fixed-size array, which eliminates the additional reference chasing and allocation involved in a variable-width type. The API provides an interface for use in code paths which are sensitive to allocation. For example, the `Append` method can be used to parse the text representation and replace the contents of a `KSUID` value without additional heap allocation. All public package level "pure" functions are concurrency-safe, protected by a global mutex. For hot loops that generate a large amount of KSUIDs from a single Goroutine, the `Sequence` type is provided to elide the potential contention. By default, out of an abundance of caution, the cryptographically-secure PRNG is used to generate the random bits of a KSUID. This can be relaxed in extremely performance-critical code using the included `FastRander` type. `FastRander` uses the standard PRNG with a seed generated by the cryptographically-secure PRNG. 
*_NOTE:_ While there is no evidence that `FastRander` will increase the probability of a collision, it shouldn't be used in scenarios where uniqueness is important to security, as there is an increased chance the generated IDs can be predicted by an adversary.* ## Battle Tested This code has been used in production at Segment for several years, across a diverse array of projects. Trillions upon trillions of KSUIDs have been generated in some of Segment's most performance-critical, large-scale distributed systems. ## Plays Well With Others Designed to be integrated with other libraries, the `KSUID` type implements many standard library interfaces, including: * `Stringer` * `database/sql.Scanner` and `database/sql/driver.Valuer` * `encoding.BinaryMarshal` and `encoding.BinaryUnmarshal` * `encoding.TextMarshal` and `encoding.TextUnmarshal` (`encoding/json` friendly!) ## Command Line Tool This package comes with a command-line tool `ksuid`, useful for generating KSUIDs as well as inspecting the internal components of existing KSUIDs. Machine-friendly output is provided for scripting use cases. 
Given a Go build environment, it can be installed with the command: ```sh $ go install github.com/segmentio/ksuid/cmd/ksuid ``` ## CLI Usage Examples ### Generate a KSUID ```sh $ ksuid 0ujsswThIGTUYm2K8FjOOfXtY1K ``` ### Generate 4 KSUIDs ```sh $ ksuid -n 4 0ujsszwN8NRY24YaXiTIE2VWDTS 0ujsswThIGTUYm2K8FjOOfXtY1K 0ujssxh0cECutqzMgbtXSGnjorm 0ujsszgFvbiEr7CDgE3z8MAUPFt ``` ### Inspect the components of a KSUID ```sh $ ksuid -f inspect 0ujtsYcgvSTl8PAuAdqWYSMnLOv REPRESENTATION: String: 0ujtsYcgvSTl8PAuAdqWYSMnLOv Raw: 0669F7EFB5A1CD34B5F99D1154FB6853345C9735 COMPONENTS: Time: 2017-10-09 21:00:47 -0700 PDT Timestamp: 107608047 Payload: B5A1CD34B5F99D1154FB6853345C9735 ``` ### Generate a KSUID and inspect its components ```sh $ ksuid -f inspect REPRESENTATION: String: 0ujzPyRiIAffKhBux4PvQdDqMHY Raw: 066A029C73FC1AA3B2446246D6E89FCD909E8FE8 COMPONENTS: Time: 2017-10-09 21:46:20 -0700 PDT Timestamp: 107610780 Payload: 73FC1AA3B2446246D6E89FCD909E8FE8 ``` ### Inspect a KSUID with template formatted inspection output ```sh $ ksuid -f template -t '{{ .Time }}: {{ .Payload }}' 0ujtsYcgvSTl8PAuAdqWYSMnLOv 2017-10-09 21:00:47 -0700 PDT: B5A1CD34B5F99D1154FB6853345C9735 ``` ### Inspect multiple KSUIDs with template formatted output ```sh $ ksuid -f template -t '{{ .Time }}: {{ .Payload }}' $(ksuid -n 4) 2017-10-09 21:05:37 -0700 PDT: 304102BC687E087CC3A811F21D113CCF 2017-10-09 21:05:37 -0700 PDT: EAF0B240A9BFA55E079D887120D962F0 2017-10-09 21:05:37 -0700 PDT: DF0761769909ABB0C7BB9D66F79FC041 2017-10-09 21:05:37 -0700 PDT: 1A8F0E3D0BDEB84A5FAD702876F46543 ``` ### Generate KSUIDs and output JSON using template formatting ```sh $ ksuid -f template -t '{ "timestamp": "{{ .Timestamp }}", "payload": "{{ .Payload }}", "ksuid": "{{.String}}"}' -n 4 { "timestamp": "107611700", "payload": "9850EEEC191BF4FF26F99315CE43B0C8", "ksuid": "0uk1Hbc9dQ9pxyTqJ93IUrfhdGq"} { "timestamp": "107611700", "payload": "CC55072555316F45B8CA2D2979D3ED0A", "ksuid": "0uk1HdCJ6hUZKDgcxhpJwUl5ZEI"} { 
"timestamp": "107611700", "payload": "BA1C205D6177F0992D15EE606AE32238", "ksuid": "0uk1HcdvF0p8C20KtTfdRSB9XIm"} { "timestamp": "107611700", "payload": "67517BA309EA62AE7991B27BB6F2FCAC", "ksuid": "0uk1Ha7hGJ1Q9Xbnkt0yZgNwg3g"} ``` ## Implementations for other languages - Python: [svix-ksuid](https://github.com/svixhq/python-ksuid/) - Ruby: [ksuid-ruby](https://github.com/michaelherold/ksuid-ruby) - Java: [ksuid](https://github.com/ksuid/ksuid) - Rust: [rksuid](https://github.com/nharring/rksuid) - dotNet: [Ksuid.Net](https://github.com/JoyMoe/Ksuid.Net) ## License ksuid source code is available under an MIT [License](/LICENSE.md). ksuid-1.0.4/base62.go000066400000000000000000000115651407364142700142640ustar00rootroot00000000000000package ksuid import ( "encoding/binary" "errors" ) const ( // lexographic ordering (based on Unicode table) is 0-9A-Za-z base62Characters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" zeroString = "000000000000000000000000000" offsetUppercase = 10 offsetLowercase = 36 ) var ( errShortBuffer = errors.New("the output buffer is too small to hold to decoded value") ) // Converts a base 62 byte into the number value that it represents. func base62Value(digit byte) byte { switch { case digit >= '0' && digit <= '9': return digit - '0' case digit >= 'A' && digit <= 'Z': return offsetUppercase + (digit - 'A') default: return offsetLowercase + (digit - 'a') } } // This function encodes the base 62 representation of the src KSUID in binary // form into dst. // // In order to support a couple of optimizations the function assumes that src // is 20 bytes long and dst is 27 bytes long. // // Any unused bytes in dst will be set to the padding '0' byte. func fastEncodeBase62(dst []byte, src []byte) { const srcBase = 4294967296 const dstBase = 62 // Split src into 5 4-byte words, this is where most of the efficiency comes // from because this is a O(N^2) algorithm, and we make N = N / 4 by working // on 32 bits at a time. 
parts := [5]uint32{ binary.BigEndian.Uint32(src[0:4]), binary.BigEndian.Uint32(src[4:8]), binary.BigEndian.Uint32(src[8:12]), binary.BigEndian.Uint32(src[12:16]), binary.BigEndian.Uint32(src[16:20]), } n := len(dst) bp := parts[:] bq := [5]uint32{} for len(bp) != 0 { quotient := bq[:0] remainder := uint64(0) for _, c := range bp { value := uint64(c) + uint64(remainder)*srcBase digit := value / dstBase remainder = value % dstBase if len(quotient) != 0 || digit != 0 { quotient = append(quotient, uint32(digit)) } } // Writes at the end of the destination buffer because we computed the // lowest bits first. n-- dst[n] = base62Characters[remainder] bp = quotient } // Add padding at the head of the destination buffer for all bytes that were // not set. copy(dst[:n], zeroString) } // This function appends the base 62 representation of the KSUID in src to dst, // and returns the extended byte slice. // The result is left-padded with '0' bytes to always append 27 bytes to the // destination buffer. func fastAppendEncodeBase62(dst []byte, src []byte) []byte { dst = reserve(dst, stringEncodedLength) n := len(dst) fastEncodeBase62(dst[n:n+stringEncodedLength], src) return dst[:n+stringEncodedLength] } // This function decodes the base 62 representation of the src KSUID to the // binary form into dst. // // In order to support a couple of optimizations the function assumes that src // is 27 bytes long and dst is 20 bytes long. // // Any unused bytes in dst will be set to zero. func fastDecodeBase62(dst []byte, src []byte) error { const srcBase = 62 const dstBase = 4294967296 // This line helps BCE (Bounds Check Elimination). // It may be safely removed. 
_ = src[26] parts := [27]byte{ base62Value(src[0]), base62Value(src[1]), base62Value(src[2]), base62Value(src[3]), base62Value(src[4]), base62Value(src[5]), base62Value(src[6]), base62Value(src[7]), base62Value(src[8]), base62Value(src[9]), base62Value(src[10]), base62Value(src[11]), base62Value(src[12]), base62Value(src[13]), base62Value(src[14]), base62Value(src[15]), base62Value(src[16]), base62Value(src[17]), base62Value(src[18]), base62Value(src[19]), base62Value(src[20]), base62Value(src[21]), base62Value(src[22]), base62Value(src[23]), base62Value(src[24]), base62Value(src[25]), base62Value(src[26]), } n := len(dst) bp := parts[:] bq := [stringEncodedLength]byte{} for len(bp) > 0 { quotient := bq[:0] remainder := uint64(0) for _, c := range bp { value := uint64(c) + uint64(remainder)*srcBase digit := value / dstBase remainder = value % dstBase if len(quotient) != 0 || digit != 0 { quotient = append(quotient, byte(digit)) } } if n < 4 { return errShortBuffer } dst[n-4] = byte(remainder >> 24) dst[n-3] = byte(remainder >> 16) dst[n-2] = byte(remainder >> 8) dst[n-1] = byte(remainder) n -= 4 bp = quotient } var zero [20]byte copy(dst[:n], zero[:]) return nil } // This function appends the base 62 decoded version of src into dst. func fastAppendDecodeBase62(dst []byte, src []byte) []byte { dst = reserve(dst, byteLength) n := len(dst) fastDecodeBase62(dst[n:n+byteLength], src) return dst[:n+byteLength] } // Ensures that at least nbytes are available in the remaining capacity of the // destination slice, if not, a new copy is made and returned by the function. 
func reserve(dst []byte, nbytes int) []byte { c := cap(dst) n := len(dst) if avail := c - n; avail < nbytes { c *= 2 if (c - n) < nbytes { c = n + nbytes } b := make([]byte, n, c) copy(b, dst) dst = b } return dst } ksuid-1.0.4/base62_test.go000066400000000000000000000124201407364142700153120ustar00rootroot00000000000000package ksuid import ( "bytes" "sort" "strings" "testing" ) func TestBase10ToBase62AndBack(t *testing.T) { number := []byte{1, 2, 3, 4} encoded := base2base(number, 10, 62) decoded := base2base(encoded, 62, 10) if bytes.Compare(number, decoded) != 0 { t.Fatal(number, " != ", decoded) } } func TestBase256ToBase62AndBack(t *testing.T) { number := []byte{255, 254, 253, 251} encoded := base2base(number, 256, 62) decoded := base2base(encoded, 62, 256) if bytes.Compare(number, decoded) != 0 { t.Fatal(number, " != ", decoded) } } func TestEncodeAndDecodeBase62(t *testing.T) { helloWorld := []byte("hello world") encoded := encodeBase62(helloWorld) decoded := decodeBase62(encoded) if len(encoded) < len(helloWorld) { t.Fatal("length of encoded base62 string", encoded, "should be >= than raw bytes!") } if bytes.Compare(helloWorld, decoded) != 0 { t.Fatal(decoded, " != ", helloWorld) } } func TestLexographicOrdering(t *testing.T) { unsortedStrings := make([]string, 256) for i := 0; i < 256; i++ { s := string(encodeBase62([]byte{0, byte(i)})) unsortedStrings[i] = strings.Repeat("0", 2-len(s)) + s } if !sort.StringsAreSorted(unsortedStrings) { sortedStrings := make([]string, len(unsortedStrings)) for i, s := range unsortedStrings { sortedStrings[i] = s } sort.Strings(sortedStrings) t.Fatal("base62 encoder does not produce lexographically sorted output.", "expected:", sortedStrings, "actual:", unsortedStrings) } } func TestBase62Value(t *testing.T) { s := base62Characters for i := range s { v := int(base62Value(s[i])) if v != i { t.Error("bad value:") t.Log("<<<", i) t.Log(">>>", v) } } } func TestFastAppendEncodeBase62(t *testing.T) { for i := 0; i != 1000; i++ { 
id := New() b0 := id[:] b1 := appendEncodeBase62(nil, b0) b2 := fastAppendEncodeBase62(nil, b0) s1 := string(leftpad(b1, '0', stringEncodedLength)) s2 := string(b2) if s1 != s2 { t.Error("bad base62 representation of", id) t.Log("<<<", s1, len(s1)) t.Log(">>>", s2, len(s2)) } } } func TestFastAppendDecodeBase62(t *testing.T) { for i := 0; i != 1000; i++ { id := New() b0 := leftpad(encodeBase62(id[:]), '0', stringEncodedLength) b1 := appendDecodeBase62(nil, []byte(string(b0))) // because it modifies the input buffer b2 := fastAppendDecodeBase62(nil, b0) if !bytes.Equal(leftpad(b1, 0, byteLength), b2) { t.Error("bad binary representation of", string(b0)) t.Log("<<<", b1) t.Log(">>>", b2) } } } func BenchmarkAppendEncodeBase62(b *testing.B) { a := [stringEncodedLength]byte{} id := New() for i := 0; i != b.N; i++ { appendEncodeBase62(a[:0], id[:]) } } func BenchmarkAppendFastEncodeBase62(b *testing.B) { a := [stringEncodedLength]byte{} id := New() for i := 0; i != b.N; i++ { fastAppendEncodeBase62(a[:0], id[:]) } } func BenchmarkAppendDecodeBase62(b *testing.B) { a := [byteLength]byte{} id := []byte(New().String()) for i := 0; i != b.N; i++ { b := [stringEncodedLength]byte{} copy(b[:], id) appendDecodeBase62(a[:0], b[:]) } } func BenchmarkAppendFastDecodeBase62(b *testing.B) { a := [byteLength]byte{} id := []byte(New().String()) for i := 0; i != b.N; i++ { fastAppendDecodeBase62(a[:0], id) } } // The functions bellow were the initial implementation of the base conversion // algorithms, they were replaced by optimized versions later on. We keep them // in the test files as a reference to ensure compatibility between the generic // and optimized implementations. 
func appendBase2Base(dst []byte, src []byte, inBase int, outBase int) []byte { off := len(dst) bs := src[:] bq := [stringEncodedLength]byte{} for len(bs) > 0 { length := len(bs) quotient := bq[:0] remainder := 0 for i := 0; i != length; i++ { acc := int(bs[i]) + remainder*inBase d := acc/outBase | 0 remainder = acc % outBase if len(quotient) > 0 || d > 0 { quotient = append(quotient, byte(d)) } } // Appends in reverse order, the byte slice gets reversed before it's // returned by the function. dst = append(dst, byte(remainder)) bs = quotient } reverse(dst[off:]) return dst } func base2base(src []byte, inBase int, outBase int) []byte { return appendBase2Base(nil, src, inBase, outBase) } func appendEncodeBase62(dst []byte, src []byte) []byte { off := len(dst) dst = appendBase2Base(dst, src, 256, 62) for i, c := range dst[off:] { dst[off+i] = base62Characters[c] } return dst } func encodeBase62(in []byte) []byte { return appendEncodeBase62(nil, in) } func appendDecodeBase62(dst []byte, src []byte) []byte { // Kind of intrusive, we modify the input buffer... it's OK here, it saves // a memory allocation in Parse. for i, b := range src { // O(1)... technically. 
Has better real-world perf than a map src[i] = byte(strings.IndexByte(base62Characters, b)) } return appendBase2Base(dst, src, 62, 256) } func decodeBase62(src []byte) []byte { return appendDecodeBase62( make([]byte, 0, len(src)*2), append(make([]byte, 0, len(src)), src...), ) } func reverse(b []byte) { i := 0 j := len(b) - 1 for i < j { b[i], b[j] = b[j], b[i] i++ j-- } } func leftpad(b []byte, c byte, n int) []byte { if n -= len(b); n > 0 { for i := 0; i != n; i++ { b = append(b, c) } copy(b[n:], b) for i := 0; i != n; i++ { b[i] = c } } return b } ksuid-1.0.4/cmd/000077500000000000000000000000001407364142700134065ustar00rootroot00000000000000ksuid-1.0.4/cmd/ksuid/000077500000000000000000000000001407364142700145255ustar00rootroot00000000000000ksuid-1.0.4/cmd/ksuid/main.go000066400000000000000000000050641407364142700160050ustar00rootroot00000000000000package main import ( "bytes" "encoding/hex" "flag" "fmt" "io" "os" "strings" "text/template" "time" "github.com/segmentio/ksuid" ) var ( count int format string tpltxt string verbose bool ) func init() { flag.IntVar(&count, "n", 1, "Number of KSUIDs to generate when called with no other arguments.") flag.StringVar(&format, "f", "string", "One of string, inspect, time, timestamp, payload, raw, or template.") flag.StringVar(&tpltxt, "t", "", "The Go template used to format the output.") flag.BoolVar(&verbose, "v", false, "Turn on verbose mode.") } func main() { flag.Parse() args := flag.Args() var print func(ksuid.KSUID) switch format { case "string": print = printString case "inspect": print = printInspect case "time": print = printTime case "timestamp": print = printTimestamp case "payload": print = printPayload case "raw": print = printRaw case "template": print = printTemplate default: fmt.Println("Bad formatting function:", format) os.Exit(1) } if len(args) == 0 { for i := 0; i < count; i++ { args = append(args, ksuid.New().String()) } } var ids []ksuid.KSUID for _, arg := range args { id, err := ksuid.Parse(arg) 
if err != nil { fmt.Printf("Error when parsing %q: %s\n\n", arg, err) flag.PrintDefaults() os.Exit(1) } ids = append(ids, id) } for _, id := range ids { if verbose { fmt.Printf("%s: ", id) } print(id) } } func printString(id ksuid.KSUID) { fmt.Println(id.String()) } func printInspect(id ksuid.KSUID) { const inspectFormat = ` REPRESENTATION: String: %v Raw: %v COMPONENTS: Time: %v Timestamp: %v Payload: %v ` fmt.Printf(inspectFormat, id.String(), strings.ToUpper(hex.EncodeToString(id.Bytes())), id.Time(), id.Timestamp(), strings.ToUpper(hex.EncodeToString(id.Payload())), ) } func printTime(id ksuid.KSUID) { fmt.Println(id.Time()) } func printTimestamp(id ksuid.KSUID) { fmt.Println(id.Timestamp()) } func printPayload(id ksuid.KSUID) { os.Stdout.Write(id.Payload()) } func printRaw(id ksuid.KSUID) { os.Stdout.Write(id.Bytes()) } func printTemplate(id ksuid.KSUID) { b := &bytes.Buffer{} t := template.Must(template.New("").Parse(tpltxt)) t.Execute(b, struct { String string Raw string Time time.Time Timestamp uint32 Payload string }{ String: id.String(), Raw: strings.ToUpper(hex.EncodeToString(id.Bytes())), Time: id.Time(), Timestamp: id.Timestamp(), Payload: strings.ToUpper(hex.EncodeToString(id.Payload())), }) b.WriteByte('\n') io.Copy(os.Stdout, b) } ksuid-1.0.4/go.mod000066400000000000000000000000531407364142700137470ustar00rootroot00000000000000module github.com/segmentio/ksuid go 1.12 ksuid-1.0.4/ksuid.go000066400000000000000000000200621407364142700143110ustar00rootroot00000000000000package ksuid import ( "bytes" "crypto/rand" "database/sql/driver" "encoding/binary" "fmt" "io" "math" "sync" "time" ) const ( // KSUID's epoch starts more recently so that the 32-bit number space gives a // significantly higher useful lifetime of around 136 years from March 2017. // This number (14e8) was picked to be easy to remember. 
epochStamp int64 = 1400000000 // Timestamp is a uint32 timestampLengthInBytes = 4 // Payload is 16-bytes payloadLengthInBytes = 16 // KSUIDs are 20 bytes when binary encoded byteLength = timestampLengthInBytes + payloadLengthInBytes // The length of a KSUID when string (base62) encoded stringEncodedLength = 27 // A string-encoded minimum value for a KSUID minStringEncoded = "000000000000000000000000000" // A string-encoded maximum value for a KSUID maxStringEncoded = "aWgEPTl1tmebfsQzFP4bxwgy80V" ) // KSUIDs are 20 bytes: // 00-03 byte: uint32 BE UTC timestamp with custom epoch // 04-19 byte: random "payload" type KSUID [byteLength]byte var ( rander = rand.Reader randMutex = sync.Mutex{} randBuffer = [payloadLengthInBytes]byte{} errSize = fmt.Errorf("Valid KSUIDs are %v bytes", byteLength) errStrSize = fmt.Errorf("Valid encoded KSUIDs are %v characters", stringEncodedLength) errStrValue = fmt.Errorf("Valid encoded KSUIDs are bounded by %s and %s", minStringEncoded, maxStringEncoded) errPayloadSize = fmt.Errorf("Valid KSUID payloads are %v bytes", payloadLengthInBytes) // Represents a completely empty (invalid) KSUID Nil KSUID // Represents the highest value a KSUID can have Max = KSUID{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255} ) // Append appends the string representation of i to b, returning a slice to a // potentially larger memory area. func (i KSUID) Append(b []byte) []byte { return fastAppendEncodeBase62(b, i[:]) } // The timestamp portion of the ID as a Time object func (i KSUID) Time() time.Time { return correctedUTCTimestampToTime(i.Timestamp()) } // The timestamp portion of the ID as a bare integer which is uncorrected // for KSUID's special epoch. 
func (i KSUID) Timestamp() uint32 { return binary.BigEndian.Uint32(i[:timestampLengthInBytes]) } // The 16-byte random payload without the timestamp func (i KSUID) Payload() []byte { return i[timestampLengthInBytes:] } // String-encoded representation that can be passed through Parse() func (i KSUID) String() string { return string(i.Append(make([]byte, 0, stringEncodedLength))) } // Raw byte representation of KSUID func (i KSUID) Bytes() []byte { // Safe because this is by-value return i[:] } // IsNil returns true if this is a "nil" KSUID func (i KSUID) IsNil() bool { return i == Nil } // Get satisfies the flag.Getter interface, making it possible to use KSUIDs as // part of of the command line options of a program. func (i KSUID) Get() interface{} { return i } // Set satisfies the flag.Value interface, making it possible to use KSUIDs as // part of of the command line options of a program. func (i *KSUID) Set(s string) error { return i.UnmarshalText([]byte(s)) } func (i KSUID) MarshalText() ([]byte, error) { return []byte(i.String()), nil } func (i KSUID) MarshalBinary() ([]byte, error) { return i.Bytes(), nil } func (i *KSUID) UnmarshalText(b []byte) error { id, err := Parse(string(b)) if err != nil { return err } *i = id return nil } func (i *KSUID) UnmarshalBinary(b []byte) error { id, err := FromBytes(b) if err != nil { return err } *i = id return nil } // Value converts the KSUID into a SQL driver value which can be used to // directly use the KSUID as parameter to a SQL query. func (i KSUID) Value() (driver.Value, error) { if i.IsNil() { return nil, nil } return i.String(), nil } // Scan implements the sql.Scanner interface. It supports converting from // string, []byte, or nil into a KSUID value. Attempting to convert from // another type will return an error. 
func (i *KSUID) Scan(src interface{}) error { switch v := src.(type) { case nil: return i.scan(nil) case []byte: return i.scan(v) case string: return i.scan([]byte(v)) default: return fmt.Errorf("Scan: unable to scan type %T into KSUID", v) } } func (i *KSUID) scan(b []byte) error { switch len(b) { case 0: *i = Nil return nil case byteLength: return i.UnmarshalBinary(b) case stringEncodedLength: return i.UnmarshalText(b) default: return errSize } } // Parse decodes a string-encoded representation of a KSUID object func Parse(s string) (KSUID, error) { if len(s) != stringEncodedLength { return Nil, errStrSize } src := [stringEncodedLength]byte{} dst := [byteLength]byte{} copy(src[:], s[:]) if err := fastDecodeBase62(dst[:], src[:]); err != nil { return Nil, errStrValue } return FromBytes(dst[:]) } func timeToCorrectedUTCTimestamp(t time.Time) uint32 { return uint32(t.Unix() - epochStamp) } func correctedUTCTimestampToTime(ts uint32) time.Time { return time.Unix(int64(ts)+epochStamp, 0) } // Generates a new KSUID. In the strange case that random bytes // can't be read, it will panic. func New() KSUID { ksuid, err := NewRandom() if err != nil { panic(fmt.Sprintf("Couldn't generate KSUID, inconceivable! error: %v", err)) } return ksuid } // Generates a new KSUID func NewRandom() (ksuid KSUID, err error) { return NewRandomWithTime(time.Now()) } func NewRandomWithTime(t time.Time) (ksuid KSUID, err error) { // Go's default random number generators are not safe for concurrent use by // multiple goroutines, the use of the rander and randBuffer are explicitly // synchronized here. 
randMutex.Lock() _, err = io.ReadAtLeast(rander, randBuffer[:], len(randBuffer)) copy(ksuid[timestampLengthInBytes:], randBuffer[:]) randMutex.Unlock() if err != nil { ksuid = Nil // don't leak random bytes on error return } ts := timeToCorrectedUTCTimestamp(t) binary.BigEndian.PutUint32(ksuid[:timestampLengthInBytes], ts) return } // Constructs a KSUID from constituent parts func FromParts(t time.Time, payload []byte) (KSUID, error) { if len(payload) != payloadLengthInBytes { return Nil, errPayloadSize } var ksuid KSUID ts := timeToCorrectedUTCTimestamp(t) binary.BigEndian.PutUint32(ksuid[:timestampLengthInBytes], ts) copy(ksuid[timestampLengthInBytes:], payload) return ksuid, nil } // Constructs a KSUID from a 20-byte binary representation func FromBytes(b []byte) (KSUID, error) { var ksuid KSUID if len(b) != byteLength { return Nil, errSize } copy(ksuid[:], b) return ksuid, nil } // Sets the global source of random bytes for KSUID generation. This // should probably only be set once globally. While this is technically // thread-safe as in it won't cause corruption, there's no guarantee // on ordering. func SetRand(r io.Reader) { if r == nil { rander = rand.Reader return } rander = r } // Implements comparison for KSUID type func Compare(a, b KSUID) int { return bytes.Compare(a[:], b[:]) } // Sorts the given slice of KSUIDs func Sort(ids []KSUID) { quickSort(ids, 0, len(ids)-1) } // IsSorted checks whether a slice of KSUIDs is sorted func IsSorted(ids []KSUID) bool { if len(ids) != 0 { min := ids[0] for _, id := range ids[1:] { if bytes.Compare(min[:], id[:]) > 0 { return false } min = id } } return true } func quickSort(a []KSUID, lo int, hi int) { if lo < hi { pivot := a[hi] i := lo - 1 for j, n := lo, hi; j != n; j++ { if bytes.Compare(a[j][:], pivot[:]) < 0 { i++ a[i], a[j] = a[j], a[i] } } i++ if bytes.Compare(a[hi][:], a[i][:]) < 0 { a[i], a[hi] = a[hi], a[i] } quickSort(a, lo, i-1) quickSort(a, i+1, hi) } } // Next returns the next KSUID after id. 
func (id KSUID) Next() KSUID { zero := makeUint128(0, 0) t := id.Timestamp() u := uint128Payload(id) v := add128(u, makeUint128(0, 1)) if v == zero { // overflow t++ } return v.ksuid(t) } // Prev returns the previoud KSUID before id. func (id KSUID) Prev() KSUID { max := makeUint128(math.MaxUint64, math.MaxUint64) t := id.Timestamp() u := uint128Payload(id) v := sub128(u, makeUint128(0, 1)) if v == max { // overflow t-- } return v.ksuid(t) } ksuid-1.0.4/ksuid_test.go000066400000000000000000000161431407364142700153550ustar00rootroot00000000000000package ksuid import ( "bytes" "encoding/json" "flag" "fmt" "sort" "strings" "testing" "time" ) func TestConstructionTimestamp(t *testing.T) { x := New() nowTime := time.Now().Round(1 * time.Minute) xTime := x.Time().Round(1 * time.Minute) if xTime != nowTime { t.Fatal(xTime, "!=", nowTime) } } func TestNil(t *testing.T) { if !Nil.IsNil() { t.Fatal("Nil should be Nil!") } x, _ := FromBytes(make([]byte, byteLength)) if !x.IsNil() { t.Fatal("Zero-byte array should be Nil!") } } func TestEncoding(t *testing.T) { x, _ := FromBytes(make([]byte, byteLength)) if !x.IsNil() { t.Fatal("Zero-byte array should be Nil!") } encoded := x.String() expected := strings.Repeat("0", stringEncodedLength) if encoded != expected { t.Fatal("expected", expected, "encoded", encoded) } } func TestPadding(t *testing.T) { b := make([]byte, byteLength) for i := 0; i < byteLength; i++ { b[i] = 255 } x, _ := FromBytes(b) xEncoded := x.String() nilEncoded := Nil.String() if len(xEncoded) != len(nilEncoded) { t.Fatal("Encoding should produce equal-length strings for zero and max case") } } func TestParse(t *testing.T) { _, err := Parse("123") if err != errStrSize { t.Fatal("Expected Parsing a 3-char string to return an error") } parsed, err := Parse(strings.Repeat("0", stringEncodedLength)) if err != nil { t.Fatal("Unexpected error", err) } if Compare(parsed, Nil) != 0 { t.Fatal("Parsing all-zeroes string should equal Nil value", "expected:", Nil, 
"actual:", parsed) } maxBytes := make([]byte, byteLength) for i := 0; i < byteLength; i++ { maxBytes[i] = 255 } maxBytesKSUID, err := FromBytes(maxBytes) if err != nil { t.Fatal("Unexpected error", err) } maxParseKSUID, err := Parse(maxStringEncoded) if err != nil { t.Fatal("Unexpected error", err) } if Compare(maxBytesKSUID, maxParseKSUID) != 0 { t.Fatal("String decoder broke for max string") } } func TestIssue25(t *testing.T) { // https://github.com/segmentio/ksuid/issues/25 for _, s := range []string{ "aaaaaaaaaaaaaaaaaaaaaaaaaaa", "aWgEPTl1tmebfsQzFP4bxwgy80!", } { _, err := Parse(s) if err != errStrValue { t.Error("invalid KSUID representations cannot be successfully parsed, got err =", err) } } } func TestEncodeAndDecode(t *testing.T) { x := New() builtFromEncodedString, err := Parse(x.String()) if err != nil { t.Fatal("Unexpected error", err) } if Compare(x, builtFromEncodedString) != 0 { t.Fatal("Parse(X).String() != X") } } func TestMarshalText(t *testing.T) { var id1 = New() var id2 KSUID if err := id2.UnmarshalText([]byte(id1.String())); err != nil { t.Fatal(err) } if id1 != id2 { t.Fatal(id1, "!=", id2) } if b, err := id2.MarshalText(); err != nil { t.Fatal(err) } else if s := string(b); s != id1.String() { t.Fatal(s) } } func TestMarshalBinary(t *testing.T) { var id1 = New() var id2 KSUID if err := id2.UnmarshalBinary(id1.Bytes()); err != nil { t.Fatal(err) } if id1 != id2 { t.Fatal(id1, "!=", id2) } if b, err := id2.MarshalBinary(); err != nil { t.Fatal(err) } else if bytes.Compare(b, id1.Bytes()) != 0 { t.Fatal("bad binary form:", id2) } } func TestMashalJSON(t *testing.T) { var id1 = New() var id2 KSUID if b, err := json.Marshal(id1); err != nil { t.Fatal(err) } else if err := json.Unmarshal(b, &id2); err != nil { t.Fatal(err) } else if id1 != id2 { t.Error(id1, "!=", id2) } } func TestFlag(t *testing.T) { var id1 = New() var id2 KSUID fset := flag.NewFlagSet("test", flag.ContinueOnError) fset.Var(&id2, "id", "the KSUID") if err := 
fset.Parse([]string{"-id", id1.String()}); err != nil { t.Fatal(err) } if id1 != id2 { t.Error(id1, "!=", id2) } } func TestSqlValuer(t *testing.T) { id, _ := Parse(maxStringEncoded) if v, err := id.Value(); err != nil { t.Error(err) } else if s, ok := v.(string); !ok { t.Error("not a string value") } else if s != maxStringEncoded { t.Error("bad string value::", s) } } func TestSqlValuerNilValue(t *testing.T) { if v, err := Nil.Value(); err != nil { t.Error(err) } else if v != nil { t.Errorf("bad nil value: %v", v) } } func TestSqlScanner(t *testing.T) { id1 := New() id2 := New() tests := []struct { ksuid KSUID value interface{} }{ {Nil, nil}, {id1, id1.String()}, {id2, id2.Bytes()}, } for _, test := range tests { t.Run(fmt.Sprintf("%T", test.value), func(t *testing.T) { var id KSUID if err := id.Scan(test.value); err != nil { t.Error(err) } if id != test.ksuid { t.Error("bad KSUID:") t.Logf("expected %v", test.ksuid) t.Logf("found %v", id) } }) } } func TestAppend(t *testing.T) { for _, repr := range []string{"0pN1Own7255s7jwpwy495bAZeEa", "aWgEPTl1tmebfsQzFP4bxwgy80V"} { k, _ := Parse(repr) a := make([]byte, 0, stringEncodedLength) a = append(a, "?: "...) 
a = k.Append(a) if s := string(a); s != "?: "+repr { t.Error(s) } } } func TestSort(t *testing.T) { ids1 := [11]KSUID{} ids2 := [11]KSUID{} for i := range ids1 { ids1[i] = New() } ids2 = ids1 sort.Slice(ids2[:], func(i, j int) bool { return Compare(ids2[i], ids2[j]) < 0 }) Sort(ids1[:]) if !IsSorted(ids1[:]) { t.Error("not sorted") } if ids1 != ids2 { t.Error("bad order:") t.Log(ids1) t.Log(ids2) } } func TestPrevNext(t *testing.T) { tests := []struct { id KSUID prev KSUID next KSUID }{ { id: Nil, prev: Max, next: KSUID{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, }, { id: Max, prev: KSUID{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, next: Nil, }, } for _, test := range tests { t.Run(test.id.String(), func(t *testing.T) { testPrevNext(t, test.id, test.prev, test.next) }) } } func TestGetTimestamp(t *testing.T) { nowTime := time.Now() x, _ := NewRandomWithTime(nowTime) xTime := int64(x.Timestamp()) unix := nowTime.Unix() if xTime != unix - epochStamp { t.Fatal(xTime, "!=", unix) } } func testPrevNext(t *testing.T, id, prev, next KSUID) { id1 := id.Prev() id2 := id.Next() if id1 != prev { t.Error("previous id of the nil KSUID is wrong:", id1, "!=", prev) } if id2 != next { t.Error("next id of the nil KSUID is wrong:", id2, "!=", next) } } func BenchmarkAppend(b *testing.B) { a := make([]byte, 0, stringEncodedLength) k := New() for i := 0; i != b.N; i++ { k.Append(a) } } func BenchmarkString(b *testing.B) { k := New() for i := 0; i != b.N; i++ { _ = k.String() } } func BenchmarkParse(b *testing.B) { for i := 0; i != b.N; i++ { Parse(maxStringEncoded) } } func BenchmarkCompare(b *testing.B) { k1 := New() k2 := New() for i := 0; i != b.N; i++ { Compare(k1, k2) } } func BenchmarkSort(b *testing.B) { ids1 := [101]KSUID{} ids2 := [101]KSUID{} for i := range ids1 { ids1[i] = New() } for i := 0; i != b.N; i++ { ids2 = ids1 Sort(ids2[:]) } } func BenchmarkNew(b *testing.B) { 
// randSourceReader adapts a rand.Source64 to the io.Reader interface.
type randSourceReader struct {
	source rand.Source64
}

// Read fills b with bytes drawn from the underlying source and returns len(b)
// with a nil error.
//
// Each 64-bit value taken from the source is written in little-endian order,
// so a 16 byte buffer (the KSUID payload size this reader is optimized for)
// consumes exactly two values. Buffers of any other length, including empty
// ones, are handled as well: a trailing chunk shorter than 8 bytes receives
// the low-order bytes of one extra value.
func (r *randSourceReader) Read(b []byte) (int, error) {
	n := len(b)

	for len(b) >= 8 {
		binary.LittleEndian.PutUint64(b, r.source.Uint64())
		b = b[8:]
	}

	if len(b) > 0 {
		// PutUint64 needs 8 bytes of room, so go through a scratch array
		// for the short tail.
		var tail [8]byte
		binary.LittleEndian.PutUint64(tail[:], r.source.Uint64())
		copy(b, tail[:])
	}

	return n, nil
}
// // A typical usage of a Sequence looks like this: // // seq := ksuid.Sequence{ // Seed: ksuid.New(), // } // id, err := seq.Next() // // Sequence values are not safe to use concurrently from multiple goroutines. type Sequence struct { // The seed is used as base for the KSUID generator, all generated KSUIDs // share the same leading 18 bytes of the seed. Seed KSUID count uint32 // uint32 for overflow, only 2 bytes are used } // Next produces the next KSUID in the sequence, or returns an error if the // sequence has been exhausted. func (seq *Sequence) Next() (KSUID, error) { id := seq.Seed // copy count := seq.count if count > math.MaxUint16 { return Nil, errors.New("too many IDs were generated") } seq.count++ return withSequenceNumber(id, uint16(count)), nil } // Bounds returns the inclusive min and max bounds of the KSUIDs that may be // generated by the sequence. If all ids have been generated already then the // returned min value is equal to the max. func (seq *Sequence) Bounds() (min KSUID, max KSUID) { count := seq.count if count > math.MaxUint16 { count = math.MaxUint16 } return withSequenceNumber(seq.Seed, uint16(count)), withSequenceNumber(seq.Seed, math.MaxUint16) } func withSequenceNumber(id KSUID, n uint16) KSUID { binary.BigEndian.PutUint16(id[len(id)-2:], n) return id } ksuid-1.0.4/sequence_test.go000066400000000000000000000013211407364142700160360ustar00rootroot00000000000000package ksuid import ( "encoding/binary" "math" "testing" ) func TestSequence(t *testing.T) { seq := Sequence{Seed: New()} if min, max := seq.Bounds(); min == max { t.Error("min and max of KSUID range must differ when no ids have been generated") } for i := 0; i <= math.MaxUint16; i++ { id, err := seq.Next() if err != nil { t.Fatal(err) } if j := int(binary.BigEndian.Uint16(id[len(id)-2:])); j != i { t.Fatalf("expected %d but got %d in %s", i, j, id) } } if _, err := seq.Next(); err == nil { t.Fatal("no error returned after exhausting the id generator") } if min, max := 
seq.Bounds(); min != max { t.Error("after all KSUIDs were generated the min and max must be equal") } } ksuid-1.0.4/set.go000066400000000000000000000161401407364142700137670ustar00rootroot00000000000000package ksuid import ( "bytes" "encoding/binary" ) // CompressedSet is an immutable data type which stores a set of KSUIDs. type CompressedSet []byte // Iter returns an iterator that produces all KSUIDs in the set. func (set CompressedSet) Iter() CompressedSetIter { return CompressedSetIter{ content: []byte(set), } } // String satisfies the fmt.Stringer interface, returns a human-readable string // representation of the set. func (set CompressedSet) String() string { b := bytes.Buffer{} b.WriteByte('[') set.writeTo(&b) b.WriteByte(']') return b.String() } // String satisfies the fmt.GoStringer interface, returns a Go representation of // the set. func (set CompressedSet) GoString() string { b := bytes.Buffer{} b.WriteString("ksuid.CompressedSet{") set.writeTo(&b) b.WriteByte('}') return b.String() } func (set CompressedSet) writeTo(b *bytes.Buffer) { a := [27]byte{} for i, it := 0, set.Iter(); it.Next(); i++ { if i != 0 { b.WriteString(", ") } b.WriteByte('"') it.KSUID.Append(a[:0]) b.Write(a[:]) b.WriteByte('"') } } // Compress creates and returns a compressed set of KSUIDs from the list given // as arguments. func Compress(ids ...KSUID) CompressedSet { c := 1 + byteLength + (len(ids) / 5) b := make([]byte, 0, c) return AppendCompressed(b, ids...) } // AppendCompressed uses the given byte slice as pre-allocated storage space to // build a KSUID set. // // Note that the set uses a compression technique to store the KSUIDs, so the // resuling length is not 20 x len(ids). The rule of thumb here is for the given // byte slice to reserve the amount of memory that the application would be OK // to waste. 
func AppendCompressed(set []byte, ids ...KSUID) CompressedSet {
	if len(ids) != 0 {
		// The encoding below relies on ascending order. Note that Sort
		// reorders the caller's slice in place.
		if !IsSorted(ids) {
			Sort(ids)
		}
		one := makeUint128(0, 1)

		// The first KSUID is always written to the set, this is the starting
		// point for all deltas.
		set = append(set, byte(rawKSUID))
		set = append(set, ids[0][:]...)

		// State of the last id written: its timestamp, full value and payload.
		timestamp := ids[0].Timestamp()
		lastKSUID := ids[0]
		lastValue := uint128Payload(ids[0])

		for i := 1; i != len(ids); i++ {
			id := ids[i]

			// Duplicates of the previously written id are dropped.
			if id == lastKSUID {
				continue
			}

			t := id.Timestamp()
			v := uint128Payload(id)

			if t != timestamp {
				// Timestamp changed: write a tag byte carrying the byte
				// length n of the delta, the timestamp delta on n bytes,
				// then the payload bytes of the id verbatim.
				d := t - timestamp
				n := varintLength32(d)

				set = append(set, timeDelta|byte(n))
				set = appendVarint32(set, d, n)
				set = append(set, id[timestampLengthInBytes:]...)

				timestamp = t
			} else {
				// Same timestamp: only the payload difference is stored.
				d := sub128(v, lastValue)

				if d != one {
					// Tag byte carrying the byte length n, followed by
					// the payload delta on n bytes.
					n := varintLength128(d)

					set = append(set, payloadDelta|byte(n))
					set = appendVarint128(set, d, n)
				} else {
					// The payload is exactly one above the previous one:
					// collapse the whole run of consecutive payloads into
					// a single length. l is the number of further ids
					// continuing the run and c the number of elements of
					// ids[i+1:] that were consumed to find them.
					l, c := rangeLength(ids[i+1:], t, id, v)
					m := uint64(l + 1) // +1 accounts for ids[i] itself
					n := varintLength64(m)

					set = append(set, payloadRange|byte(n))
					set = appendVarint64(set, m, n)

					// Jump to the last element covered by the run so the
					// state below reflects the end of the range.
					i += c
					id = ids[i]
					v = uint128Payload(id)
				}
			}

			lastKSUID = id
			lastValue = v
		}
	}
	return CompressedSet(set)
}
// varintLength64 returns the number of bytes, between 1 and 8, needed to hold
// v once its leading zero bytes are dropped. Zero still takes one byte.
func varintLength64(v uint64) int {
	n := 1
	for v >>= 8; v != 0; v >>= 8 {
		n++
	}
	return n
}

// varintLength32 returns the number of bytes, between 1 and 4, needed to hold
// v once its leading zero bytes are dropped. Zero still takes one byte.
func varintLength32(v uint32) int {
	n := 1
	for v >>= 8; v != 0; v >>= 8 {
		n++
	}
	return n
}
func (it *CompressedSetIter) Next() bool {
	// A payload range is being replayed: produce the next consecutive
	// payload without reading anything from the content.
	if it.seqlength != 0 {
		value := incr128(it.lastValue)
		it.KSUID = value.ksuid(it.timestamp)
		it.seqlength--
		it.lastValue = value
		return true
	}

	// All content consumed: the iteration is over.
	if it.offset == len(it.content) {
		return false
	}

	b := it.content[it.offset]
	it.offset++

	// The two high bits of the leading byte select the record type, the six
	// low bits give the byte length of the varint that follows.
	const mask = rawKSUID | timeDelta | payloadDelta | payloadRange
	tag := int(b) & mask
	cnt := int(b) & ^mask

	switch tag {
	case rawKSUID:
		// A full KSUID stored verbatim; it resets the timestamp and the
		// payload that later deltas are applied to.
		off0 := it.offset
		off1 := off0 + byteLength

		copy(it.KSUID[:], it.content[off0:off1])

		it.offset = off1
		it.timestamp = it.KSUID.Timestamp()
		it.lastValue = uint128Payload(it.KSUID)

	case timeDelta:
		// cnt bytes of timestamp delta followed by a complete payload.
		off0 := it.offset
		off1 := off0 + cnt
		off2 := off1 + payloadLengthInBytes

		it.timestamp += varint32(it.content[off0:off1])

		binary.BigEndian.PutUint32(it.KSUID[:timestampLengthInBytes], it.timestamp)
		copy(it.KSUID[timestampLengthInBytes:], it.content[off1:off2])

		it.offset = off2
		it.lastValue = uint128Payload(it.KSUID)

	case payloadDelta:
		// cnt bytes of payload delta added to the previous payload; the
		// timestamp is unchanged.
		off0 := it.offset
		off1 := off0 + cnt

		delta := varint128(it.content[off0:off1])
		value := add128(it.lastValue, delta)

		it.KSUID = value.ksuid(it.timestamp)
		it.offset = off1
		it.lastValue = value

	case payloadRange:
		// cnt bytes holding the length of a run of consecutive payloads.
		// The first id of the run is produced right away, hence the
		// decrement; the rest is replayed by the branch at the top.
		off0 := it.offset
		off1 := off0 + cnt

		value := incr128(it.lastValue)
		it.KSUID = value.ksuid(it.timestamp)
		it.seqlength = varint64(it.content[off0:off1])
		it.offset = off1
		it.seqlength--
		it.lastValue = value

	default:
		// NOTE(review): every value that tag can take after masking matches
		// one of the cases above, so this branch does not appear reachable;
		// truncated content would instead fail on the slice expressions.
		panic("KSUID set iterator is reading malformed data")
	}

	return true
}
"iterating over a nil compressed set returns no ids", function: testCompressedSetNil, }, { scenario: "concatenating multiple compressed sets is supported", function: testCompressedSetConcat, }, { scenario: "duplicate ids are appear only once in the compressed set", function: testCompressedSetDuplicates, }, { scenario: "building a compressed set with a single id repeated multiple times produces the id only once", function: testCompressedSetSingle, }, { scenario: "iterating over a compressed sequence returns the full sequence", function: testCompressedSetSequence, }, } for _, test := range tests { t.Run(test.scenario, test.function) } } func testCompressedSetString(t *testing.T) { id1, _ := Parse("0uHjRkQoL2JKAQIULPdqqb5fOkk") id2, _ := Parse("0uHjRvkOG5CbtoXW5oCEp3L2xBu") id3, _ := Parse("0uHjSJ4Pe5606kT2XWixK6dirlo") set := Compress(id1, id2, id3) if s := set.String(); s != `["0uHjRkQoL2JKAQIULPdqqb5fOkk", "0uHjRvkOG5CbtoXW5oCEp3L2xBu", "0uHjSJ4Pe5606kT2XWixK6dirlo"]` { t.Error(s) } } func testCompressedSetGoString(t *testing.T) { id1, _ := Parse("0uHjRkQoL2JKAQIULPdqqb5fOkk") id2, _ := Parse("0uHjRvkOG5CbtoXW5oCEp3L2xBu") id3, _ := Parse("0uHjSJ4Pe5606kT2XWixK6dirlo") set := Compress(id1, id2, id3) if s := set.GoString(); s != `ksuid.CompressedSet{"0uHjRkQoL2JKAQIULPdqqb5fOkk", "0uHjRvkOG5CbtoXW5oCEp3L2xBu", "0uHjSJ4Pe5606kT2XWixK6dirlo"}` { t.Error(s) } } func testCompressedSetSparse(t *testing.T) { now := time.Now() times := [100]time.Time{} for i := range times { times[i] = now.Add(time.Duration(i) * 2 * time.Second) } ksuids := [1000]KSUID{} for i := range ksuids { ksuids[i], _ = NewRandomWithTime(times[i%len(times)]) } set := Compress(ksuids[:]...) 
for i, it := 0, set.Iter(); it.Next(); { if i >= len(ksuids) { t.Error("too many KSUIDs were produced by the set iterator") break } if ksuids[i] != it.KSUID { t.Errorf("bad KSUID at index %d: expected %s but found %s", i, ksuids[i], it.KSUID) } i++ } reportCompressionRatio(t, ksuids[:], set) } func testCompressedSetPacked(t *testing.T) { sequences := [10]Sequence{} for i := range sequences { sequences[i] = Sequence{Seed: New()} } ksuids := [1000]KSUID{} for i := range ksuids { ksuids[i], _ = sequences[i%len(sequences)].Next() } set := Compress(ksuids[:]...) for i, it := 0, set.Iter(); it.Next(); { if i >= len(ksuids) { t.Error("too many KSUIDs were produced by the set iterator") break } if ksuids[i] != it.KSUID { t.Errorf("bad KSUID at index %d: expected %s but found %s", i, ksuids[i], it.KSUID) } i++ } reportCompressionRatio(t, ksuids[:], set) } func testCompressedSetMixed(t *testing.T) { now := time.Now() times := [20]time.Time{} for i := range times { times[i] = now.Add(time.Duration(i) * 2 * time.Second) } sequences := [200]Sequence{} for i := range sequences { seed, _ := NewRandomWithTime(times[i%len(times)]) sequences[i] = Sequence{Seed: seed} } ksuids := [1000]KSUID{} for i := range ksuids { ksuids[i], _ = sequences[i%len(sequences)].Next() } set := Compress(ksuids[:]...) 
for i, it := 0, set.Iter(); it.Next(); { if i >= len(ksuids) { t.Error("too many KSUIDs were produced by the set iterator") break } if ksuids[i] != it.KSUID { t.Errorf("bad KSUID at index %d: expected %s but found %s", i, ksuids[i], it.KSUID) } i++ } reportCompressionRatio(t, ksuids[:], set) } func testCompressedSetDuplicates(t *testing.T) { sequence := Sequence{Seed: New()} ksuids := [1000]KSUID{} for i := range ksuids[:10] { ksuids[i], _ = sequence.Next() // exercise dedupe on the id range code path } for i := range ksuids[10:] { ksuids[i+10] = New() } for i := 1; i < len(ksuids); i += 4 { ksuids[i] = ksuids[i-1] // generate many dupes } miss := make(map[KSUID]struct{}) uniq := make(map[KSUID]struct{}) for _, id := range ksuids { miss[id] = struct{}{} } set := Compress(ksuids[:]...) for it := set.Iter(); it.Next(); { if _, dupe := uniq[it.KSUID]; dupe { t.Errorf("duplicate id found in compressed set: %s", it.KSUID) } uniq[it.KSUID] = struct{}{} delete(miss, it.KSUID) } if len(miss) != 0 { t.Error("some ids were not found in the compressed set:") for id := range miss { t.Log(id) } } } func testCompressedSetSingle(t *testing.T) { id := New() set := Compress( id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, id, ) n := 0 for it := set.Iter(); it.Next(); { if n != 0 { t.Errorf("too many ids found in the compressed set: %s", it.KSUID) } else if id != it.KSUID { t.Errorf("invalid id found in the compressed set: %s != %s", it.KSUID, id) } n++ } if n == 0 { t.Error("no ids were produced by the compressed set") } } func testCompressedSetSequence(t *testing.T) { seq := Sequence{Seed: New()} ids := make([]KSUID, 5) for i := 0; i < 5; i++ { ids[i], _ = seq.Next() } iter := Compress(ids...).Iter() index := 0 for iter.Next() { if iter.KSUID != ids[index] { t.Errorf("mismatched id at index %d: %s != %s", index, iter.KSUID, ids[index]) } index++ } if index != 5 { 
t.Errorf("Expected 5 ids, got %d", index) } } func testCompressedSetNil(t *testing.T) { set := CompressedSet(nil) for it := set.Iter(); it.Next(); { t.Errorf("too many ids returned by the iterator of a nil compressed set: %s", it.KSUID) } } func testCompressedSetConcat(t *testing.T) { ksuids := [100]KSUID{} for i := range ksuids { ksuids[i] = New() } set := CompressedSet(nil) set = AppendCompressed(set, ksuids[:42]...) set = AppendCompressed(set, ksuids[42:64]...) set = AppendCompressed(set, ksuids[64:]...) for i, it := 0, set.Iter(); it.Next(); i++ { if ksuids[i] != it.KSUID { t.Errorf("invalid ID at index %d: %s != %s", i, ksuids[i], it.KSUID) } } } func reportCompressionRatio(t *testing.T, ksuids []KSUID, set CompressedSet) { len1 := byteLength * len(ksuids) len2 := len(set) t.Logf("original %d B, compressed %d B (%.4g%%)", len1, len2, 100*(1-(float64(len2)/float64(len1)))) } func BenchmarkCompressedSet(b *testing.B) { ksuids1 := [1000]KSUID{} ksuids2 := [1000]KSUID{} for i := range ksuids1 { ksuids1[i] = New() } ksuids2 = ksuids1 buf := make([]byte, 0, 1024) set := Compress(ksuids2[:]...) b.Run("write", func(b *testing.B) { n := 0 for i := 0; i != b.N; i++ { ksuids2 = ksuids1 buf = AppendCompressed(buf[:0], ksuids2[:]...) n = len(buf) } b.SetBytes(int64(n + len(ksuids2))) }) b.Run("read", func(b *testing.B) { n := 0 for i := 0; i != b.N; i++ { n = 0 for it := set.Iter(); true; { if !it.Next() { n++ break } } } b.SetBytes(int64((n * byteLength) + len(set))) }) } ksuid-1.0.4/uint128.go000066400000000000000000000046071407364142700144130ustar00rootroot00000000000000package ksuid import "fmt" // uint128 represents an unsigned 128 bits little endian integer. 
// Element 0 holds the low 64 bits and element 1 the high 64 bits.
type uint128 [2]uint64

// String returns the value as a 32 digit upper-case hexadecimal literal
// prefixed with "0x". The most significant word is printed first so the text
// reads like an ordinary number, even though the words are stored low first.
func (v uint128) String() string {
	return fmt.Sprintf("0x%016X%016X", v[1], v[0])
}
= z0 c := (y0&^x0 | (y0|^x0)&z0) >> (wordBitSize - 1) z[1] = x[1] - y[1] - c return } func incr128(x uint128) uint128 { return add128(x, uint128{1, 0}) } ksuid-1.0.4/uint128_test.go000066400000000000000000000053671407364142700154560ustar00rootroot00000000000000package ksuid import ( "fmt" "testing" ) func TestCmp128(t *testing.T) { tests := []struct { x uint128 y uint128 k int }{ { x: makeUint128(0, 0), y: makeUint128(0, 0), k: 0, }, { x: makeUint128(0, 1), y: makeUint128(0, 0), k: +1, }, { x: makeUint128(0, 0), y: makeUint128(0, 1), k: -1, }, { x: makeUint128(1, 0), y: makeUint128(0, 1), k: +1, }, { x: makeUint128(0, 1), y: makeUint128(1, 0), k: -1, }, } for _, test := range tests { t.Run(fmt.Sprintf("cmp128(%s,%s)", test.x, test.y), func(t *testing.T) { if k := cmp128(test.x, test.y); k != test.k { t.Error(k, "!=", test.k) } }) } } func TestAdd128(t *testing.T) { tests := []struct { x uint128 y uint128 z uint128 }{ { x: makeUint128(0, 0), y: makeUint128(0, 0), z: makeUint128(0, 0), }, { x: makeUint128(0, 1), y: makeUint128(0, 0), z: makeUint128(0, 1), }, { x: makeUint128(0, 0), y: makeUint128(0, 1), z: makeUint128(0, 1), }, { x: makeUint128(1, 0), y: makeUint128(0, 1), z: makeUint128(1, 1), }, { x: makeUint128(0, 1), y: makeUint128(1, 0), z: makeUint128(1, 1), }, { x: makeUint128(0, 0xFFFFFFFFFFFFFFFF), y: makeUint128(0, 1), z: makeUint128(1, 0), }, } for _, test := range tests { t.Run(fmt.Sprintf("add128(%s,%s)", test.x, test.y), func(t *testing.T) { if z := add128(test.x, test.y); z != test.z { t.Error(z, "!=", test.z) } }) } } func TestSub128(t *testing.T) { tests := []struct { x uint128 y uint128 z uint128 }{ { x: makeUint128(0, 0), y: makeUint128(0, 0), z: makeUint128(0, 0), }, { x: makeUint128(0, 1), y: makeUint128(0, 0), z: makeUint128(0, 1), }, { x: makeUint128(0, 0), y: makeUint128(0, 1), z: makeUint128(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), }, { x: makeUint128(1, 0), y: makeUint128(0, 1), z: makeUint128(0, 0xFFFFFFFFFFFFFFFF), }, { x: makeUint128(0, 
1), y: makeUint128(1, 0), z: makeUint128(0xFFFFFFFFFFFFFFFF, 1), }, { x: makeUint128(0, 0xFFFFFFFFFFFFFFFF), y: makeUint128(0, 1), z: makeUint128(0, 0xFFFFFFFFFFFFFFFE), }, } for _, test := range tests { t.Run(fmt.Sprintf("sub128(%s,%s)", test.x, test.y), func(t *testing.T) { if z := sub128(test.x, test.y); z != test.z { t.Error(z, "!=", test.z) } }) } } func BenchmarkCmp128(b *testing.B) { x := makeUint128(0, 0) y := makeUint128(0, 0) for i := 0; i != b.N; i++ { cmp128(x, y) } } func BenchmarkAdd128(b *testing.B) { x := makeUint128(0, 0) y := makeUint128(0, 0) for i := 0; i != b.N; i++ { add128(x, y) } } func BenchmarkSub128(b *testing.B) { x := makeUint128(0, 0) y := makeUint128(0, 0) for i := 0; i != b.N; i++ { sub128(x, y) } }