pax_global_header00006660000000000000000000000064130004764370014516gustar00rootroot0000000000000052 comment=9fd33a8bde5de043fecdc66c13801b52a966e5f4 jetset-1.0.0/000077500000000000000000000000001300047643700130125ustar00rootroot00000000000000jetset-1.0.0/LICENSE000066400000000000000000000027201300047643700140200ustar00rootroot00000000000000Copyright (c) 2015, jetset Authors All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Go Authors nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. jetset-1.0.0/README.md000066400000000000000000000003131300047643700142660ustar00rootroot00000000000000# jetset compressed set JetSet is a compressed set of numbers. Numbers added to the set will be compressed with delta coding followed by Burrows Wheeler compression. Common set functions are supported. jetset-1.0.0/jetset.go000066400000000000000000000153571300047643700146520ustar00rootroot00000000000000// Copyright 2015 The jetset Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package jetset import ( "bytes" "encoding/binary" "fmt" "github.com/pointlander/compress" ) type Set struct { data []byte length int } func (set Set) String() string { buffer, in, output := bytes.NewBuffer(set.data), make(chan []byte, 1), make([]byte, set.length) in <- output close(in) compress.BijectiveBurrowsWheelerDecoder(in).MoveToFrontRunLengthDecoder().AdaptiveDecoder().Decode(buffer) codes, code, reader, space := "[", uint64(0), bytes.NewReader(output), "" next := func() error { return binary.Read(reader, binary.BigEndian, &code) } for err := next(); err == nil; err = next() { codes += space + fmt.Sprintf("%v", code) space = " " } return codes + "]" } type decompressor struct { buffer *bytes.Reader adding bool current, count, offset uint64 } func newDecompressor(set Set) *decompressor { if len(set.data) == 0 { return &decompressor{buffer: bytes.NewReader(nil)} } buffer, in, output := bytes.NewBuffer(set.data), make(chan []byte, 1), make([]byte, set.length) in <- output close(in) compress.BijectiveBurrowsWheelerDecoder(in).MoveToFrontRunLengthDecoder().AdaptiveDecoder().Decode(buffer) return &decompressor{buffer: bytes.NewReader(output)} } func (d *decompressor) decompress() (uint64, bool) { if d.adding { if d.count > 0 { d.count-- d.current += d.offset return d.current, true } else { var code uint64 err := binary.Read(d.buffer, binary.BigEndian, &code) if err == nil { if code == 0 { err := binary.Read(d.buffer, binary.BigEndian, &d.count) if err != nil { panic(err) } err = binary.Read(d.buffer, binary.BigEndian, &d.offset) if err != nil { panic(err) } d.count-- d.current += d.offset return d.current, true } else { d.current += code return d.current, true } } else { return 0, false } } } else { err := binary.Read(d.buffer, binary.BigEndian, &d.current) if err == nil { d.adding = true return d.current, true } else { return 0, false } } } type compressor struct { buffer *bytes.Buffer writing bool current, count, offset uint64 } func newCompressor() *compressor { return &compressor{buffer: &bytes.Buffer{}} } func (c *compressor) compress(e uint64) { if c.writing { offset := e - c.current if c.offset == offset { c.count++ } else if c.count > 0 { binary.Write(c.buffer, binary.BigEndian, uint64(0)) binary.Write(c.buffer, binary.BigEndian, c.count + 1) binary.Write(c.buffer, binary.BigEndian, c.offset) c.count, c.offset = 0, offset } else { binary.Write(c.buffer, binary.BigEndian, c.offset) c.offset = offset } } else { c.writing, c.offset = true, e } c.current = e } func (c *compressor) close() Set { if c.writing { if c.count > 0 { binary.Write(c.buffer, binary.BigEndian, uint64(0)) binary.Write(c.buffer, binary.BigEndian, c.count + 1) binary.Write(c.buffer, binary.BigEndian, c.offset) } else { binary.Write(c.buffer, binary.BigEndian, c.offset) } buffer, in := &bytes.Buffer{}, make(chan []byte, 1) in <- c.buffer.Bytes() close(in) compress.BijectiveBurrowsWheelerCoder(in).MoveToFrontRunLengthCoder().AdaptiveCoder().Code(buffer) return Set{data: buffer.Bytes(), length: c.buffer.Len()} } return Set{} } func (s Set) Copy() Set { cp := make([]byte, len(s.data)) copy(cp, s.data) return Set{data: cp, length: s.length} } func (s Set) Add(e uint64) Set { comp, decomp, found := newCompressor(), newDecompressor(s), false d, status := decomp.decompress() if !status { comp.compress(e) return comp.close() } for status { if !found && e == d { found = true } else if !found && e < d { comp.compress(e) found = true } comp.compress(d) d, status = decomp.decompress() } if !found { comp.compress(e) } return comp.close() } func (a Set) AddRange(begin, end uint64) Set { comp, decomp_a, d_b := newCompressor(), newDecompressor(a), begin d_a, status_a := decomp_a.decompress() for status_a && d_b <= end { if d_a == d_b { comp.compress(d_a) d_a, status_a = decomp_a.decompress() d_b++ } else if d_a < d_b { comp.compress(d_a) d_a, status_a = decomp_a.decompress() } else { comp.compress(d_b) d_b++ } } for status_a { comp.compress(d_a) d_a, status_a = decomp_a.decompress() } for d_b <= end { comp.compress(d_b) d_b++ } return comp.close() } func (s Set) Has(e uint64) bool { decomp := newDecompressor(s) for item, status := decomp.decompress(); status; item, status = decomp.decompress() { if item == e { return true } } return false } func (s Set) Complement(max uint64) Set { comp, decomp, i := newCompressor(), newDecompressor(s), uint64(0) d, status := decomp.decompress() for i <= max && status { if i < d { comp.compress(i) } else if i == d { d, status = decomp.decompress() } i++ } for i <= max { comp.compress(i) i++ } return comp.close() } func (a Set) Union(b Set) Set { comp, decomp_a, decomp_b := newCompressor(), newDecompressor(a), newDecompressor(b) d_a, status_a := decomp_a.decompress() d_b, status_b := decomp_b.decompress() for status_a && status_b { if d_a == d_b { comp.compress(d_a) d_a, status_a = decomp_a.decompress() d_b, status_b = decomp_b.decompress() } else if d_a < d_b { comp.compress(d_a) d_a, status_a = decomp_a.decompress() } else { comp.compress(d_b) d_b, status_b = decomp_b.decompress() } } for status_a { comp.compress(d_a) d_a, status_a = decomp_a.decompress() } for status_b { comp.compress(d_b) d_b, status_b = decomp_b.decompress() } return comp.close() } func (a Set) Intersection(b Set) Set { comp, decomp_a, decomp_b := newCompressor(), newDecompressor(a), newDecompressor(b) d_a, status_a := decomp_a.decompress() d_b, status_b := decomp_b.decompress() for status_a && status_b { if d_a == d_b { comp.compress(d_a) d_a, status_a = decomp_a.decompress() d_b, status_b = decomp_b.decompress() } else if d_a < d_b { d_a, status_a = decomp_a.decompress() } else { d_b, status_b = decomp_b.decompress() } } return comp.close() } func (a Set) Intersects(b Set) bool { decomp_a, decomp_b := newDecompressor(a), newDecompressor(b) d_a, status_a := decomp_a.decompress() d_b, status_b := decomp_b.decompress() for status_a && status_b { if d_a == d_b { return true } else if d_a < d_b { d_a, status_a = decomp_a.decompress() } else { d_b, status_b = decomp_b.decompress() } } return false } func (s Set) Len() int { length, decomp := 0, newDecompressor(s) for _, status := decomp.decompress(); status; _, status = decomp.decompress() { length++ } return length } jetset-1.0.0/jetset_test.go000066400000000000000000000064531300047643700157060ustar00rootroot00000000000000// Copyright 2015 The jetset Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package jetset import ( "testing" ) func TestCompress(t *testing.T) { items, comp := []uint64{1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 20, 22, 24, 26, 28}, newCompressor() for _, item := range items { comp.compress(item) } set := comp.close() decomp := newDecompressor(set) for _, item := range items { d, status := decomp.decompress() if !status || d != item { t.Errorf("compression/decompression error") break } } } func makeTestSetA() Set { set := Set{} set = set.Add(10) set = set.Add(11) set = set.Add(11) set = set.Add(12) set = set.Add(14) set = set.Add(15) set = set.Add(16) set = set.Add(17) set = set.Add(20) set = set.Add(1) set = set.Add(1) set = set.Add(3) set = set.Add(4) set = set.Add(5) set = set.Add(7) set = set.Add(8) set = set.Add(9) set = set.Add(22) set = set.Add(21) return set } func makeTestSetB() Set { set := Set{} set = set.Add(10) set = set.Add(12) set = set.Add(15) set = set.Add(17) set = set.Add(1) set = set.Add(1) set = set.Add(4) set = set.Add(7) set = set.Add(9) set = set.Add(21) set = set.Add(23) set = set.Add(24) set = set.Add(28) return set } func TestAdd(t *testing.T) { items, set := []uint64{1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 20, 21, 22}, makeTestSetA() decomp := newDecompressor(set) for _, item := range items { d, status := decomp.decompress() if !status || d != item { t.Errorf("add error") break } } } func TestAddRange(t *testing.T) { items, set := []uint64{1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 26}, makeTestSetA().AddRange(21, 26) decomp := newDecompressor(set) for _, item := range items { d, status := decomp.decompress() if !status || d != item { t.Errorf("add range error") break } } } func TestHas(t *testing.T) { set := makeTestSetA() if !set.Has(11) { t.Errorf("set should include 11") } if set.Has(13) { t.Errorf("set shouldn't have 13") } if !set.Has(16) { t.Errorf("set should have 16") } } func TestComplement(t *testing.T) { set := makeTestSetA() set = set.Complement(0x110000) if set.Has(11) { t.Errorf("set shouldn't have 11") } if !set.Has(13) { t.Errorf("set should have 13") } if set.Has(16) { t.Errorf("set shouldn't have 16") } } func TestUnion(t *testing.T) { a, b := makeTestSetA(), makeTestSetB() set := a.Union(b) if !set.Has(10) { t.Errorf("set should have 10") } if !set.Has(28) { t.Errorf("set should have 28") } if !set.Has(9) { t.Errorf("set should have 9") } } func TestIntersection(t *testing.T) { a, b := makeTestSetA(), makeTestSetB() set := a.Intersection(b) if !set.Has(10) { t.Errorf("set should have 10") } if set.Has(28) { t.Errorf("set shouldn't have 28") } if !set.Has(9) { t.Errorf("set should have 9") } if set.Has(11) { t.Errorf("set should have 11") } } func TestIntersects(t *testing.T) { a, b := makeTestSetA(), makeTestSetB() if !a.Intersects(b) { t.Errorf("a should intersect b") } } func TestLen(t *testing.T) { a, b := makeTestSetA(), makeTestSetB() if a.Len() != 17 { t.Errorf("length of set should be 17") } if b.Len() != 12 { t.Errorf("length of set should be 12") } }