pax_global_header00006660000000000000000000000064134726077040014524gustar00rootroot0000000000000052 comment=a090aee7935ebb9a67c9a8233b04eea0e9f7e1b2 fuzzy-patricia-3.0.0/000077500000000000000000000000001347260770400145055ustar00rootroot00000000000000fuzzy-patricia-3.0.0/.gitignore000066400000000000000000000004211347260770400164720ustar00rootroot00000000000000# Swap files. *.swp # Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe fuzzy-patricia-3.0.0/AUTHORS000066400000000000000000000001411347260770400155510ustar00rootroot00000000000000This is the complete list of go-patricia copyright holders: Ondřej Kupka fuzzy-patricia-3.0.0/LICENSE000066400000000000000000000020661347260770400155160ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2014 The AUTHORS Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
fuzzy-patricia-3.0.0/README.md000066400000000000000000000057421347260770400157740ustar00rootroot00000000000000# go-patricia # **Documentation**: [GoDoc](http://godoc.org/github.com/tchap/go-patricia/patricia)
**Test Coverage**: [![Coverage Status](https://coveralls.io/repos/tchap/go-patricia/badge.png)](https://coveralls.io/r/tchap/go-patricia) ## About ## A generic patricia trie (also called radix tree) implemented in Go (Golang). The patricia trie as implemented in this library enables fast visiting of items in some particular ways: 1. visit all items saved in the tree, 2. visit all items matching particular prefix (visit subtree), or 3. given a string, visit all items matching some prefix of that string. `[]byte` type is used for keys, `interface{}` for values. `Trie` is not thread safe. Synchronize the access yourself. ### State of the Project ### Apparently some people are using this, so the API should not change often. Any ideas on how to make the library better are still welcome. More (unit) testing would be cool as well... ## Usage ## Import the package from GitHub first. ```go import "github.com/tchap/go-patricia/patricia" ``` You can as well use gopkg.in thingie: ```go import "gopkg.in/tchap/go-patricia.v2/patricia" ``` Then you can start having fun. ```go printItem := func(prefix patricia.Prefix, item patricia.Item) error { fmt.Printf("%q: %v\n", prefix, item) return nil } // Create a new default trie (using the default parameter values). trie := NewTrie() // Create a new custom trie. trie := NewTrie(MaxPrefixPerNode(16), MaxChildrenPerSparseNode(10)) // Insert some items. trie.Insert(Prefix("Pepa Novak"), 1) trie.Insert(Prefix("Pepa Sindelar"), 2) trie.Insert(Prefix("Karel Macha"), 3) trie.Insert(Prefix("Karel Hynek Macha"), 4) // Just check if some things are present in the tree. key := Prefix("Pepa Novak") fmt.Printf("%q present? %v\n", key, trie.Match(key)) // "Pepa Novak" present? true key = Prefix("Karel") fmt.Printf("Anybody called %q here? %v\n", key, trie.MatchSubtree(key)) // Anybody called "Karel" here? true // Walk the tree in alphabetical order. 
trie.Visit(printItem) // "Karel Hynek Macha": 4 // "Karel Macha": 3 // "Pepa Novak": 1 // "Pepa Sindelar": 2 // Walk a subtree. trie.VisitSubtree(Prefix("Pepa"), printItem) // "Pepa Novak": 1 // "Pepa Sindelar": 2 // Modify an item, then fetch it from the tree. trie.Set(Prefix("Karel Hynek Macha"), 10) key = Prefix("Karel Hynek Macha") fmt.Printf("%q: %v\n", key, trie.Get(key)) // "Karel Hynek Macha": 10 // Walk prefixes. prefix := Prefix("Karel Hynek Macha je kouzelnik") trie.VisitPrefixes(prefix, printItem) // "Karel Hynek Macha": 10 // Delete some items. trie.Delete(Prefix("Pepa Novak")) trie.Delete(Prefix("Karel Macha")) // Walk again. trie.Visit(printItem) // "Karel Hynek Macha": 10 // "Pepa Sindelar": 2 // Delete a subtree. trie.DeleteSubtree(Prefix("Pepa")) // Print what is left. trie.Visit(printItem) // "Karel Hynek Macha": 10 ``` ## License ## MIT, check the `LICENSE` file. [![Gittip Badge](http://img.shields.io/gittip/alanhamlett.png)](https://www.gittip.com/tchap/ "Gittip Badge") fuzzy-patricia-3.0.0/patricia/000077500000000000000000000000001347260770400163015ustar00rootroot00000000000000fuzzy-patricia-3.0.0/patricia/children.go000066400000000000000000000075661347260770400204360ustar00rootroot00000000000000// Copyright (c) 2014 The go-patricia AUTHORS // // Use of this source code is governed by The MIT License // that can be found in the LICENSE file. 
package patricia import ( "io" "sort" ) type childList interface { length() int head() *Trie add(child *Trie) childList remove(b byte) replace(b byte, child *Trie) next(b byte) *Trie combinedMask() uint64 getChildren() []*Trie walk(prefix *Prefix, visitor VisitorFunc) error print(w io.Writer, indent int) clone() childList total() int } type tries []*Trie func (t tries) Len() int { return len(t) } func (t tries) Less(i, j int) bool { strings := sort.StringSlice{string(t[i].prefix), string(t[j].prefix)} return strings.Less(0, 1) } func (t tries) Swap(i, j int) { t[i], t[j] = t[j], t[i] } type childContainer struct { char byte node *Trie } type superDenseChildList struct { children []childContainer } func newSuperDenseChildList() childList { return &superDenseChildList{ make([]childContainer, 0), } } func (list *superDenseChildList) length() int { return len(list.children) } func (list *superDenseChildList) head() *Trie { if len(list.children) > 0 { return list.children[0].node } return nil } func (list *superDenseChildList) add(child *Trie) childList { char := child.prefix[0] list.children = append(list.children, childContainer{ char, child, }) return list } func (list *superDenseChildList) remove(b byte) { children := list.children for i := 0; i < len(children); i++ { if children[i].char == b { // children[i] = children[len(children)-1] // children = children[:len(children)-1] // children = append(children[:i], children[i+1:]...) 
newChildren := make([]childContainer, len(children)-1) // copy the elements over to avoid "memory leaks" copy(newChildren, children[:i]) copy(newChildren[i:], children[i+1:]) list.children = newChildren // list.children = make([]childContainer, len(children)) // copy(list.children, children) // children = nil return } } } func (list *superDenseChildList) replace(b byte, child *Trie) { children := list.children for i := 0; i < len(list.children); i++ { if children[i].char == b { children[i].node = child return } } } func (list *superDenseChildList) next(b byte) *Trie { children := list.children for i := 0; i < len(list.children); i++ { if children[i].char == b { return children[i].node } } return nil } func (list superDenseChildList) combinedMask() uint64 { var mask uint64 for _, child := range list.children { // fmt.Printf("child = %+v\n", child) mask |= child.node.mask } return mask } func (list *superDenseChildList) getChildren() []*Trie { children := make([]*Trie, 0, len(list.children)) for _, child := range list.children { children = append(children, child.node) } return children } func (list *superDenseChildList) walk(prefix *Prefix, visitor VisitorFunc) error { for _, child := range list.children { node := child.node *prefix = append(*prefix, node.prefix...) 
if node.item != nil { if err := visitor(*prefix, node.item); err != nil { if err == SkipSubtree { *prefix = (*prefix)[:len(*prefix)-len(node.prefix)] continue } *prefix = (*prefix)[:len(*prefix)-len(node.prefix)] return err } } err := node.children.walk(prefix, visitor) *prefix = (*prefix)[:len(*prefix)-len(node.prefix)] if err != nil { return err } } return nil } func (list *superDenseChildList) print(w io.Writer, indent int) { for _, child := range list.children { child.node.print(w, indent) } } func (list *superDenseChildList) clone() childList { clones := make([]childContainer, len(list.children)) for i := 0; i < len(list.children); i++ { child := list.children[i] clones[i] = childContainer{child.char, child.node.Clone()} } return &superDenseChildList{ clones, } } func (list *superDenseChildList) total() int { return len(list.children) } fuzzy-patricia-3.0.0/patricia/patricia.go000066400000000000000000000514051347260770400204310ustar00rootroot00000000000000// Copyright (c) 2014 The go-patricia AUTHORS // // Use of this source code is governed by The MIT License // that can be found in the LICENSE file. package patricia import ( "bytes" "errors" "fmt" "io" "strings" ) //------------------------------------------------------------------------------ // Trie //------------------------------------------------------------------------------ const ( defaultMaxPrefixPerNode = 10 ) var ( maxPrefixPerNode = defaultMaxPrefixPerNode ) type ( // Prefix is the type of node prefixes Prefix []byte // Item is just interface{} Item interface{} // VisitorFunc is the type of functions passed to visit function VisitorFunc func(prefix Prefix, item Item) error // FuzzyVisitorFunc additionaly returns how many characters were skipped which can be sorted on FuzzyVisitorFunc func(prefix Prefix, item Item, skipped int) error ) // Trie is a generic patricia trie that allows fast retrieval of items by prefix. // and other funky stuff. // // Trie is not thread-safe. 
type Trie struct { prefix Prefix item Item mask uint64 children childList } // Public API ------------------------------------------------------------------ // NewTrie constructs a new trie. func NewTrie() *Trie { trie := &Trie{} trie.children = newSuperDenseChildList() trie.mask = 0 return trie } // SetMaxPrefixPerNode sets the maximum length of a prefix before it is split into two nodes func SetMaxPrefixPerNode(value int) { maxPrefixPerNode = value } // Clone makes a copy of an existing trie. // Items stored in both tries become shared, obviously. func (trie *Trie) Clone() *Trie { return &Trie{ prefix: append(Prefix(nil), trie.prefix...), item: trie.item, children: trie.children.clone(), } } // Item returns the item stored in the root of this trie. func (trie *Trie) Item() Item { return trie.item } // Insert inserts a new item into the trie using the given prefix. Insert does // not replace existing items. It returns false if an item was already in place. func (trie *Trie) Insert(key Prefix, item Item) (inserted bool) { return trie.put(key, item, false) } // Set works much like Insert, but it always sets the item, possibly replacing // the item previously inserted. func (trie *Trie) Set(key Prefix, item Item) { trie.put(key, item, true) } // Get returns the item located at key. // // This method is a bit dangerous, because Get can as well end up in an internal // node that is not really representing any user-defined value. So when nil is // a valid value being used, it is not possible to tell if the value was inserted // into the tree by the user or not. A possible workaround for this is not to use // nil interface as a valid value, even using zero value of any type is enough // to prevent this bad behaviour. func (trie *Trie) Get(key Prefix) (item Item) { _, node, found, leftover := trie.findSubtree(key) if !found || len(leftover) != 0 { return nil } return node.item } // Match returns what Get(prefix) != nil would return. 
The same warning as for // Get applies here as well. func (trie *Trie) Match(prefix Prefix) (matchedExactly bool) { return trie.Get(prefix) != nil } // MatchSubtree returns true when there is a subtree representing extensions // to key, that is if there are any keys in the tree which have key as prefix. func (trie *Trie) MatchSubtree(key Prefix) (matched bool) { _, _, matched, _ = trie.findSubtree(key) return } // Visit calls visitor on every node containing a non-nil item // in alphabetical order. // // If an error is returned from visitor, the function stops visiting the tree // and returns that error, unless it is a special error - SkipSubtree. In that // case Visit skips the subtree represented by the current node and continues // elsewhere. func (trie *Trie) Visit(visitor VisitorFunc) error { return trie.walk(nil, visitor) } func (trie *Trie) size() int { n := 0 err := trie.walk(nil, func(prefix Prefix, item Item) error { n++ return nil }) if err != nil { panic(err) } return n } func (trie *Trie) total() int { return 1 + trie.children.total() } // VisitSubtree works much like Visit, but it only visits nodes matching prefix. func (trie *Trie) VisitSubtree(prefix Prefix, visitor VisitorFunc) error { // Nil prefix not allowed. if prefix == nil { panic(ErrNilPrefix) } // Empty trie must be handled explicitly. if trie.prefix == nil { return nil } // Locate the relevant subtree. _, root, found, leftover := trie.findSubtree(prefix) if !found { return nil } prefix = append(prefix, leftover...) // Visit it. 
return root.walk(prefix, visitor) } type potentialSubtree struct { idx int skipped int prefix Prefix node *Trie } // VisitFuzzy visits every node that is succesfully matched via fuzzy matching func (trie *Trie) VisitFuzzy(partial Prefix, caseInsensitive bool, visitor FuzzyVisitorFunc) error { if len(partial) == 0 { return trie.VisitPrefixes(partial, caseInsensitive, func(prefix Prefix, item Item) error { return visitor(prefix, item, 0) }) } var ( m uint64 cmp uint64 i int p potentialSubtree ) potential := []potentialSubtree{potentialSubtree{node: trie, prefix: Prefix(""), idx: 0}} for l := len(potential); l > 0; l = len(potential) { i = l - 1 p = potential[i] potential = potential[:i] m = makePrefixMask(partial[p.idx:]) if caseInsensitive { cmp = caseInsensitiveMask(p.node.mask) } else { cmp = p.node.mask } if (cmp & m) != m { continue } matchCount, skipped := fuzzyMatchCount(p.node.prefix, partial[p.idx:], p.idx, caseInsensitive) p.idx += matchCount if p.idx != 0 { p.skipped += skipped } if p.idx == len(partial) { fullPrefix := append(p.prefix, p.node.prefix...) err := p.node.walk(Prefix(""), func(prefix Prefix, item Item) error { key := make([]byte, len(fullPrefix), len(fullPrefix)+len(prefix)) copy(key, fullPrefix) key = append(key, prefix...) err := visitor(key, item, p.skipped) if err != nil { return err } return nil }) if err != nil { return err } continue } for _, c := range p.node.children.getChildren() { if c != nil { newPrefix := make(Prefix, len(p.prefix), len(p.prefix)+len(p.node.prefix)) copy(newPrefix, p.prefix) newPrefix = append(newPrefix, p.node.prefix...) 
potential = append(potential, potentialSubtree{ node: c, prefix: newPrefix, idx: p.idx, skipped: p.skipped, }) } else { fmt.Println("warning, child isn il") } } } return nil } func fuzzyMatchCount(prefix, query Prefix, idx int, caseInsensitive bool) (count, skipped int) { for i := 0; i < len(prefix); i++ { var match bool if caseInsensitive { match = matchCaseInsensitive(prefix[i], query[count]) } else { match = prefix[i] == query[count] } if !match { if count+idx > 0 { skipped++ } continue } count++ if count >= len(query) { return } } return } // VisitSubstring takes a substring and visits all the nodes that whos prefix contains this substring func (trie *Trie) VisitSubstring(substring Prefix, caseInsensitive bool, visitor VisitorFunc) error { if len(substring) == 0 { return trie.VisitSubtree(substring, visitor) } var ( m uint64 cmp uint64 i int p potentialSubtree suffixLen int maxSuffixLen = len(substring) - 1 ) potential := []potentialSubtree{potentialSubtree{node: trie, prefix: nil}} for l := len(potential); l > 0; l = len(potential) { i = l - 1 p = potential[i] potential = potential[:i] if len(p.prefix) < maxSuffixLen { suffixLen = len(p.prefix) } else { suffixLen = maxSuffixLen } searchBytes := append(p.prefix[len(p.prefix)-suffixLen:], p.node.prefix...) contains := false if caseInsensitive { contains = bytes.Contains(bytes.ToUpper(searchBytes), bytes.ToUpper(substring)) } else { contains = bytes.Contains(searchBytes, substring) } if contains { fullPrefix := append(p.prefix, p.node.prefix...) err := p.node.walk(Prefix(""), func(prefix Prefix, item Item) error { key := make([]byte, len(fullPrefix), len(fullPrefix)+len(prefix)) copy(key, fullPrefix) key = append(key, prefix...) copy(key, append(fullPrefix, prefix...)) err := visitor(key, item) if err != nil { return err } return nil }) if err != nil { return err } } newPrefix := make(Prefix, len(p.prefix), len(p.prefix)+len(p.node.prefix)) copy(newPrefix, p.prefix) newPrefix = append(newPrefix, p.node.prefix...) 
overLap := overlapLength(newPrefix, substring, caseInsensitive) m = makePrefixMask(substring[overLap:]) for _, c := range p.node.children.getChildren() { if caseInsensitive { cmp = caseInsensitiveMask(c.mask) } else { cmp = c.mask } if c != nil && (cmp&m == m) { potential = append(potential, potentialSubtree{ node: c, prefix: newPrefix, }) } } } return nil } func overlapLength(prefix, query Prefix, caseInsensitive bool) int { startLength := len(query) - 1 if len(prefix) < startLength { startLength = len(prefix) } for i := startLength; i > 0; i-- { suffix := prefix[len(prefix)-i:] queryPrefix := query[:i] if caseInsensitive { if bytes.EqualFold(suffix, queryPrefix) { return i } } else if bytes.Equal(suffix, queryPrefix) { return i } } return 0 } // VisitPrefixes visits only nodes that represent prefixes of key. // To say the obvious, returning SkipSubtree from visitor makes no sense here. func (trie *Trie) VisitPrefixes(key Prefix, caseInsensitive bool, visitor VisitorFunc) error { // Nil key not allowed. if key == nil { panic(ErrNilPrefix) } // Empty trie must be handled explicitly. if trie.prefix == nil { return nil } // Walk the path matching key prefixes. node := trie prefix := key offset := 0 for { // Compute what part of prefix matches. common := node.longestCommonPrefixLength(key, caseInsensitive) key = key[common:] offset += common // Partial match means that there is no subtree matching prefix. if common < len(node.prefix) { return nil } // Call the visitor. if item := node.item; item != nil { if err := visitor(prefix[:offset], item); err != nil { return err } } if len(key) == 0 { // This node represents key, we are finished. return nil } // There is some key suffix left, move to the children. child := node.children.next(key[0]) if child == nil { // There is nowhere to continue, return. return nil } node = child } } // Delete deletes the item represented by the given prefix. // // True is returned if the matching node was found and deleted. 
func (trie *Trie) Delete(key Prefix) (deleted bool) { // Nil prefix not allowed. if key == nil { panic(ErrNilPrefix) } // Empty trie must be handled explicitly. if trie.prefix == nil { return false } // Find the relevant node. path, found, _ := trie.findSubtreePath(key) if !found { return false } node := path[len(path)-1] var parent *Trie if len(path) != 1 { parent = path[len(path)-2] } // If the item is already set to nil, there is nothing to do. if node.item == nil { return false } // Delete the item. node.item = nil // Initialise i before goto. // Will be used later in a loop. i := len(path) - 1 // In case there are some child nodes, we cannot drop the whole subtree. // We can try to compact nodes, though. if node.children.length() != 0 { goto Compact } // In case we are at the root, just reset it and we are done. if parent == nil { node.reset() return true } // We can drop a subtree. // Find the first ancestor that has its value set or it has 2 or more child nodes. // That will be the node where to drop the subtree at. for ; i >= 0; i-- { if current := path[i]; current.item != nil || current.children.length() >= 2 { break } } // Handle the case when there is no such node. // In other words, we can reset the whole tree. if i == -1 { path[0].reset() return true } // We can just remove the subtree here. node = path[i] if i == 0 { parent = nil } else { parent = path[i-1] } // i+1 is always a valid index since i is never pointing to the last node. // The loop above skips at least the last node since we are sure that the item // is set to nil and it has no children, othewise we would be compacting instead. node.children.remove(path[i+1].prefix[0]) // lastly, the bitmasks of all of the parent nodes have to be updated again, since // a child node of all of them has bin removed for ; i >= 0; i-- { n := path[i] n.mask = n.children.combinedMask() } Compact: // The node is set to the first non-empty ancestor, // so try to compact since that might be possible now. 
if compacted := node.compact(); compacted != node { if parent == nil { *node = *compacted } else { parent.children.replace(node.prefix[0], compacted) *parent = *parent.compact() } } return true } // DeleteSubtree finds the subtree exactly matching prefix and deletes it. // // True is returned if the subtree was found and deleted. func (trie *Trie) DeleteSubtree(prefix Prefix) (deleted bool) { // Nil prefix not allowed. if prefix == nil { panic(ErrNilPrefix) } // Empty trie must be handled explicitly. if trie.prefix == nil { return false } // Locate the relevant subtree. parent, root, found, _ := trie.findSubtree(prefix) path, _, _ := trie.findSubtreePath(prefix) if !found { return false } // If we are in the root of the trie, reset the trie. if parent == nil { root.reset() return true } // Otherwise remove the root node from its parent. parent.children.remove(root.prefix[0]) // update masks parent.mask = parent.children.combinedMask() for i := len(path) - 1; i >= 0; i-- { n := path[i] n.mask = n.children.combinedMask() } return true } // Internal helper methods ----------------------------------------------------- func (trie *Trie) empty() bool { return trie.item == nil && trie.children.length() == 0 } func (trie *Trie) reset() { trie.prefix = nil trie.children = newSuperDenseChildList() } func makePrefixMask(key Prefix) uint64 { var mask uint64 for _, b := range key { if b >= '0' && b <= '9' { // 0-9 bits: 0-9 b -= 48 } else if b >= 'A' && b <= 'Z' { // A-Z bits: 10-35 b -= 55 } else if b >= 'a' && b <= 'z' { // a-z bits: 36-61 b -= 61 } else if b == '.' 
{ b = 62 } else if b == '-' { b = 63 } else { continue } mask |= uint64(1) << uint64(b) } return mask } const upperBits = 0xFFFFFFC00 const lowerBits = 0x3FFFFFF000000000 func caseInsensitiveMask(mask uint64) uint64 { mask |= (mask & upperBits) << uint64(26) mask |= (mask & lowerBits) >> uint64(26) return mask } var charmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.-" func (trie *Trie) put(key Prefix, item Item, replace bool) (inserted bool) { // Nil prefix not allowed. if key == nil { panic(ErrNilPrefix) } var ( common int node = trie child *Trie mask uint64 ) mask = makePrefixMask(key) if node.prefix == nil { node.mask |= mask if len(key) <= maxPrefixPerNode { node.prefix = key goto InsertItem } node.prefix = key[:maxPrefixPerNode] key = key[maxPrefixPerNode:] mask = makePrefixMask(key) goto AppendChild } for { // Compute the longest common prefix length. common = node.longestCommonPrefixLength(key, false) key = key[common:] // Only a part matches, split. if common < len(node.prefix) { goto SplitPrefix } // common == len(node.prefix) since never (common > len(node.prefix)) // common == len(former key) <-> 0 == len(key) // -> former key == node.prefix if len(key) == 0 { goto InsertItem } node.mask |= mask // Check children for matching prefix. child = node.children.next(key[0]) if child == nil { goto AppendChild } node = child } SplitPrefix: // Split the prefix if necessary. child = new(Trie) *child = *node *node = *NewTrie() node.prefix = child.prefix[:common] child.prefix = child.prefix[common:] child = child.compact() node.children = node.children.add(child) node.mask = child.mask node.mask |= mask mask = makePrefixMask(key) AppendChild: // Keep appending children until whole prefix is inserted. // This loop starts with empty node.prefix that needs to be filled. 
for len(key) != 0 { child := NewTrie() child.mask = mask if len(key) <= maxPrefixPerNode { child.prefix = key node.children = node.children.add(child) node = child goto InsertItem } else { child.prefix = key[:maxPrefixPerNode] key = key[maxPrefixPerNode:] mask = makePrefixMask(key) node.children = node.children.add(child) node = child } } InsertItem: // Try to insert the item if possible. if replace || node.item == nil { node.item = item return true } return false } func (trie *Trie) compact() *Trie { // Only a node with a single child can be compacted. if trie.children.length() != 1 { return trie } child := trie.children.head() // If any item is set, we cannot compact since we want to retain // the ability to do searching by key. This makes compaction less usable, // but that simply cannot be avoided. if trie.item != nil || child.item != nil { return trie } // Make sure the combined prefixes fit into a single node. if len(trie.prefix)+len(child.prefix) > maxPrefixPerNode { return trie } // Concatenate the prefixes, move the items. child.prefix = append(trie.prefix, child.prefix...) child.mask = trie.mask if trie.item != nil { child.item = trie.item } return child } func (trie *Trie) findSubtree(prefix Prefix) (parent *Trie, root *Trie, found bool, leftover Prefix) { // Find the subtree matching prefix. root = trie for { // Compute what part of prefix matches. common := root.longestCommonPrefixLength(prefix, false) prefix = prefix[common:] // We used up the whole prefix, subtree found. if len(prefix) == 0 { found = true leftover = root.prefix[common:] return } // Partial match means that there is no subtree matching prefix. if common < len(root.prefix) { leftover = root.prefix[common:] return } // There is some prefix left, move to the children. child := root.children.next(prefix[0]) if child == nil { // There is nowhere to continue, there is no subtree matching prefix. 
return } parent = root root = child } } func (trie *Trie) findSubtreePath(prefix Prefix) (path []*Trie, found bool, leftover Prefix) { // Find the subtree matching prefix. root := trie var subtreePath []*Trie for { // Append the current root to the path. subtreePath = append(subtreePath, root) // Compute what part of prefix matches. common := root.longestCommonPrefixLength(prefix, false) prefix = prefix[common:] // We used up the whole prefix, subtree found. if len(prefix) == 0 { path = subtreePath found = true leftover = root.prefix[common:] return } // Partial match means that there is no subtree matching prefix. if common < len(root.prefix) { leftover = root.prefix[common:] return } // There is some prefix left, move to the children. child := root.children.next(prefix[0]) if child == nil { // There is nowhere to continue, there is no subtree matching prefix. return } root = child } } func (trie *Trie) walk(actualRootPrefix Prefix, visitor VisitorFunc) error { var prefix Prefix // Allocate a bit more space for prefix at the beginning. if actualRootPrefix == nil { prefix = make(Prefix, 32+len(trie.prefix)) copy(prefix, trie.prefix) prefix = prefix[:len(trie.prefix)] } else { prefix = make(Prefix, 32+len(actualRootPrefix)) copy(prefix, actualRootPrefix) prefix = prefix[:len(actualRootPrefix)] } // Visit the root first. Not that this works for empty trie as well since // in that case item == nil && len(children) == 0. if trie.item != nil { if err := visitor(prefix, trie.item); err != nil { if err == SkipSubtree { return nil } return err } } // Then continue to the children. 
return trie.children.walk(&prefix, visitor) } func (trie *Trie) longestCommonPrefixLength(prefix Prefix, caseInsensitive bool) (i int) { for ; i < len(prefix) && i < len(trie.prefix); i++ { p := prefix[i] t := trie.prefix[i] if caseInsensitive { if !(matchCaseInsensitive(t, p)) { break } } else { if p != t { break } } } return } func matchCaseInsensitive(a byte, b byte) bool { return a == b+32 || b == a+32 || a == b } func (trie *Trie) dump() string { writer := &bytes.Buffer{} trie.print(writer, 0) return writer.String() } func (trie *Trie) print(writer io.Writer, indent int) { fmt.Fprintf(writer, "%s%s %v\n", strings.Repeat(" ", indent), string(trie.prefix), trie.item) trie.children.print(writer, indent+2) } // Errors ---------------------------------------------------------------------- var ( SkipSubtree = errors.New("Skip this subtree") ErrNilPrefix = errors.New("Nil prefix passed into a method call") ) fuzzy-patricia-3.0.0/patricia/patricia_dense_test.go000066400000000000000000000146651347260770400226550ustar00rootroot00000000000000// Copyright (c) 2014 The go-patricia AUTHORS // // Use of this source code is governed by The MIT License // that can be found in the LICENSE file. 
package patricia import ( "math/rand" "runtime" "strconv" "testing" ) // Tests ----------------------------------------------------------------------- // overhead is allowed tolerance for Go's runtime/GC to increase the allocated memory // (to avoid failing tests on insignificant growth amounts) const overhead = 4000 func TestTrie_InsertDense(t *testing.T) { trie := NewTrie() data := []testData{ {"aba", 0, success}, {"abb", 1, success}, {"abc", 2, success}, {"abd", 3, success}, {"abe", 4, success}, {"abf", 5, success}, {"abg", 6, success}, {"abh", 7, success}, {"abi", 8, success}, {"abj", 9, success}, {"abk", 0, success}, {"abl", 1, success}, {"abm", 2, success}, {"abn", 3, success}, {"abo", 4, success}, {"abp", 5, success}, {"abq", 6, success}, {"abr", 7, success}, {"abs", 8, success}, {"abt", 9, success}, {"abu", 0, success}, {"abv", 1, success}, {"abw", 2, success}, {"abx", 3, success}, {"aby", 4, success}, {"abz", 5, success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } } func TestTrie_InsertDenseDuplicatePrefixes(t *testing.T) { trie := NewTrie() data := []testData{ {"aba", 0, success}, {"abb", 1, success}, {"abc", 2, success}, {"abd", 3, success}, {"abe", 4, success}, {"abf", 5, success}, {"abg", 6, success}, {"abh", 7, success}, {"abi", 8, success}, {"abj", 9, success}, {"abk", 0, success}, {"abl", 1, success}, {"abm", 2, success}, {"abn", 3, success}, {"abo", 4, success}, {"abp", 5, success}, {"abq", 6, success}, {"abr", 7, success}, {"abs", 8, success}, {"abt", 9, success}, {"abu", 0, success}, {"abv", 1, success}, {"abw", 2, success}, {"abx", 3, success}, {"aby", 4, success}, {"abz", 5, success}, {"aba", 0, failure}, {"abb", 1, failure}, {"abc", 2, failure}, {"abd", 3, failure}, {"abe", 4, failure}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, 
success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } } func TestTrie_CloneDense(t *testing.T) { trie := NewTrie() data := []testData{ {"aba", 0, success}, {"abb", 1, success}, {"abc", 2, success}, {"abd", 3, success}, {"abe", 4, success}, {"abf", 5, success}, {"abg", 6, success}, {"abh", 7, success}, {"abi", 8, success}, {"abj", 9, success}, {"abk", 0, success}, {"abl", 1, success}, {"abm", 2, success}, {"abn", 3, success}, {"abo", 4, success}, {"abp", 5, success}, {"abq", 6, success}, {"abr", 7, success}, {"abs", 8, success}, {"abt", 9, success}, {"abu", 0, success}, {"abv", 1, success}, {"abw", 2, success}, {"abx", 3, success}, {"aby", 4, success}, {"abz", 5, success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } t.Log("CLONE") clone := trie.Clone() for _, v := range data { t.Logf("GET prefix=%v, item=%v", v.key, v.value) if item := clone.Get(Prefix(v.key)); item != v.value { t.Errorf("Unexpected return value, expected=%v, got=%v", v.value, item) } } prefix := "xxx" item := 666 t.Logf("INSERT prefix=%v, item=%v", prefix, item) if ok := trie.Insert(Prefix(prefix), item); !ok { t.Errorf("Unexpected return value, expected=true, got=%v", ok) } t.Logf("GET cloned prefix=%v", prefix) if item := clone.Get(Prefix(prefix)); item != nil { t.Errorf("Unexpected return value, expected=nil, got=%v", item) } } func TestTrie_DeleteDense(t *testing.T) { trie := NewTrie() data := []testData{ {"aba", 0, success}, {"abb", 1, success}, {"abc", 2, success}, {"abd", 3, success}, {"abe", 4, success}, {"abf", 5, success}, {"abg", 6, success}, {"abh", 7, success}, {"abi", 8, success}, {"abj", 9, success}, {"abk", 0, success}, {"abl", 1, success}, {"abm", 2, success}, {"abn", 
// heapAllocatedBytes runs a garbage collection and then returns the Alloc
// field of the runtime memory statistics, so that readings taken at different
// points of a test are compared after unreachable objects have been collected.
func heapAllocatedBytes() uint64 {
	var stats runtime.MemStats

	runtime.GC()
	runtime.ReadMemStats(&stats)

	return stats.Alloc
}
trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } } func TestTrie_InsertAndMatchPrefix(t *testing.T) { trie := NewTrie() t.Log("INSERT prefix=by week") trie.Insert(Prefix("by week"), 2) t.Log("INSERT prefix=by") trie.Insert(Prefix("by"), 1) if !trie.Match(Prefix("by")) { t.Error("MATCH prefix=by, expected=true, got=false") } } func TestTrie_SetGet(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepan", "Pepan Zdepan", success}, {"Pepin", "Pepin Omacka", success}, {"Honza", "Honza Novak", success}, {"Jenik", "Jenik Poustevnicek", success}, {"Pepan", "Pepan Dupan", failure}, {"Karel", "Karel Pekar", success}, {"Jenik", "Jenik Poustevnicek", failure}, {"Pepanek", "Pepanek Zemlicka", success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } for _, v := range data { t.Logf("SET %q to 10", v.key) trie.Set(Prefix(v.key), 10) } for _, v := range data { value := trie.Get(Prefix(v.key)) t.Logf("GET %q => %v", v.key, value) if value.(int) != 10 { t.Errorf("Unexpected return value, %v != 10", value) } } if value := trie.Get(Prefix("random crap")); value != nil { t.Errorf("Unexpected return value, %v != ", value) } } func TestTrie_Match(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepan", "Pepan Zdepan", success}, {"Pepin", "Pepin Omacka", success}, {"Honza", "Honza Novak", success}, {"Jenik", "Jenik Poustevnicek", success}, {"Pepan", "Pepan Dupan", failure}, {"Karel", "Karel Pekar", success}, {"Jenik", "Jenik Poustevnicek", failure}, {"Pepanek", "Pepanek Zemlicka", success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", 
v.retVal, ok) } } for _, v := range data { matched := trie.Match(Prefix(v.key)) t.Logf("MATCH %q => %v", v.key, matched) if !matched { t.Errorf("Inserted key %q was not matched", v.key) } } if trie.Match(Prefix("random crap")) { t.Errorf("Key that was not inserted matched: %q", "random crap") } } func TestTrie_MatchFalsePositive(t *testing.T) { trie := NewTrie() if ok := trie.Insert(Prefix("A"), 1); !ok { t.Fatal("INSERT prefix=A, item=1 not ok") } resultMatchSubtree := trie.MatchSubtree(Prefix("A extra")) resultMatch := trie.Match(Prefix("A extra")) if resultMatchSubtree != false { t.Error("MatchSubtree returned false positive") } if resultMatch != false { t.Error("Match returned false positive") } } func TestTrie_MatchSubtree(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepan", "Pepan Zdepan", success}, {"Pepin", "Pepin Omacka", success}, {"Honza", "Honza Novak", success}, {"Jenik", "Jenik Poustevnicek", success}, {"Pepan", "Pepan Dupan", failure}, {"Karel", "Karel Pekar", success}, {"Jenik", "Jenik Poustevnicek", failure}, {"Pepanek", "Pepanek Zemlicka", success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } for _, v := range data { key := Prefix(v.key[:3]) matched := trie.MatchSubtree(key) t.Logf("MATCH_SUBTREE %q => %v", key, matched) if !matched { t.Errorf("Subtree %q was not matched", v.key) } } } func TestTrie_Visit(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepa", 0, success}, {"Pepa Zdepa", 1, success}, {"Pepa Kuchar", 2, success}, {"Honza", 3, success}, {"Jenik", 4, success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } if err := 
trie.Visit(func(prefix Prefix, item Item) error { name := data[item.(int)].key t.Logf("VISITING prefix=%q, item=%v", prefix, item) if !strings.HasPrefix(string(prefix), name) { t.Errorf("Unexpected prefix encountered, %q not a prefix of %q", prefix, name) } return nil }); err != nil { t.Fatal(err) } } func TestTrie_VisitSkipSubtree(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepa", 0, success}, {"Pepa Zdepa", 1, success}, {"Pepa Kuchar", 2, success}, {"Honza", 3, success}, {"Jenik", 4, success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } if err := trie.Visit(func(prefix Prefix, item Item) error { t.Logf("VISITING prefix=%q, item=%v", prefix, item) if item.(int) == 0 { t.Logf("SKIP %q", prefix) return SkipSubtree } if strings.HasPrefix(string(prefix), "Pepa") { t.Errorf("Unexpected prefix encountered, %q", prefix) } return nil }); err != nil { t.Fatal(err) } } func TestTrie_VisitReturnError(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepa", 0, success}, {"Pepa Zdepa", 1, success}, {"Pepa Kuchar", 2, success}, {"Honza", 3, success}, {"Jenik", 4, success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } someErr := errors.New("Something exploded") if err := trie.Visit(func(prefix Prefix, item Item) error { t.Logf("VISITING prefix=%q, item=%v", prefix, item) if item.(int) == 3 { return someErr } if item.(int) != 3 { t.Logf("Unexpected prefix encountered, %q", prefix) } return nil }); err != nil && err != someErr { t.Fatal(err) } } func TestTrie_VisitSubtree(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepa", 0, success}, {"Pepa Zdepa", 1, success}, {"Pepa 
Kuchar", 2, success}, {"Honza", 3, success}, {"Jenik", 4, success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } var counter int subtreePrefix := []byte("Pep") t.Log("VISIT Pep") if err := trie.VisitSubtree(subtreePrefix, func(prefix Prefix, item Item) error { t.Logf("VISITING prefix=%q, item=%v", prefix, item) if !bytes.HasPrefix(prefix, subtreePrefix) { t.Errorf("Unexpected prefix encountered, %q does not extend %q", prefix, subtreePrefix) } if len(prefix) > len(data[item.(int)].key) { t.Fatalf("Something is rather fishy here, prefix=%q", prefix) } counter++ return nil }); err != nil { t.Fatal(err) } if counter != 3 { t.Error("Unexpected number of nodes visited") } } func TestTrie_VisitPrefixes(t *testing.T) { trie := NewTrie() data := []testData{ {"P", 0, success}, {"Pe", 1, success}, {"Pep", 2, success}, {"Pepa", 3, success}, {"Pepa Zdepa", 4, success}, {"Pepa Kuchar", 5, success}, {"Honza", 6, success}, {"Jenik", 7, success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } var counter int word := []byte("Pepa") if err := trie.VisitPrefixes(word, false, func(prefix Prefix, item Item) error { t.Logf("VISITING prefix=%q, item=%v", prefix, item) if !bytes.HasPrefix(word, prefix) { t.Errorf("Unexpected prefix encountered, %q is not a prefix of %q", prefix, word) } counter++ return nil }); err != nil { t.Fatal(err) } if counter != 4 { t.Error("Unexpected number of nodes visited") } } func TestPatriciaTrie_CloneSparse(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepaneeeeeeeeeeeeee", "Pepan Zdepan", success}, {"Honzooooooooooooooo", "Honza Novak", success}, 
{"Jenikuuuuuuuuuuuuuu", "Jenik Poustevnicek", success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } t.Log("CLONE") clone := trie.Clone() for _, v := range data { t.Logf("GET prefix=%v, item=%v", v.key, v.value) if item := clone.Get(Prefix(v.key)); item != v.value { t.Errorf("Unexpected return value, expected=%v, got=%v", v.value, item) } } prefix := "xxx" item := 666 t.Logf("INSERT prefix=%v, item=%v", prefix, item) if ok := trie.Insert(Prefix(prefix), item); !ok { t.Errorf("Unexpected return value, expected=true, got=%v", ok) } t.Logf("GET cloned prefix=%v", prefix) if item := clone.Get(Prefix(prefix)); item != nil { t.Errorf("Unexpected return value, expected=nil, got=%v", item) } } func TestParticiaTrie_Delete(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepan", "Pepan Zdepan", success}, {"Honza", "Honza Novak", success}, {"Jenik", "Jenik Poustevnicek", success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } for _, v := range data { t.Logf("DELETE word=%v, success=%v", v.key, v.retVal) if ok := trie.Delete([]byte(v.key)); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } } func TestParticiaTrie_DeleteLeakageSparse(t *testing.T) { trie := NewTrie() data := []testData{ {"Pepan", "Pepan Zdepan", success}, {"Honza", "Honza Novak", success}, {"Jenik", "Jenik Poustevnicek", success}, } oldBytes := heapAllocatedBytes() for i := 0; i < 10000; i++ { for _, v := range data { if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } for _, v := range data { if 
ok := trie.Delete([]byte(v.key)); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } } if newBytes := heapAllocatedBytes(); newBytes > oldBytes+overhead { t.Logf("Size=%d, Total=%d, Trie state:\n%s\n", trie.size(), trie.total(), trie.dump()) t.Errorf("Heap space leak, grew %d bytes (from %d to %d)\n", newBytes-oldBytes, oldBytes, newBytes) } } func TestParticiaTrie_DeleteNonExistent(t *testing.T) { trie := NewTrie() insertData := []testData{ {"Pepan", "Pepan Zdepan", success}, {"Honza", "Honza Novak", success}, {"Jenik", "Jenik Poustevnicek", success}, } deleteData := []testData{ {"Pepan", "Pepan Zdepan", success}, {"Honza", "Honza Novak", success}, {"Pepan", "Pepan Zdepan", failure}, {"Jenik", "Jenik Poustevnicek", success}, {"Honza", "Honza Novak", failure}, } for _, v := range insertData { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } for _, v := range deleteData { t.Logf("DELETE word=%v, success=%v", v.key, v.retVal) if ok := trie.Delete([]byte(v.key)); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } } func TestParticiaTrie_DeleteSubtree(t *testing.T) { trie := NewTrie() insertData := []testData{ {"P", 0, success}, {"Pe", 1, success}, {"Pep", 2, success}, {"Pepa", 3, success}, {"Pepa Zdepa", 4, success}, {"Pepa Kuchar", 5, success}, {"Honza", 6, success}, {"Jenik", 7, success}, } deleteData := []testData{ {"Pe", -1, success}, {"Pe", -1, failure}, {"Honzik", -1, failure}, {"Honza", -1, success}, {"Honza", -1, failure}, {"Pep", -1, failure}, {"P", -1, success}, {"Nobody", -1, failure}, {"", -1, success}, } for _, v := range insertData { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Fatalf("Unexpected return value, 
expected=%v, got=%v", v.retVal, ok) } } for _, v := range deleteData { t.Logf("DELETE_SUBTREE prefix=%v, success=%v", v.key, v.retVal) if ok := trie.DeleteSubtree([]byte(v.key)); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } } /* func TestTrie_Dump(t *testing.T) { trie := NewTrie() data := []testData{ {"Honda", nil, success}, {"Honza", nil, success}, {"Jenik", nil, success}, {"Pepan", nil, success}, {"Pepin", nil, success}, } for i, v := range data { if _, ok := trie.Insert([]byte(v.key), v.value); ok != v.retVal { t.Logf("INSERT %v %v", v.key, v.value) t.Fatalf("Unexpected return value, expected=%v, got=%v", i, ok) } } dump := ` +--+--+ Hon +--+--+ da | | | +--+ za | +--+ Jenik | +--+ Pep +--+--+ an | +--+ in ` var buf bytes.Buffer trie.Dump(buf) if !bytes.Equal(buf.Bytes(), dump) { t.Logf("DUMP") t.Fatalf("Unexpected dump generated, expected\n\n%v\ngot\n\n%v", dump, buf.String()) } } */ func TestTrie_compact(t *testing.T) { trie := NewTrie() trie.Insert(Prefix("a"), 0) trie.Insert(Prefix("ab"), 0) trie.Insert(Prefix("abc"), 0) trie.Insert(Prefix("abcd"), 0) trie.Insert(Prefix("abcde"), 0) trie.Insert(Prefix("abcdef"), 0) trie.Insert(Prefix("abcdefg"), 0) trie.Insert(Prefix("abcdefgi"), 0) trie.Insert(Prefix("abcdefgij"), 0) trie.Insert(Prefix("abcdefgijk"), 0) trie.Delete(Prefix("abcdef")) trie.Delete(Prefix("abcde")) trie.Delete(Prefix("abcdefg")) trie.Delete(Prefix("a")) trie.Delete(Prefix("abc")) trie.Delete(Prefix("ab")) trie.Visit(func(prefix Prefix, item Item) error { // 97 ~~ 'a', for ch := byte(97); ch <= 107; ch++ { if c := bytes.Count(prefix, []byte{ch}); c > 1 { t.Errorf("%q appeared in %q %v times", ch, prefix, c) } } return nil }) } func TestTrie_longestCommonPrefixLength(t *testing.T) { type args struct { prefix Prefix caseInsensitive bool } tests := []struct { prefix Prefix args args want int }{ { Prefix("1234567890"), args{ Prefix(""), false, }, 0, }, { Prefix("1234567890"), args{ Prefix("12345"), 
false, }, 5, }, { Prefix("1234567890"), args{ Prefix("123789"), false, }, 3, }, { Prefix("1234567890"), args{ Prefix("12345678901"), false, }, 10, }, { Prefix("aBcDeFg"), args{ Prefix("abcd"), true, }, 4, }, { Prefix("aBcDeFg"), args{ Prefix("ABCDEF"), true, }, 6, }, { Prefix("eeffgghh"), args{ Prefix("eEfFGgH"), true, }, 7, }, } for _, test := range tests { trie := NewTrie() trie.prefix = test.prefix ret := trie.longestCommonPrefixLength(test.args.prefix, test.args.caseInsensitive) if ret != test.want { t.Errorf("unexpected return value, expected %d, got %d", test.want, ret) } } } // Examples -------------------------------------------------------------------- func ExampleTrie() { // Create a new tree. trie := NewTrie() // Insert some items. trie.Insert(Prefix("Pepa Novak"), 1) trie.Insert(Prefix("Pepa Sindelar"), 2) trie.Insert(Prefix("Karel Macha"), 3) trie.Insert(Prefix("Karel Hynek Macha"), 4) // Just check if some things are present in the tree. key := Prefix("Pepa Novak") fmt.Printf("%q present? %v\n", key, trie.Match(key)) key = Prefix("Karel") fmt.Printf("Anybody called %q here? %v\n", key, trie.MatchSubtree(key)) // Walk the tree. trie.Visit(printItem) // "Karel Hynek Macha": 4 // "Karel Macha": 3 // "Pepa Novak": 1 // "Pepa Sindelar": 2 // Walk a subtree. trie.VisitSubtree(Prefix("Pepa"), printItem) // "Pepa Novak": 1 // "Pepa Sindelar": 2 // Modify an item, then fetch it from the tree. trie.Set(Prefix("Karel Hynek Macha"), 10) key = Prefix("Karel Hynek Macha") fmt.Printf("%q: %v\n", key, trie.Get(key)) // "Karel Hynek Macha": 10 // Walk prefixes. prefix := Prefix("Karel Hynek Macha je kouzelnik") trie.VisitPrefixes(prefix, false, printItem) // "Karel Hynek Macha": 10 // Delete some items. trie.Delete(Prefix("Pepa Novak")) trie.Delete(Prefix("Karel Macha")) // Walk again. trie.Visit(printItem) // "Karel Hynek Macha": 10 // "Pepa Sindelar": 2 // Delete a subtree. trie.DeleteSubtree(Prefix("Pepa")) // Print what is left. 
trie.Visit(printItem) // "Karel Hynek Macha": 10 // Output: // "Pepa Novak" present? true // Anybody called "Karel" here? true // "Pepa Novak": 1 // "Pepa Sindelar": 2 // "Karel Macha": 3 // "Karel Hynek Macha": 4 // "Pepa Novak": 1 // "Pepa Sindelar": 2 // "Karel Hynek Macha": 10 // "Karel Hynek Macha": 10 // "Pepa Sindelar": 2 // "Karel Hynek Macha": 10 // "Karel Hynek Macha": 10 } // Helpers --------------------------------------------------------------------- func printItem(prefix Prefix, item Item) error { fmt.Printf("%q: %v\n", prefix, item) return nil } fuzzy-patricia-3.0.0/patricia/patricia_test.go000066400000000000000000000276331347260770400214760ustar00rootroot00000000000000// Copyright (c) 2014 The go-patricia AUTHORS // // Use of this source code is governed by The MIT License // that can be found in the LICENSE file. package patricia import ( "crypto/rand" mrand "math/rand" "reflect" "testing" ) // Tests ----------------------------------------------------------------------- func TestTrie_GetNonexistentPrefix(t *testing.T) { trie := NewTrie() data := []testData{ {"aba", 0, success}, } for _, v := range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } t.Logf("GET prefix=baa, expect item=nil") if item := trie.Get(Prefix("baa")); item != nil { t.Errorf("Unexpected return value, expected=, got=%v", item) } } func TestTrie_RandomKitchenSink(t *testing.T) { if testing.Short() { t.Skip() } const count, size = 750000, 16 b := make([]byte, count+size+1) if _, err := rand.Read(b); err != nil { t.Fatal("error generating random bytes", err) } m := make(map[string]string) for i := 0; i < count; i++ { m[string(b[i:i+size])] = string(b[i+1 : i+size+1]) } trie := NewTrie() getAndDelete := func(k, v string) { i := trie.Get(Prefix(k)) if i == nil { t.Fatalf("item not found, prefix=%v", []byte(k)) } else 
// reverse returns s with its runes in the opposite order. The string is
// reversed rune by rune rather than byte by byte, so multi-byte characters
// stay intact.
func reverse(s string) string {
	src := []rune(s)
	out := make([]rune, len(src))

	// Write each rune into its mirrored position.
	last := len(src) - 1
	for i, r := range src {
		out[last-i] = r
	}

	return string(out)
}
range data { t.Logf("INSERT prefix=%v, item=%v, success=%v", v.key, v.value, v.retVal) if ok := trie.Insert(Prefix(v.key), v.value); ok != v.retVal { t.Errorf("Unexpected return value, expected=%v, got=%v", v.retVal, ok) } } for _, record := range d { trie.Delete(Prefix(record.key)) } checkMasksRecursive(t, trie) } } func populateTrie(t *testing.T) *Trie { data := []string{ "Pepan", "Pepin", "Honza", "Jenik", "Karel", "Jenak", "Pepanek", } trie := NewTrie() for _, v := range data { if ok := trie.Insert(Prefix(v), struct{}{}); !ok { t.Errorf("Couldn't insert item %s", v) } } return trie } func TestTrie_FuzzyCollect(t *testing.T) { trie := populateTrie(t) type testResult struct { wantKey string wantSkipped int } type testData struct { query string caseInsensitive bool wantResults []testResult } testQueries := []testData{ { "Ppn", false, []testResult{ {"Pepan", 2}, {"Pepin", 2}, {"Pepanek", 2}, }, }, { "Ha", false, []testResult{ {"Honza", 3}, }, }, { "nza", false, []testResult{ {"Honza", 0}, }, }, { "eni", false, []testResult{ {"Jenik", 0}, }, }, { "jk", true, []testResult{ {"Jenik", 3}, {"Jenak", 3}, }, }, { "ppn", true, []testResult{ {"Pepan", 2}, {"Pepin", 2}, {"Pepanek", 2}, }, }, } for _, data := range testQueries { resultMap := make(map[string]int) t.Logf("QUERY %s", data.query) trie.VisitFuzzy(Prefix(data.query), data.caseInsensitive, func(prefix Prefix, item Item, skipped int) error { // result := testResult{string(prefix), skipped} resultMap[string(prefix)] = skipped return nil }) t.Logf("got result set %v\n", resultMap) for _, want := range data.wantResults { got, ok := resultMap[want.wantKey] if !ok { t.Errorf("item %s not found in result set\n", want.wantKey) continue } if got != want.wantSkipped { t.Errorf("got wrong skipped value, wanted %d, got %d\n", want.wantSkipped, got) } } } } func TestTrie_SubstringCollect(t *testing.T) { trie := populateTrie(t) type testData struct { query string caseInsensitive bool wantResults []string } testQueries := 
[]testData{ { "epa", false, []string{ "Pepan", "Pepanek", }, }, { "onza", false, []string{ "Honza", }, }, { "nza", false, []string{ "Honza", }, }, { "l", false, []string{ "Karel", }, }, { "a", false, []string{ "Pepan", "Honza", "Pepan", "Karel", "Jenak", "Pepanek", }, }, { "pep", true, []string{ "Pepin", "Pepan", }, }, { "kar", true, []string{ "Karel", }, }, { "", false, []string{ "Pepan", "Pepin", "Honza", "Jenik", "Karel", "Jenak", "Pepanek", }, }, } for _, data := range testQueries { resultMap := make(map[string]bool) t.Logf("QUERY %s", data.query) trie.VisitSubstring(Prefix(data.query), true, func(prefix Prefix, item Item) error { // result := testResult{string(prefix), skipped} resultMap[string(prefix)] = true return nil }) t.Logf("got result set %v\n", resultMap) for _, want := range data.wantResults { if _, ok := resultMap[want]; !ok { t.Errorf("item %s not found in result set\n", want) continue } } } } func Test_makePrefixMask(t *testing.T) { type testData struct { key Prefix wanted uint64 } data := []testData{ { Prefix("0123456789"), 0x3FF, }, { Prefix("AAAA"), 0x400, }, { Prefix(""), 0, }, { Prefix("abc"), 0x7000000000, }, { Prefix(".-"), 0xc000000000000000, }, } for _, d := range data { got := makePrefixMask(d.key) if got != d.wanted { t.Errorf("Unexpected bitmask, wanted: %b, got %b\n", d.wanted, got) } } } const ( amountWords = 100000 wordLength = 10 queryLength = 10 ) var benchmarkTrie *Trie func populateBenchmarkTrie(superDenseChildList bool) { benchmarkTrie = NewTrie() for i := 0; i < amountWords; i++ { benchmarkTrie.Insert(Prefix(mrandBytes(wordLength)), struct{}{}) } } type visitFunc func(prefix Prefix, caseInsensitive bool, visitor VisitorFunc) error func benchmarkVisit(caseInsensitive bool, visitor visitFunc, b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { visitor(Prefix(mrandBytes(queryLength)), caseInsensitive, func(prefix Prefix, item Item) error { return nil }) } } func benchmarkVisitFuzzy(caseInsensitive bool, b *testing.B) { 
// mrandBytes returns a slice of exactly length pseudo-random bytes, each
// drawn from the 75 consecutive ASCII characters starting at '0' (that is,
// '0' through 'z').
//
// The slice is filled in place. Appending to a slice that was already
// created with the full length would return twice as many bytes, the first
// half of them zero.
func mrandBytes(length int) []byte {
	buf := make([]byte, length)
	for i := range buf {
		buf[i] = byte(mrand.Intn(75) + '0')
	}
	return buf
}