pax_global_header00006660000000000000000000000064150740141670014517gustar00rootroot0000000000000052 comment=bf82928180827570bb22fa82bcde0d08ddac12c6 derekparker-trie-bf82928/000077500000000000000000000000001507401416700153255ustar00rootroot00000000000000derekparker-trie-bf82928/.gitignore000066400000000000000000000000071507401416700173120ustar00rootroot00000000000000*.test derekparker-trie-bf82928/LICENSE000066400000000000000000000020671507401416700163370ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2014 Derek Parker Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. derekparker-trie-bf82928/README.md000066400000000000000000000017101507401416700166030ustar00rootroot00000000000000[![GoDoc](https://godoc.org/github.com/derekparker/trie?status.svg)](https://godoc.org/github.com/derekparker/trie) # Trie Data structure and relevant algorithms for extremely fast prefix/fuzzy string searching. 
## Usage Create a Trie with: ```Go t := trie.New() ``` Add Keys with: ```Go // Add can take in meta information which can be stored with the key. // i.e. you could store any information you would like to associate with // this particular key. t.Add("foobar", 1) ``` Find a key with: ```Go node, ok := t.Find("foobar") meta := node.Meta() // use meta with meta.(type) ``` Remove Keys with: ```Go t.Remove("foobar") ``` Prefix search with: ```Go t.PrefixSearch("foo") ``` Fast test for valid prefix: ```Go t.HasKeysWithPrefix("foo") ``` Fuzzy search with: ```Go t.FuzzySearch("fb") ``` ## Contributing Fork this repo and run tests with: go test Create a feature branch, write your tests and code and submit a pull request. ## License MIT derekparker-trie-bf82928/fixtures/000077500000000000000000000000001507401416700171765ustar00rootroot00000000000000derekparker-trie-bf82928/fixtures/test.txt000066400000000000000000000000141507401416700207110ustar00rootroot00000000000000foo bar baz derekparker-trie-bf82928/go.mod000066400000000000000000000000611507401416700164300ustar00rootroot00000000000000module github.com/derekparker/trie/v3 go 1.23.1 derekparker-trie-bf82928/trie.go000066400000000000000000000311751507401416700166260ustar00rootroot00000000000000// Implementation of an R-Way Trie data structure. // // A Trie has a root Node which is the base of the tree. // Each subsequent Node has a letter and children, which are // nodes that have letter values associated with them. package trie import ( "iter" "maps" "sort" "sync" ) type node[T any] struct { mask uint64 parent *node[T] children map[rune]*node[T] // keyed by first rune of child's segment meta T path *string // pointer to full key for terminal nodes segment string // the string segment stored in this node depth int32 termCount int32 } // Trie is a data structure that stores a set of strings. 
type Trie[T any] struct { mu sync.RWMutex root *node[T] size int } type ByKeys []string func (a ByKeys) Len() int { return len(a) } func (a ByKeys) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a ByKeys) Less(i, j int) bool { return len(a[i]) < len(a[j]) } // New creates a new Trie with an initialized root Node. func New[T any]() *Trie[T] { return &Trie[T]{ root: &node[T]{depth: 0}, // Lazy init children map size: 0, } } // AllKeyValuesIter returns a sequence of all key-value pairs in the trie. func (t *Trie[T]) AllKeyValuesIter() iter.Seq2[string, T] { return collectIter(t.root) } // AllKeyValues returns a map of all key-value pairs in the trie. func (t *Trie[T]) AllKeyValues() map[string]T { t.mu.RLock() defer t.mu.RUnlock() return maps.Collect(collectIter(t.root)) } // Add adds the key to the Trie, including meta data. func (t *Trie[T]) Add(key string, meta T) *node[T] { t.mu.Lock() defer t.mu.Unlock() if key == "" { return nil } t.size++ keyRunes := []rune(key) bitmask := maskruneslice(keyRunes) nd := t.root nd.mask |= bitmask nd.termCount++ remainingRunes := keyRunes for len(remainingRunes) > 0 { firstRune := remainingRunes[0] // Check if there's a child starting with this rune if len(nd.children) == 0 { // No children, create new child with full remaining string return nd.newChild(string(remainingRunes), meta, key) } child, exists := nd.children[firstRune] if !exists { // No child with this first rune, create new one return nd.newChild(string(remainingRunes), meta, key) } // Find common prefix between remaining and child's segment segmentRunes := []rune(child.segment) commonLen := commonPrefixLenRunes(remainingRunes, segmentRunes) if commonLen == len(segmentRunes) { // Full match with child's segment, continue down remainingRunes = remainingRunes[commonLen:] nd = child if len(remainingRunes) > 0 { bitmask := maskruneslice(remainingRunes) nd.mask |= bitmask } nd.termCount++ if len(remainingRunes) == 0 { // Key ends exactly at this node nd.meta = meta if 
nd.path == nil { nd.path = &key } return nd } continue } // Partial match - need to split the child node // Create intermediate node with common prefix intermediate := &node[T]{ segment: string(segmentRunes[:commonLen]), parent: nd, depth: nd.depth + 1, children: make(map[rune]*node[T]), termCount: child.termCount, } // Update child's segment to be the non-common part childNewSegmentRunes := segmentRunes[commonLen:] child.segment = string(childNewSegmentRunes) child.parent = intermediate intermediate.children[childNewSegmentRunes[0]] = child // Update parent's children map nd.children[firstRune] = intermediate // Update masks if child.children != nil { for _, c := range child.children { intermediate.mask |= c.mask } } if len(childNewSegmentRunes) > 0 { intermediate.mask |= maskruneslice(childNewSegmentRunes) } remainingRunes = remainingRunes[commonLen:] nd = intermediate if len(remainingRunes) > 0 { bitmask := maskruneslice(remainingRunes) nd.mask |= bitmask } nd.termCount++ if len(remainingRunes) == 0 { // Key ends at the split point nd.meta = meta nd.path = &key return nd } // Create new child for remaining part newChild := nd.newChild(string(remainingRunes), meta, key) return newChild } // Should not reach here return nd } // commonPrefixLenRunes returns the length of the common prefix between two rune slices func commonPrefixLenRunes(r1, r2 []rune) int { minLen := min(len(r1), len(r2)) for i := range minLen { if r1[i] != r2[i] { return i } } return minLen } // Find finds and returns meta data associated // with `key`. func (t *Trie[T]) Find(key string) (*node[T], bool) { t.mu.RLock() defer t.mu.RUnlock() nd := findNode(t.root, key) if nd == nil || nd.path == nil { return nil, false } return nd, true } func (t *Trie[T]) HasKeysWithPrefix(key string) bool { t.mu.RLock() defer t.mu.RUnlock() nd := findNode(t.root, key) return nd != nil } // Remove removes a key from the trie, ensuring that // all bitmasks up to root are appropriately recalculated. 
func (t *Trie[T]) Remove(key string) { t.mu.Lock() defer t.mu.Unlock() nd := findNode(t.root, key) if nd == nil || nd.path == nil { return } t.size-- // Mark node as non-terminal nd.path = nil // If node has children, we can't remove it, just mark as non-terminal if len(nd.children) > 0 { return } // Node has no children, we can remove it // Walk up and remove nodes that are no longer needed for nd != nil && nd.path == nil && len(nd.children) == 0 { parent := nd.parent if parent == nil { break } // Remove this node from parent's children if len(nd.segment) > 0 { firstRune := []rune(nd.segment)[0] delete(parent.children, firstRune) } // If parent now has only one child and is not terminal, we could merge // but we'll keep it simple for now nd = parent } // Recalculate bitmasks from this point up for n := nd; n != nil; n = n.parent { n.mask = 0 if n.children != nil { for _, c := range n.children { n.mask |= c.mask } } if len(n.segment) > 0 { n.mask |= maskruneslice([]rune(n.segment)) } } } // Keys returns all the keys currently stored in the trie. func (t *Trie[T]) Keys() []string { t.mu.RLock() defer t.mu.RUnlock() if t.size == 0 { return []string{} } return t.PrefixSearch("") } // FuzzySearch performs a fuzzy search against the keys in the trie. // FuzzySearch performs a fuzzy search against the keys in the trie, returning all keys // with the given prefix. Results are returned sorted. func (t *Trie[T]) FuzzySearch(pre string) []string { t.mu.RLock() defer t.mu.RUnlock() keys := make([]string, 0, t.size) for key := range fuzzycollectIter(t.root, []rune(pre)) { keys = append(keys, key) } sort.Sort(ByKeys(keys)) return keys } // FuzzySearchIter performs a fuzzy search and returns an iterator over matching keys. // Unlike FuzzySearch, the keys are not sorted - they are yielded as they are found. // This provides lazy evaluation and is more memory efficient for large result sets. 
func (t *Trie[T]) FuzzySearchIter(pre string) iter.Seq[string] { t.mu.RLock() defer t.mu.RUnlock() return fuzzycollectIter(t.root, []rune(pre)) } // PrefixSearch performs a prefix search against the keys in the trie. func (t *Trie[T]) PrefixSearch(pre string) []string { // Use PrefixSearchIter internally to avoid code duplication var keys []string for key := range t.PrefixSearchIter(pre) { keys = append(keys, key) } return keys } // PrefixSearchIter performs a prefix search and returns an iterator over matching key-value pairs. // Unlike PrefixSearch, this returns an iterator that yields both keys and their associated values. // This provides lazy evaluation and is more memory efficient for large result sets. func (t *Trie[T]) PrefixSearchIter(pre string) iter.Seq2[string, T] { t.mu.RLock() defer t.mu.RUnlock() nd := findNode(t.root, pre) if nd == nil { // Return an empty iterator if no node is found return func(yield func(string, T) bool) {} } return collectIter(nd) } // newChild creates and returns a pointer to a new child for the node. func (n *node[T]) newChild(segment string, meta T, fullKey string) *node[T] { runes := []rune(segment) if len(runes) == 0 { return nil } bitmask := maskruneslice(runes) child := &node[T]{ segment: segment, mask: bitmask, meta: meta, parent: n, depth: n.depth + 1, path: &fullKey, } n.ensureChildren() firstRune := runes[0] n.children[firstRune] = child n.mask |= bitmask return child } // Val returns the value of the node. 
func (n *node[T]) Val() T {
	return n.meta
}

// ensureChildren lazily initializes the children map if needed
func (n *node[T]) ensureChildren() {
	if n.children == nil {
		n.children = make(map[rune]*node[T])
	}
}

// findNode walks down from nd following key and returns the node at which key
// is exhausted, or nil when no stored segment sequence starts with key.
//
// This is a prefix lookup: when key ends in the middle of a child's
// compressed segment, that child is returned even though its own path is
// longer than key. Callers that need an exact match must compare the node's
// stored path themselves. An empty key returns nd.
func findNode[T any](nd *node[T], key string) *node[T] {
	if nd == nil {
		return nil
	}

	// Convert once and slice as segments are consumed, rather than
	// re-decoding the remaining key at every level.
	remaining := []rune(key)
	for len(remaining) > 0 {
		// Children are keyed by the first rune of their segment; a nil map
		// simply reports a miss.
		child, ok := nd.children[remaining[0]]
		if !ok {
			return nil
		}

		segment := []rune(child.segment)

		// Compare only the overlapping part so a key shorter than the
		// segment can still match as a prefix.
		overlap := min(len(segment), len(remaining))
		for i := range overlap {
			if remaining[i] != segment[i] {
				return nil
			}
		}

		// Key exhausted within (or exactly at the end of) this segment.
		if len(remaining) <= len(segment) {
			return child
		}

		// Whole segment matched; continue below it.
		remaining = remaining[len(segment):]
		nd = child
	}

	return nd
}

// maskruneslice creates a bitmask for the given runes.
// Optimized to eliminate bounds checking and enable vectorization.
// //go:inline func maskruneslice(rs []rune) uint64 { // Use 4 accumulators for better instruction-level parallelism var m0, m1, m2, m3 uint64 // Process 4 elements at a time using slice patterns for BCE for len(rs) >= 4 { // Compiler knows rs[:4] is safe when len(rs) >= 4 // This pattern eliminates all bounds checks r := rs[:4:4] // Full slice expression prevents capacity growth // No bounds checks on these accesses m0 |= uint64(1) << uint64(r[0]-'a') m1 |= uint64(1) << uint64(r[1]-'a') m2 |= uint64(1) << uint64(r[2]-'a') m3 |= uint64(1) << uint64(r[3]-'a') rs = rs[4:] } // Handle remaining elements (0-3) // Process remaining with explicit length checks for BCE switch len(rs) { case 3: m0 |= uint64(1) << uint64(rs[0]-'a') m1 |= uint64(1) << uint64(rs[1]-'a') m2 |= uint64(1) << uint64(rs[2]-'a') case 2: m0 |= uint64(1) << uint64(rs[0]-'a') m1 |= uint64(1) << uint64(rs[1]-'a') case 1: m0 |= uint64(1) << uint64(rs[0]-'a') } // Combine all accumulators return m0 | m1 | m2 | m3 } // collectIter returns an iterator over all key-value pairs starting from the given node func collectIter[T any](nd *node[T]) iter.Seq2[string, T] { return func(yield func(string, T) bool) { childrenCount := 0 if nd.children != nil { childrenCount = len(nd.children) } nodes := make([]*node[T], 1, childrenCount+1) nodes[0] = nd for len(nodes) > 0 { i := len(nodes) - 1 n := nodes[i] nodes = nodes[:i] if n.children != nil { for _, c := range n.children { nodes = append(nodes, c) } } if n.path != nil { if !yield(*n.path, n.meta) { return } } } } } type potentialSubtree[T any] struct { idx int node *node[T] } // fuzzycollectIter performs a fuzzy search and yields matching keys as an iterator func fuzzycollectIter[T any](nd *node[T], partial []rune) iter.Seq[string] { return func(yield func(string) bool) { if len(partial) == 0 { // If no partial pattern, yield all keys from this node for key := range collectIter(nd) { if !yield(key) { return } } return } // Use stack-based traversal for fuzzy 
matching type potentialNode struct { idx int node *node[T] } potential := make([]potentialNode, 1, 128) potential[0] = potentialNode{node: nd, idx: 0} for len(potential) > 0 { i := len(potential) - 1 p := potential[i] potential = potential[:i] m := maskruneslice(partial[p.idx:]) if (p.node.mask & m) != m { continue } // Check if any rune in segment matches current partial rune segmentRunes := []rune(p.node.segment) for _, r := range segmentRunes { if p.idx < len(partial) && r == partial[p.idx] { p.idx++ if p.idx == len(partial) { // Found a match, yield all terminals from this subtree for key := range collectIter(p.node) { if !yield(key) { return } } break } } } if p.idx < len(partial) && p.node.children != nil { for _, c := range p.node.children { potential = append(potential, potentialNode{node: c, idx: p.idx}) } } } } } derekparker-trie-bf82928/trie_test.go000066400000000000000000000520411507401416700176600ustar00rootroot00000000000000package trie import ( "bufio" "log" "os" "sort" "testing" ) func BenchmarkMaskruneslice(b *testing.B) { testCases := []struct { name string input string }{ {"short", "test"}, {"medium", "benchmark"}, {"long", "thisisaverylongstringfortesting"}, {"alphabet", "abcdefghijklmnopqrstuvwxyz"}, } for _, tc := range testCases { runes := []rune(tc.input) b.Run(tc.name, func(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { _ = maskruneslice(runes) } }) } } func createTrieAndAddFromFile[T any](path string, val T) *Trie[T] { t := New[T]() file, err := os.Open(path) if err != nil { log.Fatal(err) } reader := bufio.NewScanner(file) for reader.Scan() { t.Add(reader.Text(), val) } if reader.Err() != nil { log.Fatal(err) } return t } func TestTrieAll(t *testing.T) { trie := New[int]() trie.Add("foo", 1) trie.Add("bar", 2) trie.Add("baz", 3) trie.Add("bur", 4) for key, value := range trie.AllKeyValuesIter() { switch key { case "foo": if value != 1 { t.Errorf("Expected 1, got: %d", value) } case "bar": if value != 2 { t.Errorf("Expected 2, 
got: %d", value) } case "baz": if value != 3 { t.Errorf("Expected 3, got: %d", value) } case "bur": if value != 4 { t.Errorf("Expected 4, got: %d", value) } default: t.Errorf("Unexpected key: %s", key) } } } func TestAllKeyValues(t *testing.T) { trie := New[int]() trie.Add("foo", 1) trie.Add("bar", 2) trie.Add("baz", 3) trie.Add("bur", 4) kvMap := trie.AllKeyValues() // Check that we got all 4 entries if len(kvMap) != 4 { t.Errorf("Expected 4 entries, got: %d", len(kvMap)) } // Check each key-value pair expectedPairs := map[string]int{ "foo": 1, "bar": 2, "baz": 3, "bur": 4, } for key, expectedValue := range expectedPairs { if value, ok := kvMap[key]; !ok { t.Errorf("Key %s not found in result", key) } else if value != expectedValue { t.Errorf("For key %s: expected %d, got %d", key, expectedValue, value) } } // Check for unexpected keys for key := range kvMap { if _, ok := expectedPairs[key]; !ok { t.Errorf("Unexpected key in result: %s", key) } } } func TestAllKeyValuesEmpty(t *testing.T) { trie := New[int]() kvMap := trie.AllKeyValues() // Check that we got an empty map if len(kvMap) != 0 { t.Errorf("Expected empty map, got %d entries", len(kvMap)) } } func TestAllKeyValuesWithDifferentValues(t *testing.T) { trie := New[string]() // Add keys with specific string values to verify correct mapping trie.Add("apple", "fruit") trie.Add("application", "software") trie.Add("apply", "verb") trie.Add("banana", "yellow fruit") trie.Add("bandana", "cloth") trie.Add("band", "music group") kvMap := trie.AllKeyValues() // Check that we got all 6 entries if len(kvMap) != 6 { t.Errorf("Expected 6 entries, got: %d", len(kvMap)) } // Check each key-value pair for exact match expectedPairs := map[string]string{ "apple": "fruit", "application": "software", "apply": "verb", "banana": "yellow fruit", "bandana": "cloth", "band": "music group", } for key, expectedValue := range expectedPairs { if value, ok := kvMap[key]; !ok { t.Errorf("Key %s not found in result", key) } else if value 
!= expectedValue { t.Errorf("For key %s: expected '%s', got '%s'", key, expectedValue, value) } } // Check for unexpected keys for key := range kvMap { if _, ok := expectedPairs[key]; !ok { t.Errorf("Unexpected key in result: %s", key) } } } func TestAllKeyValuesAndIteratorConsistency(t *testing.T) { trie := New[int]() // Add various keys with values testData := map[string]int{ "apple": 1, "application": 2, "apply": 3, "banana": 4, "band": 5, "bandana": 6, "foo": 7, "foobar": 8, "foobaz": 9, "bar": 10, } for key, value := range testData { trie.Add(key, value) } // Get results from AllKeyValues mapResult := trie.AllKeyValues() // Collect results from AllKeyValuesIter iterResult := make(map[string]int) for key, value := range trie.AllKeyValuesIter() { iterResult[key] = value } // Check that both have the same number of entries if len(mapResult) != len(iterResult) { t.Errorf("Length mismatch: AllKeyValues returned %d entries, AllKeyValuesIter returned %d entries", len(mapResult), len(iterResult)) } // Check that all entries match for key, mapValue := range mapResult { if iterValue, ok := iterResult[key]; !ok { t.Errorf("Key %s found in AllKeyValues but not in AllKeyValuesIter", key) } else if mapValue != iterValue { t.Errorf("Value mismatch for key %s: AllKeyValues=%d, AllKeyValuesIter=%d", key, mapValue, iterValue) } } // Check the reverse - all iterator entries are in the map for key := range iterResult { if _, ok := mapResult[key]; !ok { t.Errorf("Key %s found in AllKeyValuesIter but not in AllKeyValues", key) } } } func TestTrieAdd(t *testing.T) { trie := New[int]() n := trie.Add("foo", 1) if n.meta != 1 { t.Errorf("Expected 1, got: %d", n.meta) } } func TestTrieFind(t *testing.T) { trie := New[int]() trie.Add("foo", 1) n, ok := trie.Find("foo") if ok != true { t.Fatal("Could not find node") } if n.Val() != 1 { t.Errorf("Expected 1, got: %d", n.meta) } } func TestTrieFindMissingWithSubtree(t *testing.T) { trie := New[int]() trie.Add("fooish", 1) trie.Add("foobar", 
1) n, ok := trie.Find("foo") if ok != false { t.Errorf("Expected ok to be false") } if n != nil { t.Errorf("Expected nil, got: %v", n) } } func TestTrieHasKeysWithPrefix(t *testing.T) { trie := New[int]() trie.Add("fooish", 1) trie.Add("foobar", 1) testcases := []struct { key string expected bool }{ {"foobar", true}, {"foo", true}, {"fool", false}, } for _, testcase := range testcases { if trie.HasKeysWithPrefix(testcase.key) != testcase.expected { t.Errorf("HasKeysWithPrefix(\"%s\"): expected result to be %t", testcase.key, testcase.expected) } } } func TestTrieFindMissing(t *testing.T) { trie := New[int]() n, ok := trie.Find("foo") if ok != false { t.Errorf("Expected ok to be false") } if n != nil { t.Errorf("Expected nil, got: %v", n) } } func TestRemove(t *testing.T) { trie := New[int]() initial := []string{"football", "foostar", "foosball"} for _, key := range initial { trie.Add(key, 0) } trie.Remove("foosball") keys := trie.Keys() if len(keys) != 2 { t.Errorf("Expected 2 keys got %d", len(keys)) } for _, k := range keys { if k != "football" && k != "foostar" { t.Errorf("key was: %s", k) } } keys = trie.FuzzySearch("foo") if len(keys) != 2 { t.Errorf("Expected 2 keys got %d", len(keys)) } for _, k := range keys { if k != "football" && k != "foostar" { t.Errorf("Expected football got: %#v", k) } } } func TestRemoveRoot(t *testing.T) { trie := New[interface{}]() trie.Add("root", nil) trie.Remove("root") var ok bool _, ok = trie.Find("root") if ok { t.Error("Expected 0 keys") } // Try to write some data after the trie was purged trie.Add("root", nil) _, ok = trie.Find("root") if !ok { t.Error("Expected 1 keys") } } func TestTrieKeys(t *testing.T) { tableTests := []struct { name string expectedKeys []string }{ {"Two", []string{"bar", "foo"}}, {"One", []string{"foo"}}, {"Empty", []string{}}, } for _, test := range tableTests { t.Run(test.name, func(t *testing.T) { trie := New[interface{}]() for _, key := range test.expectedKeys { trie.Add(key, nil) } keys := 
trie.Keys() if len(keys) != len(test.expectedKeys) { t.Errorf("Expected %v keys, got %d, keys were: %v", len(test.expectedKeys), len(keys), trie.Keys()) } sort.Strings(keys) for i, key := range keys { if key != test.expectedKeys[i] { t.Errorf("Expected %#v, got %#v", test.expectedKeys[i], key) } } }) } } func TestPrefixSearch(t *testing.T) { trie := New[interface{}]() expected := []string{ "foo", "foosball", "football", "foreboding", "forementioned", "foretold", "foreverandeverandeverandever", "forbidden", } defer func() { r := recover() if r != nil { t.Error(r) } }() trie.Add("bar", nil) for _, key := range expected { trie.Add(key, nil) } tests := []struct { pre string expected []string length int }{ {"fo", expected, len(expected)}, {"foosbal", []string{"foosball"}, 1}, {"abc", []string{}, 0}, } for _, test := range tests { actual := trie.PrefixSearch(test.pre) sort.Strings(actual) sort.Strings(test.expected) if len(actual) != test.length { t.Errorf("Expected len(actual) to == %d for pre %s", test.length, test.pre) } for i, key := range actual { if key != test.expected[i] { t.Errorf("Expected %v got: %v", test.expected[i], key) } } } trie.PrefixSearch("fsfsdfasdf") } func TestPrefixSearchEmpty(t *testing.T) { trie := New[interface{}]() keys := trie.PrefixSearch("") if len(keys) != 0 { t.Errorf("Expected 0 keys from empty trie, got: %d", len(keys)) } } func TestPrefixSearchIter(t *testing.T) { trie := New[string]() // Add test data with values testData := map[string]string{ "foo": "value1", "foosball": "value2", "football": "value3", "foreboding": "value4", "forementioned": "value5", "foretold": "value6", "foreverandeverandeverandever": "value7", "forbidden": "value8", "bar": "value9", "baz": "value10", } for key, value := range testData { trie.Add(key, value) } tests := []struct { prefix string expected map[string]string }{ { prefix: "fo", expected: map[string]string{ "foo": "value1", "foosball": "value2", "football": "value3", "foreboding": "value4", 
"forementioned": "value5", "foretold": "value6", "foreverandeverandeverandever": "value7", "forbidden": "value8", }, }, { prefix: "foosbal", expected: map[string]string{ "foosball": "value2", }, }, { prefix: "bar", expected: map[string]string{ "bar": "value9", }, }, { prefix: "xyz", expected: map[string]string{}, }, { prefix: "", expected: testData, // Empty prefix should return all entries }, } for _, test := range tests { t.Run(test.prefix, func(t *testing.T) { // Collect results from iterator iterResults := make(map[string]string) for key, value := range trie.PrefixSearchIter(test.prefix) { iterResults[key] = value } // Compare lengths if len(iterResults) != len(test.expected) { t.Errorf("Length mismatch for prefix '%s': got %d, expected %d", test.prefix, len(iterResults), len(test.expected)) } // Compare key-value pairs for expectedKey, expectedValue := range test.expected { if actualValue, ok := iterResults[expectedKey]; !ok { t.Errorf("Missing key '%s' for prefix '%s'", expectedKey, test.prefix) } else if actualValue != expectedValue { t.Errorf("Value mismatch for key '%s' with prefix '%s': got '%s', expected '%s'", expectedKey, test.prefix, actualValue, expectedValue) } } // Check for unexpected keys for actualKey := range iterResults { if _, ok := test.expected[actualKey]; !ok { t.Errorf("Unexpected key '%s' for prefix '%s'", actualKey, test.prefix) } } }) } } func TestPrefixSearchIterEmpty(t *testing.T) { trie := New[string]() count := 0 for range trie.PrefixSearchIter("") { count++ } if count != 0 { t.Errorf("Expected 0 entries from empty trie, got: %d", count) } } func TestPrefixSearchIterEarlyStop(t *testing.T) { trie := New[int]() keys := []string{"foo", "foobar", "foobaz", "football", "foosball"} for i, key := range keys { trie.Add(key, i) } // Test that we can stop iteration early count := 0 maxCount := 2 for range trie.PrefixSearchIter("foo") { count++ if count >= maxCount { break } } if count != maxCount { t.Errorf("Expected to stop at %d 
iterations, got %d", maxCount, count) } } func TestPrefixSearchAndIterConsistency(t *testing.T) { trie := New[int]() // Add test data testData := map[string]int{ "apple": 1, "application": 2, "apply": 3, "banana": 4, "band": 5, "bandana": 6, "can": 7, "candy": 8, "candid": 9, } for key, value := range testData { trie.Add(key, value) } prefixes := []string{"", "app", "ban", "can", "z"} for _, prefix := range prefixes { t.Run(prefix, func(t *testing.T) { // Get results from PrefixSearch searchResults := trie.PrefixSearch(prefix) searchSet := make(map[string]bool) for _, key := range searchResults { searchSet[key] = true } // Collect results from PrefixSearchIter iterResults := make(map[string]int) for key, value := range trie.PrefixSearchIter(prefix) { iterResults[key] = value } // Check that all keys match if len(searchResults) != len(iterResults) { t.Errorf("Length mismatch for prefix '%s': PrefixSearch=%d, PrefixSearchIter=%d", prefix, len(searchResults), len(iterResults)) } // Verify all keys from PrefixSearch are in PrefixSearchIter for _, key := range searchResults { if _, ok := iterResults[key]; !ok { t.Errorf("Key '%s' found in PrefixSearch but not in PrefixSearchIter for prefix '%s'", key, prefix) } } // Verify all keys from PrefixSearchIter are in PrefixSearch for key := range iterResults { if !searchSet[key] { t.Errorf("Key '%s' found in PrefixSearchIter but not in PrefixSearch for prefix '%s'", key, prefix) } } }) } } func TestFuzzySearch(t *testing.T) { setup := []string{ "foosball", "football", "bmerica", "ked", "kedlock", "frosty", "bfrza", "foo/bart/baz.go", } tests := []struct { partial string length int }{ {"fsb", 1}, {"footbal", 1}, {"football", 1}, {"fs", 2}, {"oos", 1}, {"kl", 1}, {"ft", 3}, {"fy", 1}, {"fz", 2}, {"a", 5}, {"", 8}, {"zzz", 0}, } trie := New[interface{}]() for _, key := range setup { trie.Add(key, nil) } for _, test := range tests { t.Run(test.partial, func(t *testing.T) { actual := trie.FuzzySearch(test.partial) if len(actual) != 
test.length { t.Errorf("Expected len(actual) to == %d, was %d for %s actual was %#v", test.length, len(actual), test.partial, actual) } }) } } func TestFuzzySearchIter(t *testing.T) { setup := []string{ "foosball", "football", "bmerica", "ked", "kedlock", "frosty", "bfrza", "foo/bart/baz.go", } tests := []struct { partial string length int }{ {"fsb", 1}, {"footbal", 1}, {"football", 1}, {"fs", 2}, {"oos", 1}, {"kl", 1}, {"ft", 3}, {"fy", 1}, {"fz", 2}, {"a", 5}, {"", 8}, {"zzz", 0}, } trie := New[interface{}]() for _, key := range setup { trie.Add(key, nil) } for _, test := range tests { t.Run(test.partial, func(t *testing.T) { // Collect results from iterator var results []string for key := range trie.FuzzySearchIter(test.partial) { results = append(results, key) } // Get results from regular FuzzySearch expected := trie.FuzzySearch(test.partial) // Check lengths match if len(results) != test.length { t.Errorf("Expected len(results) to == %d, was %d for %s results was %#v", test.length, len(results), test.partial, results) } // Check that results contain the same keys (order may differ) if len(results) != len(expected) { t.Errorf("Iterator results length %d doesn't match FuzzySearch length %d for %s", len(results), len(expected), test.partial) } // Create maps to check set equality resultSet := make(map[string]bool) for _, key := range results { resultSet[key] = true } expectedSet := make(map[string]bool) for _, key := range expected { expectedSet[key] = true } // Check that all expected keys are in results for key := range expectedSet { if !resultSet[key] { t.Errorf("Expected key %s not found in iterator results for pattern %s", key, test.partial) } } // Check that no unexpected keys are in results for key := range resultSet { if !expectedSet[key] { t.Errorf("Unexpected key %s found in iterator results for pattern %s", key, test.partial) } } }) } } func TestFuzzySearchIterEarlyStop(t *testing.T) { trie := New[interface{}]() keys := []string{"foo", "foobar", 
"foobaz", "football", "foosball"} for _, key := range keys { trie.Add(key, nil) } // Test that we can stop iteration early count := 0 maxCount := 2 for range trie.FuzzySearchIter("f") { count++ if count >= maxCount { break } } if count != maxCount { t.Errorf("Expected to stop at %d iterations, got %d", maxCount, count) } } func TestFuzzySearchEmpty(t *testing.T) { trie := New[interface{}]() keys := trie.FuzzySearch("") if len(keys) != 0 { t.Errorf("Expected 0 keys from empty trie, got: %d", len(keys)) } } func TestFuzzySearchSorting(t *testing.T) { trie := New[interface{}]() setup := []string{ "foosball", "football", "bmerica", "ked", "kedlock", "frosty", "bfrza", "foo/bart/baz.go", } for _, key := range setup { trie.Add(key, nil) } actual := trie.FuzzySearch("fz") expected := []string{"bfrza", "foo/bart/baz.go"} if len(actual) != len(expected) { t.Fatalf("expected len %d got %d", len(expected), len(actual)) } for i, v := range expected { if actual[i] != v { t.Errorf("Expected %s got %s", v, actual[i]) } } } func BenchmarkTieKeys(b *testing.B) { trie := New[interface{}]() keys := []string{"bar", "foo", "baz", "bur", "zum", "burzum", "bark", "barcelona", "football", "foosball", "footlocker"} for _, key := range keys { trie.Add(key, nil) } b.ResetTimer() for i := 0; i < b.N; i++ { trie.Keys() } } func BenchmarkPrefixSearch(b *testing.B) { trie := createTrieAndAddFromFile[interface{}]("/usr/share/dict/words", nil) b.ResetTimer() for i := 0; i < b.N; i++ { _ = trie.PrefixSearch("fo") } } func BenchmarkPrefixSearchIter(b *testing.B) { trie := createTrieAndAddFromFile[interface{}]("/usr/share/dict/words", nil) b.ResetTimer() for i := 0; i < b.N; i++ { count := 0 for range trie.PrefixSearchIter("fo") { count++ } } } func BenchmarkPrefixSearchIterEarlyStop(b *testing.B) { trie := createTrieAndAddFromFile[interface{}]("/usr/share/dict/words", nil) b.ResetTimer() for i := 0; i < b.N; i++ { count := 0 maxCount := 10 for range trie.PrefixSearchIter("fo") { count++ if count >= 
maxCount { break } } } } func BenchmarkFuzzySearch(b *testing.B) { trie := createTrieAndAddFromFile[interface{}]("fixtures/test.txt", nil) b.ResetTimer() for i := 0; i < b.N; i++ { _ = trie.FuzzySearch("fs") } } func BenchmarkFuzzySearchIter(b *testing.B) { trie := createTrieAndAddFromFile[interface{}]("fixtures/test.txt", nil) b.ResetTimer() for i := 0; i < b.N; i++ { count := 0 for range trie.FuzzySearchIter("fs") { count++ } } } func BenchmarkFuzzySearchIterEarlyStop(b *testing.B) { trie := createTrieAndAddFromFile[interface{}]("fixtures/test.txt", nil) b.ResetTimer() for i := 0; i < b.N; i++ { count := 0 maxCount := 10 for range trie.FuzzySearchIter("fs") { count++ if count >= maxCount { break } } } } func BenchmarkBuildTree(b *testing.B) { for i := 0; i < b.N; i++ { createTrieAndAddFromFile[interface{}]("/usr/share/dict/words", nil) } } func BenchmarkAllKeyValues(b *testing.B) { trie := createTrieAndAddFromFile[interface{}]("fixtures/test.txt", nil) b.ResetTimer() for i := 0; i < b.N; i++ { _ = trie.AllKeyValues() } } func BenchmarkAllKeyValuesIter(b *testing.B) { trie := createTrieAndAddFromFile[interface{}]("fixtures/test.txt", nil) b.ResetTimer() for i := 0; i < b.N; i++ { count := 0 for range trie.AllKeyValuesIter() { count++ } } } func TestSupportChinese(t *testing.T) { trie := New[interface{}]() expected := []string{"苹果 沂水县", "苹果", "大蒜", "大豆"} for _, key := range expected { trie.Add(key, nil) } tests := []struct { pre string expected []string length int }{ {"苹", expected[:2], len(expected[:2])}, {"大", expected[2:], len(expected[2:])}, {"大蒜", []string{"大蒜"}, 1}, } for _, test := range tests { actual := trie.PrefixSearch(test.pre) sort.Strings(actual) sort.Strings(test.expected) if len(actual) != test.length { t.Errorf("Expected len(actual) to == %d for pre %s", test.length, test.pre) } for i, key := range actual { if key != test.expected[i] { t.Errorf("Expected %v got: %v", test.expected[i], key) } } } } func BenchmarkAdd(b *testing.B) { f, err := 
os.Open("/usr/share/dict/words") if err != nil { b.Fatal("couldn't open bag of words") } defer func(f *os.File) { _ = f.Close() }(f) scanner := bufio.NewScanner(f) var words []string for scanner.Scan() { word := scanner.Text() words = append(words, word) } b.ResetTimer() trie := New[interface{}]() for i := 0; i < b.N; i++ { trie.Add(words[i%len(words)], nil) } }