PyTrie-0.2/0000775000175100017510000000000012247057012013705 5ustar gsakkisgsakkis00000000000000PyTrie-0.2/LICENSE0000664000175100017510000000265212247055573014731 0ustar gsakkisgsakkis00000000000000Copyright (c) 2009, George Sakkis All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * The names of its contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PyTrie-0.2/test/0000775000175100017510000000000012247057012014664 5ustar gsakkisgsakkis00000000000000PyTrie-0.2/test/test_mapping.py0000664000175100017510000001312212247055573017741 0ustar gsakkisgsakkis00000000000000'''Mapping tests adapted from stdlib test.mapping_tests Essentially replaced non-iterable hardcoded keys with string keys, plus a few more extra tests. ''' import sys import unittest try: from UserDict import UserDict except ImportError: # Python 3 from collections import UserDict from test import mapping_tests from pytrie import StringTrie class BasicTestMappingTrie(mapping_tests.BasicTestMappingProtocol): # Functions that can be useful to override to adapt to dictionary # semantics type2test = StringTrie # which class is being tested (overwrite in subclasses) def _reference(self): """Return a dictionary of values which are invariant by storage in the object under test.""" return {"key1":"value1", "key2":(1,2,3), "key":None} def test_values(self): d = self._empty_mapping() self.assertEqual(d.values(), []) def test_items(self): d = self._empty_mapping() self.assertEqual(d.items(), []) class TestMappingTrie(BasicTestMappingTrie, mapping_tests.TestMappingProtocol): def test_values(self): super(TestMappingTrie,self).test_values() d = self._full_mapping({'1':2}) self.assertEqual(d.values(), [2]) def test_items(self): super(TestMappingTrie,self).test_items() d = self._full_mapping({'1':2}) self.assertEqual(d.items(), [('1',2)]) def test_clear(self): d = self._full_mapping(self.reference) d.clear() self.assertEqual(d, {}) self.assertRaises(TypeError, d.clear, None) def test_update(self): BasicTestMappingTrie.test_update(self) # mapping argument d = self._empty_mapping() d.update({"1":100}) d.update({"2":20}) d.update({"1":1, "2":2, "3":3}) self.assertEqual(d, {"1":1, "2":2, "3":3}) # no argument d.update() self.assertEqual(d, {"1":1, "2":2, "3":3}) # keyword arguments d = self._empty_mapping() d.update(x=100) d.update(y=20) d.update(x=1, y=2, z=3) self.assertEqual(d, {"x":1, "y":2, "z":3}) # item sequence d = self._empty_mapping() d.update([("x", 100), ("y", 20)]) self.assertEqual(d, {"x":100, "y":20}) # Both item sequence and keyword arguments d = self._empty_mapping() d.update([("x", 100), ("y", 20)], x=1, y=2) self.assertEqual(d, {"x":1, "y":2}) # iterator d = self._full_mapping({"1":3, "2":4}) d.update(self._full_mapping({"1":2, "3":4, "5":6}).iteritems()) self.assertEqual(d, {"1":2, "2":4, "3":4, "5":6}) class SimpleUserDict: def __init__(self): self.d = {'1':1, '2':2, '3':3} def keys(self): return self.d.keys() def __getitem__(self, i): return self.d[i] d.clear() d.update(SimpleUserDict()) self.assertEqual(d, {'1':1, '2':2, '3':3}) def test_fromkeys(self): self.assertEqual(self.type2test.fromkeys("abc"), {"a":None, "b":None, "c":None}) d = self._empty_mapping() self.assert_(not(d.fromkeys("abc") is d)) self.assertEqual(d.fromkeys("abc"), {"a":None, "b":None, "c":None}) self.assertEqual(d.fromkeys(("4","5"),0), {"4":0, "5":0}) self.assertEqual(d.fromkeys([]), {}) def g(): yield "1" self.assertEqual(d.fromkeys(g()), {"1":None}) self.assertRaises(TypeError, d.fromkeys, 3) class dictlike(self.type2test): pass self.assertEqual(dictlike.fromkeys("a"), {"a":None}) self.assertEqual(dictlike().fromkeys("a"), {"a":None}) self.assert_(dictlike.fromkeys("a").__class__ is dictlike) self.assert_(dictlike().fromkeys("a").__class__ is dictlike) self.assert_(type(dictlike.fromkeys("a")) is dictlike) class mydict(self.type2test): def __new__(cls): return UserDict() ud = mydict.fromkeys("ab") self.assertEqual(ud, {"a":None, "b":None}) self.assert_(isinstance(ud, UserDict)) self.assertRaises(TypeError, dict.fromkeys) class Exc(Exception): pass class baddict1(self.type2test): def __init__(self): raise Exc() self.assertRaises(Exc, baddict1.fromkeys, [1]) class BadSeq(object): def __iter__(self): return self def next(self): raise Exc() self.assertRaises(Exception, self.type2test.fromkeys, BadSeq()) class baddict2(self.type2test): def __setitem__(self, key, value): raise Exc() self.assertRaises(Exc, baddict2.fromkeys, [1]) def test_copy(self): d = self._full_mapping({"1":1, "2":2, "3":3, "":[]}) d2 = d.copy(); self.assertEqual(d2, d) d[""].append("x"); self.assertEqual(d2[""], ["x"]) d["4"] = 4; self.assertNotEqual(d2, d) d2["4"] = 4; self.assertEqual(d2, d) d2["5"] = 5; self.assertNotEqual(d2, d) d = self._empty_mapping() self.assertEqual(d.copy(), d) self.assert_(isinstance(d.copy(), d.__class__)) self.assertRaises(TypeError, d.copy, None) def test_pop(self): BasicTestMappingTrie.test_pop(self) # Tests for pop with specified key d = self._empty_mapping() k, v = "abc", "def" self.assertEqual(d.pop(k, v), v) d[k] = v self.assertEqual(d.pop(k, 1), v) class TestMappingTrieSubclass(TestMappingTrie): class type2test(StringTrie): pass if __name__ == "__main__": unittest.main() PyTrie-0.2/test/test_trie.py0000664000175100017510000001032312247055573017251 0ustar gsakkisgsakkis00000000000000import unittest from pytrie import SortedStringTrie class TestTrie(unittest.TestCase): def setUp(self): self.words = 'an ant all allot alloy aloe are ate be'.split() self.trie = SortedStringTrie(zip(self.words, range(len(self.words)))) def test_longest_prefix(self): self.assertEqual(self.trie.longest_prefix('antonym'), 'ant') self.assertEqual(self.trie.longest_prefix('are'), 'are') self.assertEqual(self.trie.longest_prefix('alla'), 'all') self.assertEqual(self.trie.longest_prefix('allo'), 'all') self.assertRaises(KeyError, self.trie.longest_prefix_item, 'alumni') self.assertEqual(self.trie.longest_prefix('alumni', default=None), None) self.assertEqual(self.trie.longest_prefix('linux', default=-1), -1) def test_longest_prefix_value(self): self.assertEqual(self.trie.longest_prefix_value('antonym'), 1) self.assertEqual(self.trie.longest_prefix_value('are'), 6) self.assertEqual(self.trie.longest_prefix_value('alla'), 2) self.assertEqual(self.trie.longest_prefix_value('allo'), 2) self.assertRaises(KeyError, self.trie.longest_prefix_value, 'alumni') self.assertEqual(self.trie.longest_prefix_value('alumni', default=None), None) self.assertEqual(self.trie.longest_prefix_value('linux', default=-1), -1) def test_longest_prefix_item(self): self.assertEqual(self.trie.longest_prefix_item('antonym'), ('ant', 1)) self.assertEqual(self.trie.longest_prefix_item('are'), ('are', 6)) self.assertEqual(self.trie.longest_prefix_item('alla'), ('all', 2)) self.assertEqual(self.trie.longest_prefix_item('allo'), ('all', 2)) self.assertRaises(KeyError, self.trie.longest_prefix_item, 'alumni') self.assertEqual(self.trie.longest_prefix_item('alumni', default=None), None) self.assertEqual(self.trie.longest_prefix_item('linux', default=-1), -1) def test_iter_prefixes(self): self.assertEqual(list(self.trie.iter_prefixes('antonym')), ['an', 'ant']) self.assertEqual(list(self.trie.iter_prefixes('are')), ['are']) self.assertEqual(list(self.trie.iter_prefixes('alumni')), []) def test_iter_prefix_values(self): self.assertEqual(list(self.trie.iter_prefix_values('antonym')), [0, 1]) self.assertEqual(list(self.trie.iter_prefix_values('are')), [6]) self.assertEqual(list(self.trie.iter_prefix_values('alumni')), []) def test_iter_prefix_items(self): self.assertEqual(list(self.trie.iter_prefix_items('antonym')), [('an', 0), ('ant', 1)]) self.assertEqual(list(self.trie.iter_prefix_items('are')), [('are', 6)]) self.assertEqual(list(self.trie.iter_prefix_items('alumni')), []) def test_keys_wprefix(self): self.assertEqual(self.trie.keys('al'), ['all','allot','alloy','aloe']) self.assertEqual(self.trie.keys('are'), ['are']) self.assertEqual(self.trie.keys('ann'), []) def test_values_wprefix(self): self.assertEqual(self.trie.values('al'), [2,3,4,5]) self.assertEqual(self.trie.values('are'), [6]) self.assertEqual(self.trie.values('ann'), []) def test_items_wprefix(self): self.assertEqual(self.trie.items('al'), [('all',2),('allot',3),('alloy',4),('aloe',5)]) self.assertEqual(self.trie.items('are'), [('are',6)]) self.assertEqual(self.trie.items('ann'), []) def test_consistency_wprefix(self): t = self.trie for prefix in 'al','are','ann': self.assertEqual( t.items(prefix), list(zip(t.keys(prefix), t.values(prefix))) ) def test_pickle(self): from pickle import dumps, loads, HIGHEST_PROTOCOL for proto in range(HIGHEST_PROTOCOL): unpickled = loads(dumps(self.trie, proto)) self.assertEqual(self.trie, unpickled) self.assertTrue(type(self.trie) is type(unpickled)) self.assertTrue(self.trie is not unpickled) def test_repr(self): evaled = eval(repr(self.trie)) self.assertEqual(evaled, self.trie) self.assertEqual(evaled.__class__, self.trie.__class__) if __name__ == "__main__": unittest.main() PyTrie-0.2/docs/0000775000175100017510000000000012247057012014635 5ustar gsakkisgsakkis00000000000000PyTrie-0.2/docs/source/0000775000175100017510000000000012247057012016135 5ustar gsakkisgsakkis00000000000000PyTrie-0.2/docs/source/index.rst0000664000175100017510000000403312247055573020010 0ustar gsakkisgsakkis00000000000000.. PyTrie documentation master file, created by sphinx-quickstart on Sun Dec 20 20:05:26 2009. Welcome to PyTrie's documentation! ================================== .. automodule:: pytrie Reference documentation ----------------------- Classes ~~~~~~~ .. autoclass:: Trie :show-inheritance: :members: __init__, fromkeys, KeyFactory, NodeFactory .. autoclass:: StringTrie :show-inheritance: .. autoclass:: SortedTrie :show-inheritance: .. autoclass:: SortedStringTrie :show-inheritance: Trie methods ~~~~~~~~~~~~ The following methods are specific to tries; they are not part of the mapping API. .. automethod:: Trie.longest_prefix(key[, default]) .. automethod:: Trie.longest_prefix_value(key[, default]) .. automethod:: Trie.longest_prefix_item(key[, default]) .. automethod:: Trie.iter_prefixes .. automethod:: Trie.iter_prefix_values .. automethod:: Trie.iter_prefix_items Extended mapping API methods ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following methods extend the respective mapping API methods with an optional ``prefix`` parameter. If not ``None``, only keys (or associated values/items) that start with ``prefix`` are returned. .. automethod:: Trie.keys .. automethod:: Trie.values .. automethod:: Trie.items .. automethod:: Trie.iterkeys .. automethod:: Trie.itervalues .. automethod:: Trie.iteritems Original mapping API methods ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following methods have the standard mapping signature and semantics. .. automethod:: Trie.__len__ .. automethod:: Trie.__iter__ .. automethod:: Trie.__contains__ .. automethod:: Trie.__getitem__ .. automethod:: Trie.__setitem__ .. automethod:: Trie.__delitem__ .. automethod:: Trie.__repr__ .. automethod:: Trie.clear .. automethod:: Trie.copy .. automethod:: Trie.has_key Internals ~~~~~~~~~ Tries are implemented as trees of :class:`Node` instances. You don't need to worry about them unless unless you want to extend or replace :class:`Node` with a new node factory and bind it to :attr:`Trie.NodeFactory`. .. autoclass:: Node() :show-inheritance: :members: PyTrie-0.2/docs/source/conf.py0000664000175100017510000001433312247055573017452 0ustar gsakkisgsakkis00000000000000# -*- coding: utf-8 -*- # # PyTrie documentation build configuration file, created by # sphinx-quickstart on Sun Dec 20 21:20:23 2009. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.append(os.path.abspath('.')) sys.path.append(os.path.abspath('../..')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8' # The master toctree document. master_doc = 'index' # General information about the project. project = u'PyTrie' copyright = u'2009, George Sakkis' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '0.1' # The full version, including alpha/beta/rc tags. release = '0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. #unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = [] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_use_modindex = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'PyTriedoc' # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). #latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). #latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'PyTrie.tex', u'PyTrie Documentation', u'George Sakkis', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_use_modindex = True PyTrie-0.2/docs/Makefile0000664000175100017510000000607212247055573016314 0ustar gsakkisgsakkis00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PyTrie.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyTrie.qhc" latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." PyTrie-0.2/setup.py0000664000175100017510000000213312247056073015424 0ustar gsakkisgsakkis00000000000000#!/usr/bin/env python from distutils.core import setup setup( name = 'PyTrie', version = '0.2', author = 'George Sakkis', author_email = 'george.sakkis@gmail.com', url = 'http://bitbucket.org/gsakkis/pytrie/', description = 'A pure Python implementation of the trie data structure.', long_description= '''A *trie* is an ordered tree data structure that is used to store a mapping where the keys are sequences, usually strings over an alphabet. In addition to implementing the mapping interface, tries allow finding the items for a given prefix, and vice versa, finding the items whose keys are prefixes of a given key. ''', classifiers = [ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries :: Python Modules', ], py_modules = ['pytrie'], ) PyTrie-0.2/PKG-INFO0000664000175100017510000000171312247057012015004 0ustar gsakkisgsakkis00000000000000Metadata-Version: 1.1 Name: PyTrie Version: 0.2 Summary: A pure Python implementation of the trie data structure. Home-page: http://bitbucket.org/gsakkis/pytrie/ Author: George Sakkis Author-email: george.sakkis@gmail.com License: UNKNOWN Description: A *trie* is an ordered tree data structure that is used to store a mapping where the keys are sequences, usually strings over an alphabet. In addition to implementing the mapping interface, tries allow finding the items for a given prefix, and vice versa, finding the items whose keys are prefixes of a given key. Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 3 Classifier: Topic :: Software Development :: Libraries :: Python Modules PyTrie-0.2/pytrie.py0000664000175100017510000003403612247055573015613 0ustar gsakkisgsakkis00000000000000''':mod:`pytrie` is a pure Python implementation of the `trie `_ (prefix tree) data structure. A *trie* is a tree data structure that is used to store a mapping where the keys are sequences, usually strings over an alphabet. In addition to implementing the mapping interface, tries facilitate finding the items for a given prefix, and vice versa, finding the items whose keys are prefixes of a given key ``K``. As a common special case, finding the longest-prefix item is also supported. Algorithmically, tries are more efficient than binary search trees (BSTs) both in lookup time and memory when they contain many keys sharing relatively few prefixes. Unlike hash tables, trie keys don't need to be hashable. In the current implementation, a key can be any finite iterable with hashable elements. Usage ----- >>> from pytrie import SortedStringTrie as trie >>> t = trie(an=0, ant=1, all=2, allot=3, alloy=4, aloe=5, are=6, be=7) >>> t SortedStringTrie({'all': 2, 'allot': 3, 'alloy': 4, 'aloe': 5, 'an': 0, 'ant': 1, 'are': 6, 'be': 7}) >>> t.keys(prefix='al') ['all', 'allot', 'alloy', 'aloe'] >>> t.items(prefix='an') [('an', 0), ('ant', 1)] >>> t.longest_prefix('antonym') 'ant' >>> t.longest_prefix_item('allstar') ('all', 2) >>> t.longest_prefix_value('area', default='N/A') 6 >>> t.longest_prefix('alsa') Traceback (most recent call last): ... KeyError >>> t.longest_prefix_value('alsa', default=-1) -1 >>> list(t.iter_prefixes('allotment')) ['all', 'allot'] >>> list(t.iter_prefix_items('antonym')) [('an', 0), ('ant', 1)] ''' __all__ = ['Trie', 'StringTrie', 'SortedTrie', 'SortedStringTrie', 'Node'] import sys from copy import copy from operator import itemgetter from collections import MutableMapping # Python 3 interoperability PY3 = sys.version_info[0] == 3 if PY3: def itervalues(d): return d.values() def iteritems(d): return d.items() else: def itervalues(d): return d.itervalues() def iteritems(d): return d.iteritems() # Singleton sentinel - works with pickling class NULL(object): pass class Node(object): '''Trie node class. Subclasses may extend it to replace :attr:`ChildrenFactory` with a different mapping class (e.g. `sorteddict `_). :ivar value: The value of the key corresponding to this node or :const:`NULL` if there is no such key. :ivar children: A ``{key-part : child-node}`` mapping. ''' __slots__ = ('value', 'children') #: A callable for creating a new :attr:`children` mapping. ChildrenFactory = dict def __init__(self, value=NULL): self.value = value self.children = self.ChildrenFactory() def numkeys(self): '''Return the number of keys in the subtree rooted at this node.''' return (int(self.value is not NULL) + sum(child.numkeys() for child in itervalues(self.children))) def __repr__(self): return '(%s, {%s})' % ( self.value is NULL and 'NULL' or repr(self.value), ', '.join('%r: %r' % t for t in iteritems(self.children))) def __copy__(self): clone = self.__class__(self.value) clone_children = clone.children for key, child in iteritems(self.children): clone_children[key] = child.__copy__() return clone def __getstate__(self): return (self.value, self.children) def __setstate__(self, state): self.value, self.children = state class Trie(MutableMapping): '''Base trie class. As with regular dicts, keys are not necessarily returned sorted. Use :class:`SortedTrie` if sorting is required. ''' #: Callable for forming a key from its parts. KeyFactory = tuple #: Callable for creating new trie nodes. NodeFactory = Node def __init__(self, *args, **kwargs): '''Create a new trie. Parameters are the same with ``dict()``. ''' self._root = self.NodeFactory() self.update(*args, **kwargs) @classmethod def fromkeys(cls, iterable, value=None): '''Create a new trie with keys from ``iterable`` and values set to ``value``. Parameters are the same with ``dict.fromkeys()``. ''' d = cls() for key in iterable: d[key] = value return d #----- trie-specific methods ----------------------------------------------- def longest_prefix(self, key, default=NULL): '''Return the longest key in this trie that is a prefix of ``key``. If the trie doesn't contain any prefix of ``key``: - if ``default`` is given, return it - otherwise raise ``KeyError`` ''' try: return self.longest_prefix_item(key)[0] except KeyError: if default is not NULL: return default raise def longest_prefix_value(self, key, default=NULL): '''Return the value associated with the longest key in this trie that is a prefix of ``key``. If the trie doesn't contain any prefix of ``key``: - if ``default`` is given, return it - otherwise raise ``KeyError`` ''' current = self._root longest_prefix_value = NULL for part in key: current = current.children.get(part) if current is None: break value = current.value if value is not NULL: longest_prefix_value = value if longest_prefix_value is not NULL: return longest_prefix_value elif default is not NULL: return default else: raise KeyError def longest_prefix_item(self, key, default=NULL): '''Return the item (``(key,value)`` tuple) associated with the longest key in this trie that is a prefix of ``key``. If the trie doesn't contain any prefix of ``key``: - if ``default`` is given, return it - otherwise raise ``KeyError`` ''' prefix = [] append = prefix.append current = self._root longest_prefix_value = NULL max_non_null_index = -1 for i, part in enumerate(key): current = current.children.get(part) if current is None: break append(part) value = current.value if value is not NULL: longest_prefix_value = value max_non_null_index = i if longest_prefix_value is not NULL: del prefix[max_non_null_index+1:] return (self.KeyFactory(prefix), longest_prefix_value) elif default is not NULL: return default else: raise KeyError def iter_prefixes(self, key): 'Return an iterator over the keys of this trie that are prefixes of ``key``.' key_factory = self.KeyFactory prefix = [] append = prefix.append node = self._root for part in key: node = node.children.get(part) if node is None: break append(part) if node.value is not NULL: yield key_factory(prefix) def iter_prefix_values(self, key): '''Return an iterator over the values of this trie that are associated with keys that are prefixes of ``key``. ''' node = self._root for part in key: node = node.children.get(part) if node is None: break if node.value is not NULL: yield node.value def iter_prefix_items(self, key): '''Return an iterator over the items (``(key,value)`` tuples) of this trie that are associated with keys that are prefixes of ``key``. ''' key_factory = self.KeyFactory prefix = [] append = prefix.append node = self._root for part in key: node = node.children.get(part) if node is None: break append(part) if node.value is not NULL: yield (key_factory(prefix), node.value) #----- extended mapping API methods ---------------------------------------- def keys(self, prefix=None): '''Return a list of this trie's keys. :param prefix: If not None, return only the keys prefixed by ``prefix``. ''' return list(self.iterkeys(prefix)) def values(self, prefix=None): '''Return a list of this trie's values. :param prefix: If not None, return only the values associated with keys prefixed by ``prefix``. ''' return list(self.itervalues(prefix)) def items(self, prefix=None): '''Return a list of this trie's items (``(key,value)`` tuples). :param prefix: If not None, return only the items associated with keys prefixed by ``prefix``. ''' return list(self.iteritems(prefix)) def iterkeys(self, prefix=None): '''Return an iterator over this trie's keys. :param prefix: If not None, yield only the keys prefixed by ``prefix``. ''' return (key for key,value in self.iteritems(prefix)) def itervalues(self, prefix=None): '''Return an iterator over this trie's values. :param prefix: If not None, yield only the values associated with keys prefixed by ``prefix``. ''' def generator(node, NULL=NULL): if node.value is not NULL: yield node.value for part, child in iteritems(node.children): for subresult in generator(child): yield subresult if prefix is None: node = self._root else: node = self._find(prefix) if node is None: node = self.NodeFactory() return generator(node) def iteritems(self, prefix=None): '''Return an iterator over this trie's items (``(key,value)`` tuples). :param prefix: If not None, yield only the items associated with keys prefixed by ``prefix``. ''' parts = [] append = parts.append def generator(node, key_factory=self.KeyFactory, parts=parts, append=append, NULL=NULL): if node.value is not NULL: yield (key_factory(parts), node.value) for part, child in iteritems(node.children): append(part) for subresult in generator(child): yield subresult del parts[-1] node = self._root if prefix is not None: for part in prefix: append(part) node = node.children.get(part) if node is None: node = self.NodeFactory() break return generator(node) #----- original mapping API methods ---------------------------------------- def __len__(self): return self._root.numkeys() def __iter__(self): return self.iterkeys() def __contains__(self, key): node = self._find(key) return node is not None and node.value is not NULL def has_key(self, key): return key in self def __getitem__(self, key): node = self._find(key) if node is None or node.value is NULL: raise KeyError return node.value def __setitem__(self, key, value): node = self._root Node = self.NodeFactory for part in key: next = node.children.get(part) if next is None: node = node.children.setdefault(part, Node()) else: node = next node.value = value def __delitem__(self, key): nodes_parts = [] append = nodes_parts.append node = self._root for part in key: append((node,part)) node = node.children.get(part) if node is None: break if node is None or node.value is NULL: raise KeyError node.value = NULL pop = nodes_parts.pop while node.value is NULL and not node.children and nodes_parts: node,part = pop() del node.children[part] def clear(self): self._root.children.clear() def copy(self): clone = copy(super(Trie,self)) clone._root = copy(self._root) return clone def __repr__(self): return '%s({%s})' % ( self.__class__.__name__, ', '.join('%r: %r' % t for t in self.iteritems())) def _find(self, key): node = self._root for part in key: node = node.children.get(part) if node is None: break return node class StringTrie(Trie): '''A more appropriate for string keys :class:`Trie`.''' KeyFactory = ''.join # XXX: quick & dirty sorted dict # currently only iteritems() (for Python 2) or items() (for Python 3) has to be # overriden. However this is implementation detail that may change in the future class _SortedDict(dict): if PY3: def items(self): return iter(sorted(dict.items(self), key=itemgetter(0))) else: def iteritems(self): return iter(sorted(dict.iteritems(self), key=itemgetter(0))) class _SortedNode(Node): ChildrenFactory = _SortedDict class SortedTrie(Trie): '''A :class:`Trie` that returns its keys (and associated values/items) sorted. .. note:: This implementation does not keep the keys sorted internally; instead it sorts them every time a method returning a list or iterator (e.g. :meth:`keys`) is called. In cases where a trie is relatively stable (few inserts/deletes) and is iterated often, it is probably more efficient to use a :attr:`NodeFactory` based on a sorted dict such as `sorteddict `_. ''' NodeFactory = _SortedNode class SortedStringTrie(SortedTrie, StringTrie): 'A :class:`Trie` that is both a :class:`StringTrie` and a :class:`SortedTrie`.' if __name__ == '__main__': import doctest doctest.testmod() PyTrie-0.2/MANIFEST.in0000664000175100017510000000006712247055573015460 0ustar gsakkisgsakkis00000000000000include LICENSE MANIFEST.in recursive-include docs *