.\"
.\" This program is free software: you can redistribute it and/or modify
.\" it under the terms of the GNU General Public License as published by
.\" the Free Software Foundation, either version 3 of the License, or
.\" (at your option) any later version.
.\"
.\" This program is distributed in the hope that it will be useful,
.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
.\" GNU General Public License for more details.
.\"
.\" You should have received a copy of the GNU General Public License
.\" along with this program. If not, see .
.\"
.TH FSCK-LARCH 1
.SH NAME
fsck-larch \- verify that a larch B-tree is internally consistent
.SH SYNOPSIS
.SH DESCRIPTION
.B fsck-larch
reads an on-disk, committed B-tree created by the
.B larch
Python library,
and verifies that it is internally consistent.
It reports any problems it finds,
but does not currently fix them.
.SH OPTIONS
.SH "SEE ALSO"
Larch home page
.RI ( http://liw.fi/larch/ ).
larch-1.20131130/idpath-speed 0000755 0001750 0001750 00000002537 12246332521 015366 0 ustar jenkins jenkins #!/usr/bin/python
# Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
import sys
import time
import larch
def main():
n = int(sys.argv[1])
dirname = sys.argv[2]
depth = int(sys.argv[3])
bits = int(sys.argv[4])
skip = int(sys.argv[5])
idpath = larch.IdPath(dirname, depth, bits, skip)
start = time.time()
for i in xrange(n):
path = idpath.convert(i)
dirname = os.path.dirname(path)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(path, 'w'):
pass
end = time.time()
duration = end - start
speed = n / duration
print '%d ids, %.1f seconds, %.1f ids/s' % (n, duration, speed)
if __name__ == '__main__':
main()
larch-1.20131130/insert-remove-test 0000755 0001750 0001750 00000007455 12246332521 016577 0 ustar jenkins jenkins #!/usr/bin/python
# Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
# Exercise my B-tree implementation, for simple benchmarking purposes.
# The benchmark gets a location and an operation count as command line
# arguments.
#
# If the location is the empty string, an in-memory node store is used.
# Otherwise it must be a non-existent directory name.
#
# The benchmark will do the given number of insertions into the tree, and
# measure the speed of that. Then it will look up each of those, and
# measure the lookups.
import cProfile
import logging
import os
import random
import shutil
import sys
import time
import larch
def do_it(keys, func, final):
    '''Apply func to each key, then call final; return elapsed seconds.

    Fix: use time.time() (wall clock) instead of time.clock().
    time.clock() was deprecated in Python 3.3 and removed in 3.12, and
    on Unix it measured CPU time, which undercounts the I/O-bound work
    (committing the tree to disk) this benchmark wants to measure. The
    sibling idpath-speed benchmark already uses time.time().
    '''
    start = time.time()
    for key in keys:
        func(key)
    final()
    end = time.time()
    return end - start
def assert_refcounts_are_one(tree):
    '''Walk every node reachable from the tree's root and assert that
    each one has a stored reference count of exactly one.
    '''
    def check(nid):
        count = tree.node_store.rs.get_refcount(nid)
        node = tree._get_node(nid)
        assert count == 1, 'type=%s id=%d refcount=%d' % (repr(node),
                                                          nid, count)
        # Only index nodes have children to recurse into.
        if isinstance(node, larch.IndexNode):
            for child_id in node.values():
                check(child_id)
    check(tree.root.id)
def do_insert(tree, key, value):
    # Insert key/value into tree, verifying the all-refcounts-are-one
    # invariant both before (only when a root already exists) and after.
    logging.debug('do_insert(%s)' % (repr(key)))
    if tree.root is not None:
        assert_refcounts_are_one(tree)
    tree.insert(key, value)
    assert_refcounts_are_one(tree)
def do_remove(tree, key):
    # Remove key from tree, verifying the all-refcounts-are-one
    # invariant both before and after the removal.
    logging.debug('do_remove(%s)' % (repr(key)))
    assert_refcounts_are_one(tree)
    tree.remove(key)
    assert_refcounts_are_one(tree)
def main():
    '''Stress-test tree inserts and removes while checking refcounts.
    Command line: location n. An empty location selects an in-memory
    node store; otherwise location must name a non-existent directory.
    '''
    if True:
        # Always-on debug setup; flip the condition to disable tracing.
        import logging
        import tracing
        tracing.trace_add_pattern('tree')
        logging.basicConfig(filename='larch.log', level=logging.DEBUG)
    location = sys.argv[1]
    n = int(sys.argv[2])
    key_size = 19
    value_size = 128
    node_size = 300
    codec = larch.NodeCodec(key_size)
    if location == '':
        # NOTE(review): elsewhere NodeStoreMemory is called with an
        # allow_writes argument (see forest_tests setUp) and
        # NodeStoreDisk below gets True first -- confirm this two-argument
        # call matches NodeStoreMemory's signature.
        ns = larch.NodeStoreMemory(node_size, codec)
    else:
        if os.path.exists(location):
            raise Exception('%s exists already' % location)
        os.mkdir(location)
        ns = larch.NodeStoreDisk(True, node_size, codec, dirname=location)
    forest = larch.Forest(ns)
    tree = forest.new_tree()
    logging.debug('min keys: %d' % tree.min_index_length)
    logging.debug('max keys: %d' % tree.max_index_length)
    # Create list of keys, zero-padded to exactly key_size digits.
    keys = ['%0*d' % (key_size, i) for i in xrange(n)]
    # Do inserts.
    value = 'x' * value_size
    logging.debug('start inserts')
    do_it(keys, lambda key: do_insert(tree, key, value),
          lambda: forest.commit())
    logging.debug('# nodes: %d' % len(ns.list_nodes()))
    logging.debug('nodes: %s' % sorted(ns.list_nodes()))
    print '# nodes after inserts:', len(ns.list_nodes())
    # Remove all but one key.
    logging.debug('start removes')
    do_it(keys[1:], lambda key: do_remove(tree, key), lambda: forest.commit())
    logging.debug('# nodes: %d' % len(ns.list_nodes()))
    logging.debug('nodes: %s' % sorted(ns.list_nodes()))
    print '# nodes after removes:', len(ns.list_nodes())
    # Presumably the root index node plus the one remaining leaf.
    assert len(ns.list_nodes()) == 2
if __name__ == '__main__':
    main()
larch-1.20131130/larch/ 0000755 0001750 0001750 00000000000 12246332521 014153 5 ustar jenkins jenkins larch-1.20131130/larch/__init__.py 0000644 0001750 0001750 00000002707 12246332521 016272 0 ustar jenkins jenkins # Copyright 2010, 2011, 2012 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
__version__ = '1.20131130'
class Error(Exception):
    '''Base class for all errors raised by larch.
    Subclasses are expected to set a ``msg`` attribute, which is used
    as the string form of the exception.
    '''
    def __str__(self):
        return self.msg
from nodes import FrozenNode, Node, LeafNode, IndexNode
from codec import NodeCodec, CodecError
from tree import BTree, KeySizeMismatch, ValueTooLarge
from forest import (Forest, open_forest, BadKeySize, BadNodeSize,
MetadataMissingKey)
from nodestore import (NodeStore, NodeStoreTests, NodeMissing, NodeTooBig,
NodeExists, NodeCannotBeModified)
from refcountstore import RefcountStore
from lru import LRUCache
from uploadqueue import UploadQueue
from idpath import IdPath
from journal import Journal, ReadOnlyMode
from nodestore_disk import NodeStoreDisk, LocalFS, FormatProblem
from nodestore_memory import NodeStoreMemory
__all__ = locals()
larch-1.20131130/larch/codec.py 0000644 0001750 0001750 00000014142 12246332521 015604 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import struct
import larch
class CodecError(larch.Error):
    '''Raised when an encoded node cannot be decoded.'''
    def __init__(self, msg):
        self.msg = msg
class NodeCodec(object):
    '''Encode and decode nodes from their binary format.
    Node identifiers are assumed to fit into 64 bits.
    Leaf node values are assumed to fit into 4 gibibytes.
    '''
    format = 1
    # We use the struct module for encoding and decoding. For speed,
    # we construct the format string all at once, so that there is only
    # one call to struct.pack or struct.unpack for one node. This brought
    # a thousand time speedup over doing it one field at a time. However,
    # the code is not quite as clear as it might be, what with no symbolic
    # names for anything is used anymore. Patches welcome.
    def __init__(self, key_bytes):
        # key_bytes: fixed size, in bytes, of every key in every node.
        self.key_bytes = key_bytes
        # Both node types share the same header layout: 4-byte magic
        # cookie, 64-bit node id, 32-bit number of key/value pairs.
        self.leaf_header = struct.Struct('!4sQI')
        self.index_header = struct.Struct('!4sQI')
        # space for key and length of value is needed for each pair
        self.leaf_pair_fixed_size = key_bytes + struct.calcsize('!I')
        self.index_pair_size = key_bytes + struct.calcsize('!Q')
    def leaf_size(self, keys, values):
        '''Return size of a leaf node with the given pairs.'''
        return (self.leaf_header.size + len(keys) * self.leaf_pair_fixed_size +
                len(''.join([value for value in values])))
    def leaf_size_delta_add(self, old_size, value):
        '''Return size of node that gets a new key/value pair added.
        ``old_size`` is the old size of the node. The key must not already
        have existed in the node.
        '''
        delta = self.leaf_pair_fixed_size + len(value)
        return old_size + delta
    def leaf_size_delta_replace(self, old_size, old_value, new_value):
        '''Return size of node that gets a value replaced.'''
        return old_size + len(new_value) - len(old_value)
    def encode_leaf(self, node):
        '''Encode a leaf node as a byte string.
        Layout: header, all keys, all value lengths, all values.
        '''
        keys = node.keys()
        values = node.values()
        return (self.leaf_header.pack('ORBL', node.id, len(keys)) +
                ''.join(keys) +
                struct.pack('!%dI' % len(values), *map(len, values)) +
                ''.join(values))
    def decode_leaf(self, encoded):
        '''Decode a leaf node from its encoded byte string.
        Raises CodecError if the data does not start with the ORBL cookie.
        '''
        buf = buffer(encoded)
        cookie, node_id, num_pairs = self.leaf_header.unpack_from(buf)
        if cookie != 'ORBL':
            raise CodecError('Leaf node does not begin with magic cookie '
                             '(should be ORBL, is %s)' % repr(cookie))
        # Unpack all keys and all value lengths with one struct call.
        fmt = '!' + ('%ds' % self.key_bytes) * num_pairs + 'I' * num_pairs
        items = struct.unpack_from(fmt, buf, self.leaf_header.size)
        keys = items[:num_pairs]
        lengths = items[num_pairs:num_pairs*2]
        values = []
        # Values are concatenated after the fixed-size part; slice them
        # out using the decoded lengths.
        offset = self.leaf_header.size + self.leaf_pair_fixed_size * num_pairs
        append = values.append
        for length in lengths:
            append(encoded[offset:offset + length])
            offset += length
        return larch.LeafNode(node_id, keys, values)
    def max_index_pairs(self, node_size):
        '''Return number of index pairs that fit in a node of a given size.'''
        return (node_size - self.index_header.size) / self.index_pair_size
    def index_size(self, keys, values):
        '''Return size of an index node with the given pairs.'''
        return self.index_header.size + self.index_pair_size * len(keys)
    def encode_index(self, node):
        '''Encode an index node as a byte string.'''
        keys = node.keys()
        child_ids = node.values()
        return (self.index_header.pack('ORBI', node.id, len(keys)) +
                ''.join(keys) +
                struct.pack('!%dQ' % len(child_ids), *child_ids))
    def decode_index(self, encoded):
        '''Decode an index node from its encoded byte string.
        Raises CodecError if the data does not start with the ORBI cookie.
        '''
        buf = buffer(encoded)
        cookie, node_id, num_pairs = self.index_header.unpack_from(buf)
        if cookie != 'ORBI':
            raise CodecError('Index node does not begin with magic cookie '
                             '(should be ORBI, is %s)' % repr(cookie))
        fmt = '!' + ('%ds' % self.key_bytes) * num_pairs + 'Q' * num_pairs
        items = struct.unpack_from(fmt, buf, self.index_header.size)
        keys = items[:num_pairs]
        child_ids = items[num_pairs:]
        assert len(keys) == len(child_ids)
        for x in child_ids:
            assert type(x) == int
        return larch.IndexNode(node_id, keys, child_ids)
    def encode(self, node):
        '''Encode a node of any type.'''
        if isinstance(node, larch.LeafNode):
            return self.encode_leaf(node)
        else:
            return self.encode_index(node)
    def decode(self, encoded):
        '''Decode node of any type.
        The node type is chosen by the magic cookie at the start of
        the encoded byte string.
        '''
        if encoded.startswith('ORBL'):
            return self.decode_leaf(encoded)
        elif encoded.startswith('ORBI'):
            return self.decode_index(encoded)
        else:
            raise CodecError('Unknown magic cookie in encoded node (%s)' %
                             repr(encoded[:4]))
    def size(self, node):
        '''Return encoded size of a node, regardless of type.'''
        keys = node.keys()
        values = node.values()
        if isinstance(node, larch.LeafNode):
            return self.leaf_size(keys, values)
        else:
            return self.index_size(keys, values)
larch-1.20131130/larch/codec_tests.py 0000644 0001750 0001750 00000010505 12246332521 017025 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
import larch
class NodeCodecTests(unittest.TestCase):
    '''Tests for larch.NodeCodec (key size 3 throughout).'''
    def setUp(self):
        self.leaf = larch.LeafNode(1234, ['foo', 'yoo'], ['bar', 'yoyo'])
        self.index = larch.IndexNode(5678, ['bar', 'foo'], [1234, 7890])
        self.codec = larch.NodeCodec(3)
    def test_has_format_version(self):
        self.assertNotEqual(self.codec.format, None)
    def test_returns_reasonable_size_for_empty_leaf(self):
        self.assert_(self.codec.leaf_size([], []) > 10)
    def test_returns_reasonable_size_for_empty_index(self):
        self.assert_(self.codec.index_size([], []) > 10)
    def test_returns_reasonable_size_for_empty_leaf_generic(self):
        leaf = larch.LeafNode(0, [], [])
        self.assert_(self.codec.size(leaf) > 10)
    def test_returns_ok_delta_for_added_key_value(self):
        leaf = larch.LeafNode(0, [], [])
        old_size = self.codec.leaf_size(leaf.keys(), leaf.values())
        new_size = self.codec.leaf_size_delta_add(old_size, 'bar')
        self.assert_(new_size > old_size + len('foo') + len('bar'))
    def test_returns_ok_delta_for_changed_value_of_same_size(self):
        leaf = larch.LeafNode(0, ['foo'], ['bar'])
        old_size = self.codec.leaf_size(leaf.keys(), leaf.values())
        new_size = self.codec.leaf_size_delta_replace(old_size, 'bar', 'xxx')
        self.assertEqual(new_size, old_size)
    def test_returns_ok_delta_for_changed_value_of_larger_size(self):
        leaf = larch.LeafNode(0, ['foo'], ['bar'])
        old_size = self.codec.leaf_size(leaf.keys(), leaf.values())
        new_size = self.codec.leaf_size_delta_replace(old_size, 'bar',
                                                      'foobar')
        self.assertEqual(new_size, old_size + len('foobar') - len('foo'))
    def test_returns_ok_delta_for_changed_value_of_shorter_size(self):
        leaf = larch.LeafNode(0, ['foo'], ['bar'])
        old_size = self.codec.leaf_size(leaf.keys(), leaf.values())
        new_size = self.codec.leaf_size_delta_replace(old_size, 'bar', '')
        self.assertEqual(new_size, old_size - len('foo'))
    def test_returns_reasonable_size_for_empty_index_generic(self):
        index = larch.IndexNode(0, [], [])
        self.assert_(self.codec.size(index) > 10)
    def test_leaf_round_trip_ok(self):
        encoded = self.codec.encode_leaf(self.leaf)
        decoded = self.codec.decode_leaf(encoded)
        self.assertEqual(decoded, self.leaf)
    def test_index_round_trip_ok(self):
        encoded = self.codec.encode_index(self.index)
        decoded = self.codec.decode_index(encoded)
        self.assertEqual(decoded.keys(), self.index.keys())
        self.assertEqual(decoded.values(), self.index.values())
        self.assertEqual(decoded, self.index)
    def test_generic_round_trip_ok_for_leaf(self):
        encoded = self.codec.encode(self.leaf)
        self.assertEqual(self.codec.decode(encoded), self.leaf)
    def test_generic_round_trip_ok_for_index(self):
        encoded = self.codec.encode(self.index)
        self.assertEqual(self.codec.decode(encoded), self.index)
    def test_decode_leaf_raises_error_for_garbage(self):
        self.assertRaises(larch.CodecError, self.codec.decode_leaf, 'x'*1000)
    def test_decode_index_raises_error_for_garbage(self):
        self.assertRaises(larch.CodecError, self.codec.decode_index, 'x'*1000)
    def test_decode_raises_error_for_garbage(self):
        self.assertRaises(larch.CodecError, self.codec.decode, 'x'*1000)
    def test_returns_resonable_max_number_of_index_pairs(self):
        # Header is 16 bytes. A pair is key_bytes + 8 = 11.
        # Bug fix: the original used self.assert_(x, y), which treats y
        # as the failure *message* and only checks that x is truthy, so
        # the expected values 1 and 4 were never actually compared.
        # assertEqual performs the intended comparison:
        # (32 - 16) // 11 == 1 and (64 - 16) // 11 == 4.
        self.assertEqual(self.codec.max_index_pairs(32), 1)
        self.assertEqual(self.codec.max_index_pairs(64), 4)
larch-1.20131130/larch/forest.py 0000644 0001750 0001750 00000015522 12246332521 016034 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import tracing
import larch
class MetadataMissingKey(larch.Error):
    '''A required key is missing from the forest's stored metadata.'''
    def __init__(self, key_name):
        self.msg = 'larch forest metadata missing "%s"' % key_name
class BadKeySize(larch.Error):
    '''The node store's key size differs from the one the caller wanted.'''
    def __init__(self, store_key_size, wanted_key_size):
        self.msg = ('Node store has key size %s, program wanted %s' %
                    (store_key_size, wanted_key_size))
class BadNodeSize(larch.Error):
    '''The node store's node size differs from the one the caller wanted.'''
    def __init__(self, store_node_size, wanted_node_size):
        self.msg = ('Node store has node size %s, program wanted %s' %
                    (store_node_size, wanted_node_size))
class Forest(object):
    '''A collection of related B-trees.
    Trees in the same forest can share nodes. Cloned trees are always
    created in the same forest as the original.
    Cloning trees is very fast: only the root node is modified.
    Trees can be modified in place. Modifying a tree is done
    using copy-on-write, so modifying a clone does not modify
    the original (and vice versa). You can have up to 65535
    clones of a tree.
    The list of trees in the forest is stored in the ``trees``
    property as a list of trees in the order in which they were
    created.
    '''
    def __init__(self, node_store):
        # node_store: where nodes, refcounts and metadata are persisted.
        tracing.trace('new larch.Forest with node_store=%s' % repr(node_store))
        self.node_store = node_store
        self.trees = []
        # Highest node id handed out so far; see new_id.
        self.last_id = 0
        self._read_metadata()
    def _read_metadata(self):
        '''Restore last_id and the trees list from node store metadata.'''
        tracing.trace('reading metadata')
        keys = self.node_store.get_metadata_keys()
        tracing.trace('metadata keys: %s' % repr(keys))
        if 'last_id' in keys:
            self.last_id = int(self.node_store.get_metadata('last_id'))
            tracing.trace('last_id = %s' % self.last_id)
        if 'root_ids' in keys:
            # root_ids is a comma-separated list of tree root node ids,
            # written by commit().
            s = self.node_store.get_metadata('root_ids')
            # NOTE(review): every other trace call here uses '%'
            # formatting; this one passes s as an extra argument --
            # confirm tracing.trace supports that calling convention.
            tracing.trace('root_ids: %s', s)
            if s.strip():
                root_ids = [int(x) for x in s.split(',')]
                self.trees = [larch.BTree(self, self.node_store, root_id)
                              for root_id in root_ids]
                tracing.trace('root_ids: %s' % repr(root_ids))
            else:
                self.trees = []
                tracing.trace('empty root_ids')
    def new_id(self):
        '''Generate next node id for this forest.
        Trees should use this whenever they create new nodes.
        The ids generated by this method are guaranteed to
        be unique (as long as commits happen OK).
        '''
        self.last_id += 1
        tracing.trace('new id = %d' % self.last_id)
        return self.last_id
    def new_tree(self, old=None):
        '''Create a new tree.
        If old is None, a completely new tree is created. Otherwise,
        a clone of an existing one is created.
        '''
        tracing.trace('new tree (old=%s)' % repr(old))
        if old:
            # Clone: the new root starts with a copy of the old root's
            # key/child pairs.
            old_root = self.node_store.get_node(old.root.id)
            keys = old_root.keys()
            values = old_root.values()
        else:
            keys = []
            values = []
        t = larch.BTree(self, self.node_store, None)
        t._set_root(t._new_index(keys, values))
        self.trees.append(t)
        tracing.trace('new tree root id: %s' % t.root.id)
        return t
    def remove_tree(self, tree):
        '''Remove a tree from the forest.'''
        tracing.trace('removing tree with root id %d' % tree.root.id)
        # Dropping the root's refcount releases the tree's nodes.
        tree._decrement(tree.root.id)
        self.trees.remove(tree)
    def commit(self):
        '''Make sure all changes are stored into the node store.
        Changes made to the forest are guaranteed to be persistent
        only if commit is called successfully.
        '''
        tracing.trace('committing forest')
        self.node_store.set_metadata('last_id', self.last_id)
        # Persist the roots of all live trees so that _read_metadata
        # can reconstruct self.trees later.
        root_ids = ','.join('%d' % t.root.id
                            for t in self.trees
                            if t.root is not None)
        self.node_store.set_metadata('root_ids', root_ids)
        self.node_store.set_metadata('key_size',
                                     self.node_store.codec.key_bytes)
        self.node_store.set_metadata('node_size', self.node_store.node_size)
        self.node_store.save_refcounts()
        self.node_store.commit()
def open_forest(allow_writes=None, key_size=None, node_size=None, codec=None,
                node_store=None, **kwargs):
    '''Create or open a forest.
    ``key_size`` and ``node_size`` are retrieved from the forest, unless
    given. If given, they must match exactly. If the forest does not
    yet exist, the sizes **must** be given.
    ``codec`` is the class to be used for the node codec, defaults to
    ``larch.NodeCodec``. Similarly, ``node_store`` is the node store class,
    defaults to ``larch.NodeStoreDisk``.
    All other keyword arguments are given to the ``node_store``
    class initializer.
    Raises MetadataMissingKey if sizes are not given and the existing
    store's metadata lacks them; raises BadKeySize on a key size mismatch.
    '''
    tracing.trace('opening forest')
    # allow_writes is mandatory even though it has a default; the default
    # only exists so it can be passed by keyword.
    assert allow_writes is not None
    codec = codec or larch.NodeCodec
    node_store = node_store or larch.NodeStoreDisk
    if key_size is None or node_size is None:
        # Open a temporary node store for reading metadata.
        # For this, we can use any values for node and key sizes,
        # since we won't be accessing nodes or keys.
        c_temp = codec(42)
        ns_temp = node_store(False, 42, c_temp, **kwargs)
        if 'key_size' not in ns_temp.get_metadata_keys():
            raise MetadataMissingKey('key_size')
        if 'node_size' not in ns_temp.get_metadata_keys():
            raise MetadataMissingKey('node_size')
        if key_size is None:
            key_size = int(ns_temp.get_metadata('key_size'))
        if node_size is None:
            node_size = int(ns_temp.get_metadata('node_size'))
    c = codec(key_size)
    ns = node_store(allow_writes, node_size, c, **kwargs)
    def check_size(keyname, wanted, exception):
        # Raise exception if stored metadata disagrees with wanted.
        if keyname not in ns.get_metadata_keys():
            return
        value = int(ns.get_metadata(keyname))
        if value != wanted:
            raise exception(value, wanted)
    # Only key_size is validated here: an existing forest's stored
    # node_size deliberately wins over the one the caller asked for
    # (see test_opens_existing_tree_with_incompatible_node_size).
    check_size('key_size', key_size, BadKeySize)
    return Forest(ns)
larch-1.20131130/larch/forest_tests.py 0000644 0001750 0001750 00000020234 12246332521 017252 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
import shutil
import tempfile
import unittest
import larch
class ForestTests(unittest.TestCase):
    '''Tests for larch.Forest using an in-memory node store
    (key size 3, node size 64).
    '''
    def setUp(self):
        self.codec = larch.NodeCodec(3)
        self.ns = larch.NodeStoreMemory(
            allow_writes=True, node_size=64, codec=self.codec)
        self.forest = larch.Forest(self.ns)
    def test_new_node_ids_grow(self):
        id1 = self.forest.new_id()
        id2 = self.forest.new_id()
        self.assertEqual(id1 + 1, id2)
    def test_has_no_trees_initially(self):
        self.assertEqual(self.forest.trees, [])
    def test_creates_a_tree(self):
        t = self.forest.new_tree()
        self.assert_(isinstance(t, larch.BTree))
        self.assertEqual(self.forest.trees, [t])
    def test_clones_a_tree(self):
        t1 = self.forest.new_tree()
        t2 = self.forest.new_tree(t1)
        self.assertNotEqual(t1.root.id, t2.root.id)
    def test_clones_can_be_changed_independently(self):
        t1 = self.forest.new_tree()
        t2 = self.forest.new_tree(t1)
        t1.insert('foo', 'foo')
        self.assertNotEqual(t1.root.id, t2.root.id)
    def test_clones_do_not_clash_in_new_node_ids(self):
        t1 = self.forest.new_tree()
        t2 = self.forest.new_tree(t1)
        node1 = t1._new_leaf([], [])
        node2 = t2._new_leaf([], [])
        self.assertEqual(node1.id + 1, node2.id)
    def test_is_persistent(self):
        # A second Forest on the same node store must see the
        # committed tree.
        t1 = self.forest.new_tree()
        t1.insert('foo', 'bar')
        self.forest.commit()
        f2 = larch.Forest(self.ns)
        self.assertEqual([t.root.id for t in f2.trees], [t1.root.id])
    def test_removes_trees(self):
        t1 = self.forest.new_tree()
        self.forest.remove_tree(t1)
        self.assertEqual(self.forest.trees, [])
    def test_remove_tree_removes_nodes_for_tree_as_well(self):
        t = self.forest.new_tree()
        t.insert('foo', 'bar')
        self.forest.commit()
        self.assertNotEqual(self.ns.list_nodes(), [])
        self.forest.remove_tree(t)
        self.assertEqual(self.ns.list_nodes(), [])
    def test_changes_work_across_commit(self):
        # Clone semantics must survive a commit/reload cycle: the
        # clone's changes are visible only in the clone.
        t1 = self.forest.new_tree()
        t1.insert('000', 'foo')
        t1.insert('001', 'bar')
        t2 = self.forest.new_tree(t1)
        t2.insert('002', 'foobar')
        t2.remove('000')
        self.forest.commit()
        f2 = larch.Forest(self.ns)
        t1a, t2a = f2.trees
        self.assertEqual(t1.root.id, t1a.root.id)
        self.assertEqual(t2.root.id, t2a.root.id)
        self.assertEqual(t1a.lookup('000'), 'foo')
        self.assertEqual(t1a.lookup('001'), 'bar')
        self.assertRaises(KeyError, t2a.lookup, '000')
        self.assertEqual(t2a.lookup('001'), 'bar')
        self.assertEqual(t2a.lookup('002'), 'foobar')
    def test_committing_single_empty_tree_works(self):
        self.forest.new_tree()
        self.assertEqual(self.forest.commit(), None)
    def test_read_metadata_works_after_removed_and_committed(self):
        t1 = self.forest.new_tree()
        t1.insert('foo', 'foo')
        self.forest.commit()
        self.forest.remove_tree(t1)
        self.forest.commit()
        f2 = larch.Forest(self.ns)
        self.assertEqual(f2.trees, [])
    def test_commit_puts_key_and_node_sizes_in_metadata(self):
        self.forest.commit()
        self.assertEqual(self.ns.get_metadata('key_size'), 3)
        self.assertEqual(self.ns.get_metadata('node_size'), 64)
class OpenForestTests(unittest.TestCase):
    '''Tests for larch.open_forest against an on-disk store in a
    temporary directory.
    '''
    def setUp(self):
        self.key_size = 3
        self.node_size = 64
        self.tempdir = tempfile.mkdtemp()
    def tearDown(self):
        shutil.rmtree(self.tempdir)
    def test_creates_new_forest(self):
        f = larch.open_forest(key_size=self.key_size, node_size=self.node_size,
                              dirname=self.tempdir, allow_writes=True)
        self.assertEqual(f.node_store.codec.key_bytes, self.key_size)
        self.assertEqual(f.node_store.node_size, self.node_size)
    def test_fail_if_metadata_missing_key_size(self):
        # Hand-write a metadata file without key_size to trigger the error.
        with open(os.path.join(self.tempdir, 'metadata'), 'w') as f:
            f.write('[metadata]\n')
            f.write('format=1/1\n')
            f.write('node_size=%s\n' % self.node_size)
        self.assertRaises(larch.MetadataMissingKey, larch.open_forest,
                          key_size=self.key_size, node_size=None,
                          dirname=self.tempdir, allow_writes=False)
    def test_fail_if_metadata_missing_node_size(self):
        with open(os.path.join(self.tempdir, 'metadata'), 'w') as f:
            f.write('[metadata]\n')
            f.write('format=1/1\n')
            f.write('key_size=%s\n' % self.key_size)
        self.assertRaises(larch.MetadataMissingKey, larch.open_forest,
                          key_size=self.key_size, node_size=None,
                          dirname=self.tempdir, allow_writes=False)
    def test_fail_if_existing_tree_has_incompatible_key_size(self):
        f = larch.open_forest(key_size=self.key_size, node_size=self.node_size,
                              dirname=self.tempdir, allow_writes=True)
        f.commit()
        self.assertRaises(larch.BadKeySize,
                          larch.open_forest,
                          key_size=self.key_size + 1,
                          node_size=self.node_size,
                          dirname=self.tempdir,
                          allow_writes=True)
    def test_opens_existing_tree_with_incompatible_node_size(self):
        # A node size mismatch is NOT an error: the stored size wins.
        f = larch.open_forest(allow_writes=True, key_size=self.key_size,
                              node_size=self.node_size, dirname=self.tempdir)
        f.commit()
        new_size = self.node_size + 1
        f2 = larch.open_forest(key_size=self.key_size,
                               node_size=new_size,
                               dirname=self.tempdir,
                               allow_writes=True)
        self.assertEqual(int(f2.node_store.get_metadata('node_size')),
                         self.node_size)
    def test_opens_existing_tree_with_compatible_key_and_node_size(self):
        f = larch.open_forest(key_size=self.key_size, node_size=self.node_size,
                              dirname=self.tempdir, allow_writes=True)
        f.commit()
        f2 = larch.open_forest(key_size=self.key_size,
                               node_size=self.node_size,
                               dirname=self.tempdir,
                               allow_writes=True)
        self.assert_(True)
    def test_opens_existing_tree_without_node_and_key_sizes_given(self):
        f = larch.open_forest(allow_writes=True, key_size=self.key_size,
                              node_size=self.node_size, dirname=self.tempdir)
        f.commit()
        f2 = larch.open_forest(dirname=self.tempdir, allow_writes=True)
        self.assertEqual(f2.node_store.node_size, self.node_size)
        self.assertEqual(f2.node_store.codec.key_bytes, self.key_size)
    def test_fails_with_new_tree_unless_node_and_key_sizes_given(self):
        self.assertRaises(AssertionError,
                          larch.open_forest,
                          dirname=self.tempdir)
class BadKeySizeTests(unittest.TestCase):
    '''The BadKeySize error message must mention both sizes.'''
    def test_both_sizes_in_error_message(self):
        msg = str(larch.BadKeySize(123, 456))
        self.assert_('123' in msg)
        self.assert_('456' in msg)
class BadNodeSizeTests(unittest.TestCase):
    '''The BadNodeSize error message must mention both sizes.'''
    def test_both_sizes_in_error_message(self):
        msg = str(larch.BadNodeSize(123, 456))
        self.assert_('123' in msg)
        self.assert_('456' in msg)
larch-1.20131130/larch/fsck.py 0000755 0001750 0001750 00000017234 12246332521 015465 0 ustar jenkins jenkins # Copyright 2010, 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import logging
import sys
import tracing
import ttystatus
import larch
class Error(larch.Error):
    '''An fsck consistency-check failure.'''
    def __init__(self, msg):
        self.msg = 'Assertion failed: %s' % msg
class WorkItem(object):
    '''A work item for fsck.
    Subclass can optionally set the ``name`` attribute; the class name
    is used by default.
    Subclasses are expected to set ``self.fsck`` (a Fsck instance);
    the warning/error/node helpers below rely on it.
    '''
    def __str__(self):
        if hasattr(self, 'name'):
            return self.name
        else:
            return self.__class__.__name__
    def do(self):
        # Subclasses override this; it may be a generator that yields
        # further WorkItems (see CheckIndexNode.do).
        pass
    def __iter__(self):
        # A work item iterates as just itself, so single items and
        # sequences of items can be handled uniformly.
        return iter([self])
    def warning(self, msg):
        self.fsck.warning('warning: %s: %s' % (self.name, msg))
    def error(self, msg):
        self.fsck.error('ERROR: %s: %s' % (self.name, msg))
    def get_node(self, node_id):
        # Return the node, or None (after reporting an error) if it
        # is missing from the node store.
        tracing.trace('node_id=%s' % node_id)
        try:
            return self.fsck.forest.node_store.get_node(node_id)
        except larch.NodeMissing:
            self.error(
                'forest %s: node %s is missing' %
                (self.fsck.forest_name, node_id))
    def start_modification(self, node):
        self.fsck.forest.node_store.start_modification(node)
    def put_node(self, node):
        tracing.trace('node.id=%s' % node.id)
        return self.fsck.forest.node_store.put_node(node)
class CheckIndexNode(WorkItem):
    '''Check one index node, yielding checks for its index-node children.'''
    def __init__(self, fsck, node):
        self.fsck = fsck
        self.node = node
        self.name = (
            'CheckIndexNode: checking index node %s in %s' %
            (self.node.id, self.fsck.forest_name))
    def do(self):
        # Generator: yields further WorkItems; nothing runs until iterated.
        tracing.trace('node.id=%s' % self.node.id)
        if type(self.node) != larch.IndexNode:
            self.error(
                'forest %s: node %s: '
                'Expected to get an index node, got %s instead' %
                (self.fsck.forest_name, self.node.id, type(self.node)))
            return
        if len(self.node) == 0:
            self.error('forest %s: index node %s: No children' %
                       (self.fsck.forest_name, self.node.id))
            return
        # Increase refcounts for all children, and check that the child
        # nodes exist. If the children are index nodes, create work
        # items to check those. Leaf nodes get no further checking.
        drop_keys = []
        for key in self.node:
            child_id = self.node[key]
            # Only load a child the first time it is seen; shared
            # children are still counted every time.
            seen_already = child_id in self.fsck.refcounts
            self.fsck.count(child_id)
            if not seen_already:
                child = self.get_node(child_id)
                if child is None:
                    # get_node already reported the missing-node error.
                    drop_keys.append(key)
                elif type(child) == larch.IndexNode:
                    yield CheckIndexNode(self.fsck, child)
        # Fix references to missing children by dropping them.
        if self.fsck.fix and drop_keys:
            self.start_modification(self.node)
            for key in drop_keys:
                self.node.remove(key)
                self.warning('index node %s: dropped key %s' %
                             (self.node.id, key))
            self.put_node(self.node)
class CheckForest(WorkItem):

    '''Check every tree in the forest.

    Counts a reference for each tree root, and yields a CheckIndexNode
    work item per root so each whole tree gets checked.
    '''

    def __init__(self, fsck):
        self.fsck = fsck
        self.name = 'CheckForest: forest %s' % self.fsck.forest_name

    def do(self):
        '''Yield a check work item for each tree root in the forest.'''
        tracing.trace("CheckForest: checking forest %s" % self.name)
        for tree in self.fsck.forest.trees:
            self.fsck.count(tree.root.id)
            root_node = self.get_node(tree.root.id)
            if root_node is None:
                # BUGFIX: get_node returns None (after reporting an
                # error) when the root node is missing; previously this
                # crashed with AttributeError on root_node.id. Skip the
                # tree instead.
                continue
            tracing.trace('root_node.id=%s' % root_node.id)
            yield CheckIndexNode(self.fsck, root_node)
class CheckRefcounts(WorkItem):

    '''Compare on-disk refcounts against the counts gathered by the walk.

    With fix enabled, wrong on-disk refcounts are overwritten with the
    computed ones.
    '''

    def __init__(self, fsck):
        self.fsck = fsck
        self.name = 'CheckRefcounts: refcounts in %s' % self.fsck.forest_name

    def do(self):
        '''Verify, and optionally repair, every counted node's refcount.'''
        store = self.fsck.forest.node_store
        tracing.trace(
            'CheckRefcounts : %s nodes to check' % len(self.fsck.refcounts))
        for node_id, expected in self.fsck.refcounts.items():
            tracing.trace('CheckRefcounts checking node %s' % node_id)
            actual = store.get_refcount(node_id)
            if actual == expected:
                continue
            self.error(
                'forest %s: node %s: refcount is %s but should be %s' %
                (self.fsck.forest_name, node_id, actual, expected))
            if self.fsck.fix:
                store.set_refcount(node_id, expected)
                self.warning('node %s: refcount was set to %s' %
                             (node_id, expected))
class CommitForest(WorkItem):

    '''Work item that commits accumulated fixes to disk.'''

    def __init__(self, fsck):
        self.fsck = fsck
        self.name = ('CommitForest: committing fixes to %s' %
                     self.fsck.forest_name)

    def do(self):
        '''Persist all pending modifications by committing the forest.'''
        forest_name = self.fsck.forest_name
        tracing.trace('committing changes to %s' % forest_name)
        self.fsck.forest.commit()
class Fsck(object):
    '''Verify internal consistency of a larch.Forest.

    warning and error are callables taking a message string; fix is a
    boolean: when true, found problems are repaired and committed.
    '''
    def __init__(self, forest, warning, error, fix):
        self.forest = forest
        # NodeStoreMemory has no dirname attribute, hence the fallback.
        self.forest_name = getattr(
            forest.node_store, 'dirname', 'in-memory forest')
        self.warning = warning
        self.error = error
        self.fix = fix
        # node id -> number of references found while walking the trees.
        self.refcounts = {}
    def find_work(self):
        '''Yield the top-level work items of a full fsck run, in order.'''
        yield CheckForest(self)
        yield CheckRefcounts(self)
        if self.fix:
            yield CommitForest(self)
    def count(self, node_id):
        '''Record one more reference to node_id.'''
        self.refcounts[node_id] = self.refcounts.get(node_id, 0) + 1
    def run_work(self, work_generators, ts=None):
        """run work_generator.do() recursively as needed
        work_generators : list of generators (eg list( self.find_work() ))
        who return objects with .do() methods that
        either return None or other generators.
        if a ttystatus.TerminalStatus instance is passed as ts,
        report fsck progress via ts
        """
        while work_generators:
            work_generator = work_generators.pop(0)
            for work in work_generator:
                if ts:
                    ts.increase('items', 1)
                    ts['item'] = work
                generator_or_none = work.do()
                if generator_or_none:
                    # Run new work before carrying-on with work_generators
                    # (required for proper refcount check)
                    work_generators.insert(0,generator_or_none)
    def run_fsck(self, ts=None):
        """Runs full fsck
        if a ttystatus.TerminalStatus instance is passed as ts,
        report fsck progress via ts item/items updates
        """
        # Make sure that we pass list( self.find_work() ) and not
        # [ self.find_work() ] so that when CheckForest.do() returns
        # work generators, the work generators are actually called
        # before the CheckRefcounts check.
        work_generators = list( self.find_work() )
        self.run_work(work_generators, ts=ts)
larch-1.20131130/larch/idpath.py 0000644 0001750 0001750 00000003035 12246332521 015777 0 ustar jenkins jenkins # Copyright 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
class IdPath(object):

    '''Convert a numeric id to a pathname.

    Ids are spread over a directory hierarchy whose depth is set by a
    constructor parameter. Ids are assumed to be non-negative integers.
    Each directory level consumes ``bits_per_level`` bits of the id,
    after skipping the lowest ``skip_bits`` bits; the basename is the
    full id in hexadecimal.
    '''

    def __init__(self, dirname, depth, bits_per_level, skip_bits):
        self.dirname = dirname
        self.depth = depth
        self.bits_per_level = bits_per_level
        self.skip_bits = skip_bits

    def convert(self, identifier):
        '''Return the pathname for identifier.'''
        mask = 2 ** self.bits_per_level - 1
        components = []
        for level in range(self.depth):
            shift = self.skip_bits + level * self.bits_per_level
            components.append('%d' % ((identifier >> shift) & mask))
        components.append('%x' % identifier)
        return os.path.join(self.dirname, *components)
larch-1.20131130/larch/idpath_tests.py 0000644 0001750 0001750 00000003514 12246332521 017223 0 ustar jenkins jenkins # Copyright 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
import shutil
import tempfile
import unittest
import larch
class IdPathTests(unittest.TestCase):
    '''Unit tests for larch.IdPath path conversion.'''
    def setUp(self):
        # One bit per level, no skipped bits: ids map predictably.
        self.tempdir = tempfile.mkdtemp()
        self.depth = 3
        bits = 1
        skip = 0
        self.idpath = larch.IdPath(self.tempdir, self.depth, bits, skip)
    def tearDown(self):
        shutil.rmtree(self.tempdir)
    def test_returns_string(self):
        self.assertEqual(type(self.idpath.convert(1)), str)
    def test_starts_with_designated_path(self):
        path = self.idpath.convert(1)
        self.assert_(path.startswith(self.tempdir))
    def test_different_ids_return_different_values(self):
        path1 = self.idpath.convert(42)
        path2 = self.idpath.convert(1024)
        self.assertNotEqual(path1, path2)
    def test_same_id_returns_same_path(self):
        path1 = self.idpath.convert(42)
        path2 = self.idpath.convert(42)
        self.assertEqual(path1, path2)
    def test_uses_desired_depth(self):
        # The directory part of the relative path has exactly
        # self.depth components.
        path = self.idpath.convert(1)
        subpath = path[len(self.tempdir + os.sep):]
        subdir = os.path.dirname(subpath)
        self.assertEqual(len(subdir.split(os.sep)), self.depth)
larch-1.20131130/larch/journal.py 0000644 0001750 0001750 00000024407 12246332521 016206 0 ustar jenkins jenkins # Copyright 2012 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import errno
import logging
import os
import tracing
import larch
class ReadOnlyMode(larch.Error): # pragma: no cover
    '''Raised when a modification is attempted on a read-only journal.'''
    def __init__(self):
        self.msg = 'Larch B-tree is in read-only mode, no changes allowed'
class Journal(object):
    '''A journal layer on top of a virtual filesystem.
    The journal solves the problem of updating on-disk data structures
    atomically. Changes are first written to a journal, and then moved
    from there to the real location. If the program or system crashes,
    the changes can be completed later on, or rolled back, depending
    on what's needed for consistency.
    The journal works as follows:
    * ``x`` is the real filename
    * ``new/x`` is a new or modified file
    * ``delete/x`` is a deleted file, created there as a flag file
    Commit does this:
    * for every ``delete/x``, remove ``x``
    * for every ``new/x`` except ``new/metadata``, move to ``x``
    * move ``new/metadata`` to ``metadata``
    Rollback does this:
    * remove every ``new/x``
    * remove every ``delete/x``
    When a journalled node store is opened, if ``new/metadata`` exists,
    the commit happens. Otherwise a rollback happens. This guarantees
    that the on-disk state is consistent.
    We only provide enough of a filesystem interface as is needed by
    NodeStoreDisk. For example, we do not care about directory removal.
    The journal can be opened in read-only mode, in which case it ignores
    any changes in ``new`` and ``delete``, and does not try to rollback or
    commit at start.
    '''
    # Basename of the flag file; its presence in ``new`` marks a commit
    # that was started (all data written) but not yet finished.
    flag_basename = 'metadata'
    def __init__(self, allow_writes, fs, storedir):
        logging.debug('Initializing Journal for %s' % storedir)
        self.allow_writes = allow_writes
        self.fs = fs
        self.storedir = storedir
        if not self.storedir.endswith(os.sep):
            self.storedir += os.sep
        self.newdir = os.path.join(self.storedir, 'new/')
        self.deletedir = os.path.join(self.storedir, 'delete/')
        self.flag_file = os.path.join(self.storedir, self.flag_basename)
        self.new_flag = os.path.join(self.newdir, self.flag_basename)
        self.new_flag_seen = self.fs.exists(self.new_flag)
        tracing.trace('self.new_flag_seen: %s' % self.new_flag_seen)
        # Recovery at open: finish a started commit, otherwise undo
        # whatever partial changes are lying around. Read-only journals
        # must not touch the disk at all.
        if self.allow_writes:
            if self.new_flag_seen:
                logging.debug('Automatically committing remaining changes')
                self.commit()
            else:
                logging.debug('Automatically rolling back remaining changes')
                self.rollback()
        else:
            logging.debug('Not committing/rolling back since read-only')
        self.new_files = set()
        self.deleted_files = set()
    def _require_rw(self):
        '''Raise error if modifications are not allowed.'''
        if not self.allow_writes:
            raise ReadOnlyMode()
    def _relative(self, filename):
        '''Return the part of filename that is relative to storedir.'''
        assert filename.startswith(self.storedir)
        return filename[len(self.storedir):]
    def _new(self, filename):
        '''Return name for a new file whose final name is filename.'''
        return os.path.join(self.newdir, self._relative(filename))
    def _deleted(self, filename):
        '''Return name for temporary name for file to be deleted.'''
        return os.path.join(self.deletedir, self._relative(filename))
    def _realname(self, journaldir, filename):
        '''Return real name for a file in a journal temporary directory.'''
        assert filename.startswith(journaldir)
        return os.path.join(self.storedir, filename[len(journaldir):])
    def _is_in_new(self, filename):
        # Check the in-memory set first; fall back to disk for changes
        # left behind by an earlier journal instance.
        new = self._new(filename)
        return new in self.new_files or self.fs.exists(new)
    def _is_in_deleted(self, filename):
        deleted = self._deleted(filename)
        return deleted in self.deleted_files or self.fs.exists(deleted)
    def exists(self, filename):
        '''Does filename exist, taking pending changes into account?'''
        if self.allow_writes or self.new_flag_seen:
            if self._is_in_new(filename):
                return True
            elif self._is_in_deleted(filename):
                return False
        return self.fs.exists(filename)
    def makedirs(self, dirname):
        '''Create a directory (and missing parents), journalled.'''
        tracing.trace(dirname)
        self._require_rw()
        x = self._new(dirname)
        self.fs.makedirs(x)
        self.new_files.add(x)
    def overwrite_file(self, filename, contents):
        '''Create or replace filename with contents, journalled.'''
        tracing.trace(filename)
        self._require_rw()
        new = self._new(filename)
        self.fs.overwrite_file(new, contents)
        self.new_files.add(new)
    def cat(self, filename):
        '''Return contents of filename, taking pending changes into account.

        Raises OSError (ENOENT) if the file is marked as deleted.
        '''
        tracing.trace('filename=%s' % filename)
        tracing.trace('allow_writes=%s' % self.allow_writes)
        tracing.trace('new_flag_seen=%s' % self.new_flag_seen)
        if self.allow_writes or self.new_flag_seen:
            if self._is_in_new(filename):
                return self.fs.cat(self._new(filename))
            elif self._is_in_deleted(filename):
                raise OSError(
                    errno.ENOENT, os.strerror(errno.ENOENT), filename)
        return self.fs.cat(filename)
    def remove(self, filename):
        '''Remove filename, journalled.

        An uncommitted new file is removed outright; a committed file
        is flagged for deletion at the next commit. Raises OSError
        (ENOENT) if already flagged as deleted.
        '''
        tracing.trace(filename)
        self._require_rw()
        new = self._new(filename)
        deleted = self._deleted(filename)
        if new in self.new_files:
            self.fs.remove(new)
            self.new_files.remove(new)
        elif deleted in self.deleted_files:
            raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), filename)
        else:
            # Flag file marks the pending deletion; assumes self.fs
            # creates missing parent directories -- TODO confirm.
            self.fs.overwrite_file(deleted, '')
            self.deleted_files.add(deleted)
    def list_files(self, dirname):
        '''List all files.
        Files only, no directories.
        '''
        assert not dirname.startswith(self.newdir)
        assert not dirname.startswith(self.deletedir)
        if self.allow_writes or self.new_flag_seen:
            # Read-write view: committed files that are not deleted,
            # plus pending new files (reported under their real names).
            if self.fs.exists(dirname):
                for x in self.climb(dirname, files_only=True):
                    if self.exists(x):
                        yield x
            new = self._new(dirname)
            if self.fs.exists(new):
                for x in self.climb(new, files_only=True):
                    yield self._realname(self.newdir, x)
        else:
            # Read-only view: only committed files, ignoring the
            # journal's temporary directories.
            if self.fs.exists(dirname):
                for x in self.climb(dirname, files_only=True):
                    in_new = x.startswith(self.newdir)
                    in_deleted = x.startswith(self.deletedir)
                    if not in_new and not in_deleted:
                        yield x
    def climb(self, dirname, files_only=False):
        '''Iterate over the tree below dirname, depth first.

        Yields files before their containing directory; directories are
        omitted entirely if files_only is true.
        '''
        basenames = self.fs.listdir(dirname)
        filenames = []
        for basename in basenames:
            pathname = os.path.join(dirname, basename)
            if self.fs.isdir(pathname):
                for x in self.climb(pathname, files_only=files_only):
                    yield x
            else:
                filenames.append(pathname)
        for filename in filenames:
            yield filename
        if not files_only:
            yield dirname
    def _clear_directory(self, dirname):
        '''Remove everything inside dirname (but keep dirname itself).'''
        tracing.trace(dirname)
        for pathname in self.climb(dirname):
            if pathname != dirname:
                if self.fs.isdir(pathname):
                    self.fs.rmdir(pathname)
                else:
                    self.fs.remove(pathname)
    def _vivify(self, dirname, exclude):
        '''Move everything under dirname to its real location.

        Pathnames in exclude (and dirname itself) are left in place.
        '''
        tracing.trace('dirname: %s' % dirname)
        tracing.trace('exclude: %s' % repr(exclude))
        all_excludes = [dirname] + exclude
        for pathname in self.climb(dirname):
            if pathname not in all_excludes:
                r = self._realname(dirname, pathname)
                parent = os.path.dirname(r)
                if self.fs.isdir(pathname):
                    # Only move a directory if the target doesn't exist
                    # yet; otherwise its contents were moved already.
                    if not self.fs.exists(r):
                        if not self.fs.exists(parent):
                            self.fs.makedirs(parent)
                        self.fs.rename(pathname, r)
                else:
                    if not self.fs.exists(parent):
                        self.fs.makedirs(parent)
                    self.fs.rename(pathname, r)
    def rollback(self):
        '''Undo all changes made since the previous commit.'''
        tracing.trace('%s start' % self.storedir)
        self._require_rw()
        if self.fs.exists(self.newdir):
            self._clear_directory(self.newdir)
        if self.fs.exists(self.deletedir):
            self._clear_directory(self.deletedir)
        self.new_files = set()
        self.deleted_files = set()
        tracing.trace('%s done' % self.storedir)
    def _really_delete(self, deletedir):
        '''Delete the real files flagged for deletion under deletedir.'''
        tracing.trace(deletedir)
        for pathname in self.climb(deletedir, files_only=True):
            if pathname != deletedir:
                realname = self._realname(deletedir, pathname)
                try:
                    self.fs.remove(realname)
                except OSError, e: # pragma: no cover
                    # Already gone (or a directory): ignore, so a
                    # re-run after a crash still succeeds.
                    if e.errno not in (errno.ENOENT, errno.EISDIR):
                        raise
                self.fs.remove(pathname)
    def commit(self, skip=[]):
        '''Make all changes permanent.

        Files named in skip are left in the journal (used by tests to
        simulate a partial commit). The flag file is moved last, so a
        crash mid-commit is detected and finished at the next open.
        NOTE(review): mutable default ``skip=[]`` is only read, never
        mutated, so it is safe here.
        '''
        tracing.trace('%s start' % self.storedir)
        self._require_rw()
        if self.fs.exists(self.deletedir):
            self._really_delete(self.deletedir)
        if self.fs.exists(self.newdir):
            skip = [self._new(x) for x in skip]
            self._vivify(self.newdir, [self.new_flag] + skip)
        if not skip and self.fs.exists(self.new_flag):
            self.fs.rename(self.new_flag, self.flag_file)
        self.new_files = set()
        self.deleted_files = set()
        tracing.trace('%s done' % self.storedir)
larch-1.20131130/larch/journal_tests.py 0000644 0001750 0001750 00000023232 12246332521 017423 0 ustar jenkins jenkins # Copyright 2012 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
import shutil
import tempfile
import unittest
import larch
class JournalTests(unittest.TestCase):
    '''Tests for a read-write larch.Journal over a real temp directory.'''
    def setUp(self):
        self.tempdir = tempfile.mkdtemp()
        self.fs = larch.LocalFS()
        self.j = larch.Journal(True, self.fs, self.tempdir)
    def tearDown(self):
        shutil.rmtree(self.tempdir)
    def join(self, *args):
        # Helper: absolute path inside the journal's store directory.
        return os.path.join(self.tempdir, *args)
    def test_constructs_new_filename(self):
        self.assertEqual(self.j._new(self.join('foo')),
                         self.join('new', 'foo'))
    def test_constructs_deleted_filename(self):
        self.assertEqual(self.j._deleted(self.join('foo')),
                         self.join('delete', 'foo'))
    def test_does_not_know_random_directory_initially(self):
        self.assertFalse(self.j.exists(self.join('foo')))
    def test_creates_directory(self):
        dirname = self.join('foo/bar')
        self.j.makedirs(dirname)
        self.assertTrue(self.j.exists(dirname))
    def test_rollback_undoes_directory_creation(self):
        dirname = self.join('foo/bar')
        self.j.makedirs(dirname)
        self.j.rollback()
        self.assertFalse(self.j.exists(dirname))
    def test_rollback_keeps_committed_directory(self):
        dirname = self.join('foo/bar')
        self.j.makedirs(dirname)
        self.j.commit()
        self.j.rollback()
        self.assertTrue(self.j.exists(dirname))
    def test_rollback_works_without_changes(self):
        self.assertEqual(self.j.rollback(), None)
    def test_creates_new_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.assertEqual(self.j.cat(filename), 'bar')
    def test_rollback_undoes_new_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.rollback()
        self.assertFalse(self.j.exists(filename))
    def test_commits_new_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.commit()
        # Rollback after commit must not undo the committed file.
        self.j.rollback()
        self.assertEqual(self.j.cat(filename), 'bar')
    def test_creates_new_file_after_commit(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.commit()
        self.j.overwrite_file(filename, 'yo')
        self.assertEqual(self.j.cat(filename), 'yo')
    def test_cat_does_not_find_deleted_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.commit()
        self.j.remove(filename)
        self.assertRaises(OSError, self.j.cat, filename)
    def test_rollback_brings_back_old_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.commit()
        self.j.overwrite_file(filename, 'yo')
        self.j.rollback()
        self.assertEqual(self.j.cat(filename), 'bar')
    def test_removes_uncommitted_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.remove(filename)
        self.assertFalse(self.j.exists(filename))
    def test_rollback_undoes_removal_of_uncommitted_file(self):
        # An uncommitted file is gone for good once removed; rollback
        # does not resurrect it.
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.remove(filename)
        self.j.rollback()
        self.assertFalse(self.j.exists(filename))
    def test_commits_file_removal(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.remove(filename)
        self.j.commit()
        self.j.rollback()
        self.assertFalse(self.j.exists(filename))
    def test_removes_committed_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.commit()
        self.j.remove(filename)
        self.assertFalse(self.j.exists(filename))
    def test_removing_committed_file_twice_causes_oserror(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.commit()
        self.j.remove(filename)
        self.assertRaises(OSError, self.j.remove, filename)
    def test_rollback_brings_back_committed_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.commit()
        self.j.remove(filename)
        self.j.rollback()
        self.assertEqual(self.j.cat(filename), 'bar')
    def test_commits_removal_of_committed_file(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        self.j.commit()
        self.j.remove(filename)
        self.j.commit()
        self.j.rollback()
        self.assertFalse(self.j.exists(filename))
    def test_commits_metadata(self):
        # The metadata flag file must end up at its real location.
        metadata = self.join('metadata')
        self.j.overwrite_file(metadata, 'yuck')
        self.j.commit()
        self.assertEqual(self.fs.cat(self.join(metadata)), 'yuck')
    def test_unflagged_commit_means_new_instance_rollbacks(self):
        filename = self.join('foo/bar')
        self.j.overwrite_file(filename, 'bar')
        j2 = larch.Journal(True, self.fs, self.tempdir)
        self.assertFalse(j2.exists(filename))
    def test_partial_commit_finished_by_new_instance(self):
        # Simulate a crash mid-commit via skip=: the flag file stays in
        # new/, so the next journal instance finishes the commit.
        filename = self.join('foo/bar')
        metadata = self.join('metadata')
        self.j.overwrite_file(filename, 'bar')
        self.j.overwrite_file(metadata, '')
        self.j.commit(skip=[filename])
        j2 = larch.Journal(True, self.fs, self.tempdir)
        self.assertTrue(j2.exists(filename))
class ReadOnlyJournalTests(unittest.TestCase):
    '''Tests for a read-only Journal sharing a store with a read-write one.

    The read-only journal must only ever see committed state, and must
    refuse any modification.
    '''
    def setUp(self):
        self.tempdir = tempfile.mkdtemp()
        self.fs = larch.LocalFS()
        self.rw = larch.Journal(True, self.fs, self.tempdir)
        self.ro = larch.Journal(False, self.fs, self.tempdir)
    def tearDown(self):
        shutil.rmtree(self.tempdir)
    def join(self, *args):
        # Helper: absolute path inside the shared store directory.
        return os.path.join(self.tempdir, *args)
    def test_does_not_know_random_directory_initially(self):
        self.assertFalse(self.ro.exists(self.join('foo')))
    def test_creating_directory_raises_error(self):
        self.assertRaises(larch.ReadOnlyMode, self.ro.makedirs, 'foo')
    def test_calling_rollback_raises_error(self):
        self.assertRaises(larch.ReadOnlyMode, self.ro.rollback)
    def test_readonly_mode_does_not_check_for_directory_creation(self):
        dirname = self.join('foo/bar')
        self.rw.makedirs(dirname)
        self.assertFalse(self.ro.exists(dirname))
    def test_write_file_raisees_error(self):
        self.assertRaises(larch.ReadOnlyMode,
                          self.ro.overwrite_file, 'foo', 'bar')
    def test_readonly_mode_does_not_check_for_new_file(self):
        filename = self.join('foo')
        self.rw.overwrite_file(filename, 'bar')
        self.assertFalse(self.ro.exists(filename))
    def test_readonly_mode_does_not_check_for_modified_file(self):
        filename = self.join('foo')
        self.rw.overwrite_file(filename, 'first')
        self.rw.commit()
        self.assertEqual(self.ro.cat(filename), 'first')
        self.rw.overwrite_file(filename, 'second')
        self.assertEqual(self.ro.cat(filename), 'first')
    def test_readonly_mode_does_not_know_file_is_deleted_in_journal(self):
        filename = self.join('foo/bar')
        self.rw.overwrite_file(filename, 'bar')
        self.rw.commit()
        self.rw.remove(filename)
        self.assertEqual(self.ro.cat(filename), 'bar')
    def tests_lists_no_files_initially(self):
        dirname = self.join('foo')
        self.assertEqual(list(self.ro.list_files(dirname)), [])
    def test_lists_files_correctly_when_no_changes(self):
        dirname = self.join('foo')
        filename = self.join('foo/bar')
        self.rw.overwrite_file(filename, 'bar')
        self.rw.commit()
        self.assertEqual(list(self.ro.list_files(dirname)), [filename])
    def test_lists_added_file_correctly(self):
        dirname = self.join('foo')
        filename = self.join('foo/bar')
        self.rw.overwrite_file(filename, 'bar')
        self.assertEqual(list(self.rw.list_files(dirname)), [filename])
        self.assertEqual(list(self.ro.list_files(dirname)), [])
    def test_lists_added_file_correctly_when_dir_existed_already(self):
        dirname = self.join('foo')
        filename = self.join('foo/bar')
        filename2 = self.join('foo/foobar')
        self.rw.overwrite_file(filename, 'bar')
        self.rw.commit()
        self.rw.overwrite_file(filename2, 'yoyo')
        self.assertEqual(sorted(list(self.rw.list_files(dirname))),
                         sorted([filename, filename2]))
        self.assertEqual(list(self.ro.list_files(dirname)), [filename])
    def test_lists_removed_file_correctly(self):
        dirname = self.join('foo')
        filename = self.join('foo/bar')
        self.rw.overwrite_file(filename, 'bar')
        self.rw.commit()
        self.rw.remove(filename)
        self.assertEqual(list(self.rw.list_files(dirname)), [])
        self.assertEqual(list(self.ro.list_files(dirname)), [filename])
larch-1.20131130/larch/lru.py 0000644 0001750 0001750 00000010137 12246332521 015331 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius, Richard Braakman
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import heapq
import logging
class LRUCache(object):
    '''A least-recently-used cache.
    This class caches objects, based on keys. The cache has a fixed size,
    in number of objects. When a new object is added to the cache, the
    least recently used old object is dropped. Each object is associated
    with a key, and use is defined as retrieval of the object using the key.
    Two hooks are provided for: for removing an object by user request,
    and when it is automatically removed due to cache overflow. Either
    hook is called with the key and object as arguments.
    '''
    def __init__(self, max_size, remove_hook=None, forget_hook=None):
        self.max_size = max_size
        # Together, obj_before and obj_after form a random access
        # double-linked sequence. None used as the sentinel on both ends.
        # obj_after[None] is the oldest object; obj_before[None] is the
        # newest. NOTE: this requires cached objects to be hashable.
        self.obj_before = dict()
        self.obj_after = dict()
        self.obj_before[None] = None
        self.obj_after[None] = None
        self.ids = dict() # maps key to object
        self.objs = dict() # maps object to key
        self.remove_hook = remove_hook
        self.forget_hook = forget_hook
        # Hit/miss counters, for log_stats only.
        self.hits = 0
        self.misses = 0
    def log_stats(self): # pragma: no cover
        '''Log cache hit/miss statistics at debug level.'''
        logging.debug('LRUCache %s: hits=%s misses=%s' %
                      (self, self.hits, self.misses))
    def __len__(self):
        return len(self.ids)
    def keys(self):
        '''List keys for objects in cache.'''
        return self.ids.keys()
    def add(self, key, obj):
        '''Add new item to cache.'''
        if key in self.ids:
            # Re-adding an existing key replaces the object (and calls
            # the remove hook for the old one).
            self.remove(key)
        # Link obj in as the newest element (just before the sentinel).
        before = self.obj_before[None]
        self.obj_before[None] = obj
        self.obj_before[obj] = before
        self.obj_after[before] = obj
        self.obj_after[obj] = None
        self.ids[key] = obj
        self.objs[obj] = key
        while len(self.ids) > self.max_size:
            self._forget_oldest()
    def _forget_oldest(self):
        # Drop the least recently used object, then tell the forget hook.
        obj = self.obj_after[None]
        key = self.objs[obj]
        self._remove(key)
        if self.forget_hook:
            self.forget_hook(key, obj)
    def _remove(self, key):
        # Unlink the object from the double-linked sequence and drop it
        # from both mappings. No hooks are called here.
        obj = self.ids[key]
        before = self.obj_before[obj]
        after = self.obj_after[obj]
        self.obj_before[after] = before
        self.obj_after[before] = after
        del self.obj_before[obj]
        del self.obj_after[obj]
        del self.ids[key]
        del self.objs[obj]
    def get(self, key):
        '''Retrieve item from cache.
        Return object associated with key, or None.
        '''
        if key in self.ids:
            self.hits += 1
            # Re-insert to mark the object as most recently used.
            obj = self.ids[key]
            self.remove(key)
            self.add(key, obj)
            return obj
        else:
            self.misses += 1
            return None
    def remove(self, key):
        '''Remove an item from the cache.
        Return True if item was in cache, False otherwise.
        '''
        if key in self.ids:
            obj = self.ids[key]
            self._remove(key)
            if self.remove_hook:
                self.remove_hook(key, obj)
            return True
        else:
            return False
    def remove_oldest(self):
        '''Remove oldest object.
        Return key and object.
        '''
        obj = self.obj_after[None]
        key = self.objs[obj]
        self.remove(key)
        return key, obj
larch-1.20131130/larch/lru_tests.py 0000644 0001750 0001750 00000007544 12246332521 016563 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
import larch
class LRUCacheTests(unittest.TestCase):
    '''Unit tests for larch.LRUCache eviction order and hooks.'''
    def setUp(self):
        # Hooks are attached after construction; the recorded calls go
        # into self.removed and self.forgotten.
        self.cache = larch.LRUCache(4)
        self.cache.remove_hook = self.remove_hook
        self.cache.forget_hook = self.forget_hook
        self.removed = []
        self.forgotten = []
    def remove_hook(self, key, obj):
        self.removed.append((key, obj))
    def forget_hook(self, key, obj):
        self.forgotten.append((key, obj))
    def test_does_not_have_remove_hook_initially(self):
        cache = larch.LRUCache(4)
        self.assertEqual(cache.remove_hook, None)
    def test_sets_remove_hook_via_init(self):
        cache = larch.LRUCache(4, remove_hook=self.remove_hook)
        self.assertEqual(cache.remove_hook, self.remove_hook)
    def test_does_not_have_forget_hook_initially(self):
        cache = larch.LRUCache(4)
        self.assertEqual(cache.forget_hook, None)
    def test_sets_forget_hook_via_init(self):
        cache = larch.LRUCache(4, forget_hook=self.forget_hook)
        self.assertEqual(cache.forget_hook, self.forget_hook)
    def test_does_not_contain_object_initially(self):
        self.assertEqual(self.cache.get('foo'), None)
    def test_does_contain_object_after_it_is_added(self):
        self.cache.add('foo', 'bar')
        self.assertEqual(self.cache.get('foo'), 'bar')
    def test_oldest_object_dropped_first(self):
        # Overfilling by one must evict exactly the first-added entry.
        for i in range(self.cache.max_size + 1):
            self.cache.add(i, i)
        self.assertEqual(self.cache.get(0), None)
        self.assertEqual(self.forgotten, [(0, 0)])
        for i in range(1, self.cache.max_size + 1):
            self.assertEqual(self.cache.get(i), i)
    def test_getting_object_prevents_it_from_being_dropped(self):
        # get() refreshes recency, so entry 1 becomes the eviction victim.
        for i in range(self.cache.max_size + 1):
            self.cache.add(i, i)
        self.cache.get(0)
        self.assertEqual(self.cache.get(1), None)
        self.assertEqual(self.forgotten, [(1, 1)])
        for i in [0] + range(2, self.cache.max_size + 1):
            self.assertEqual(self.cache.get(i), i)
    def test_adding_key_twice_changes_object(self):
        self.cache.add('foo', 'foo')
        self.cache.add('foo', 'bar')
        self.assertEqual(self.cache.get('foo'), 'bar')
    def test_removes_object(self):
        self.cache.add('foo', 'bar')
        gotit = self.cache.remove('foo')
        self.assertEqual(gotit, True)
        self.assertEqual(self.cache.get('foo'), None)
        self.assertEqual(self.removed, [('foo', 'bar')])
    def test_remove_returns_False_for_unknown_object(self):
        self.assertEqual(self.cache.remove('foo'), False)
    def test_removes_oldest_object(self):
        self.cache.add(0, 0)
        self.cache.add(1, 1)
        self.assertEqual(self.cache.remove_oldest(), (0, 0))
        self.assertEqual(self.cache.get(0), None)
    def test_length_is_initially_zero(self):
        self.assertEqual(len(self.cache), 0)
    def test_length_is_correct_after_adds(self):
        self.cache.add(0, 0)
        self.assertEqual(len(self.cache), 1)
    def test_has_initially_no_keys(self):
        self.assertEqual(self.cache.keys(), [])
    def test_has_keys_after_add(self):
        self.cache.add(0, 1)
        self.assertEqual(self.cache.keys(), [0])
larch-1.20131130/larch/nodes.py 0000644 0001750 0001750 00000015712 12246332521 015643 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import bisect
import larch
class FrozenNode(larch.Error):
    '''User tried to modify node that is frozen.'''
    def __init__(self, node):
        # larch.Error presumably renders self.msg -- TODO confirm.
        self.msg = 'Node %s is frozen against modifications' % node.id
class Node(object):
'''Abstract base class for index and leaf nodes.
A node may be initialized with a list of (key, value) pairs. For
leaf nodes, the values are the actual values. For index nodes, they
are references to other nodes.
A node can be indexed using keys, and give the corresponding value.
Setting key/value pairs cannot be done using indexing. However,
``key in node`` does work, as does iteration over a key's values.
    ``len(node)`` returns the number of keys.
Two nodes compare equal if they have the same key/value pairs.
The node ids do not need to match.
    Nodes can be modified, but only if the ``frozen`` property is false.
If it is set to true, any attempt at modifying the node causes
the ``FrozenNode`` exception to be raised.
'''
    def __init__(self, node_id, keys, values):
        '''Initialize node with parallel keys/values sequences.

        keys are assumed to already be in sorted order -- not verified
        here.
        '''
        self._keys = list(keys)
        self._values = list(values)
        self._dict = dict()
        for i in range(len(keys)):
            self._dict[keys[i]] = values[i]
        self.id = node_id
        self.size = None    # cached size; None means "not computed yet"
        self.frozen = False
    def __getitem__(self, key):
        '''Return value for key; raise KeyError if not present.'''
        return self._dict[key]
    def __contains__(self, key):
        '''Support ``key in node``.'''
        return key in self._dict
    def __eq__(self, other):
        '''Nodes are equal if key/value pairs match; ids are ignored.

        NOTE(review): assumes other is a Node-like object with _keys
        and _values; comparing against anything else raises
        AttributeError rather than returning NotImplemented.
        '''
        return self._keys == other._keys and self._values == other._values
def __iter__(self):
for key in self._keys:
yield key
def __len__(self):
return len(self._keys)
def __nonzero__(self):
return True
def keys(self):
'''Return keys in the node, sorted.'''
return self._keys
def values(self):
'''Return value in the node, in same order as keys.'''
return self._values
def first_key(self):
'''Return smallest key in the node.'''
return self._keys[0]
def find_potential_range(self, minkey, maxkey):
'''Find pairs whose key is in desired range.
``minkey`` and ``maxkey`` are inclusive.
We take into account that for index nodes, a child's key
really represents a range of keys, from the key up to (but
not including) the next child's key. The last child's key
represents a range up to infinity.
Thus we return the first child, if its key lies between
``minkey`` and ``maxkey``, and the last child, if its key is at most
``maxkey``.
'''
def helper(key, default):
x = bisect.bisect_left(self._keys, key)
if x < len(self._keys):
if self._keys[x] > key:
if x == 0:
x = default
else:
x -= 1
else:
if x == 0:
x = None
else:
x -= 1
return x
i = helper(minkey, 0)
j = helper(maxkey, None)
if j is None:
i = None
return i, j
def _error_if_frozen(self):
if self.frozen:
raise FrozenNode(self)
def add(self, key, value):
'''Insert a key/value pair into the right place in a node.'''
self._error_if_frozen()
i = bisect.bisect_left(self._keys, key)
if i < len(self._keys) and self._keys[i] == key:
self._keys[i] = key
self._values[i] = value
else:
self._keys.insert(i, key)
self._values.insert(i, value)
self._dict[key] = value
self.size = None
def remove(self, key):
'''Remove a key from the node.
Raise KeyError if key does not exist in node.
'''
self._error_if_frozen()
i = bisect.bisect_left(self._keys, key)
if i >= len(self._keys) or self._keys[i] != key:
raise KeyError(key)
del self._keys[i]
del self._values[i]
del self._dict[key]
self.size = None
def remove_index_range(self, lo, hi):
'''Remove keys given a range of indexes into pairs.
lo and hi are inclusive.
'''
self._error_if_frozen()
del self._keys[lo:hi+1]
del self._values[lo:hi+1]
self.size = None
class LeafNode(Node):

    '''Leaf node in the tree.

    A leaf node contains key/value pairs (both strings), and has no children.

    '''

    def find_keys_in_range(self, minkey, maxkey):
        '''Find pairs whose key is in desired range.

        ``minkey`` and ``maxkey`` are inclusive.

        '''
        lo = bisect.bisect_left(self._keys, minkey)
        hi = bisect.bisect_left(self._keys, maxkey)
        # The upper bound is exclusive when slicing, so step past an
        # exact match on maxkey to keep it in the result.
        exact_max = hi < len(self._keys) and self._keys[hi] == maxkey
        if exact_max:
            hi += 1
        return self._keys[lo:hi]
class IndexNode(Node):

    '''Index node in the tree.

    An index node contains pairs of keys and references to other nodes
    (node ids, which are integers).

    The other nodes may be either index nodes or leaf nodes.

    '''

    def find_key_for_child_containing(self, key):
        '''Return key for the child that contains ``key``.'''
        pos = bisect.bisect_left(self._keys, key)
        if pos < len(self._keys) and self._keys[pos] == key:
            # Exact match: the child keyed by ``key`` itself.
            return key
        if pos:
            # ``key`` falls within the range covered by the
            # preceding child.
            return self._keys[pos - 1]
        # ``key`` sorts before every child: no child can contain it.
        return None

    def find_children_in_range(self, minkey, maxkey):
        '''Find all children whose key is in the range.

        ``minkey`` and ``maxkey`` are inclusive. Note that a child might
        be returned even if not all of its keys are in the range,
        just some of them. Also, we consider potential keys here,
        not actual keys. We have no way to retrieve the children
        to check which keys they actually have, so instead we
        return which keys might have the desired keys, and the
        caller can go look at those.

        '''
        lo, hi = self.find_potential_range(minkey, maxkey)
        if lo is None or hi is None:
            return []
        return self._values[lo:hi + 1]
larch-1.20131130/larch/nodes_tests.py 0000644 0001750 0001750 00000027676 12246332521 017101 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
import larch
class FrozenNodeTests(unittest.TestCase):

    def test_node_id_is_in_error_message(self):
        # The id of the offending node must appear in the message so
        # the user can tell which node was frozen.
        frozen_node = larch.nodes.Node(123, [], [])
        error = larch.FrozenNode(frozen_node)
        self.assert_('123' in str(error))
class NodeTests(unittest.TestCase):
    '''Tests for behaviour common to all ``larch.nodes.Node`` objects.'''
    def setUp(self):
        # Fixture: two pairs, sorted so expected keys/values can be
        # compared directly against the node's sorted storage order.
        self.node_id = 12765
        self.pairs = [('key2', 'value2'), ('key1', 'value1')]
        self.pairs.sort()
        self.keys = [k for k, v in self.pairs]
        self.values = [v for k, v in self.pairs]
        self.node = larch.nodes.Node(self.node_id, self.keys, self.values)
    def test_has_id(self):
        self.assertEqual(self.node.id, self.node_id)
    def test_empty_node_is_still_true(self):
        # Truthiness is explicit (__nonzero__): even an empty node is true.
        empty = larch.nodes.Node(self.node_id, [], [])
        self.assert_(empty)
    def test_has_no_size(self):
        # The cached encoded size starts out unknown (None).
        self.assertEqual(self.node.size, None)
    def test_has_each_pair(self):
        for key, value in self.pairs:
            self.assertEqual(self.node[key], value)
    def test_raises_keyerror_for_missing_key(self):
        self.assertRaises(KeyError, self.node.__getitem__, 'notexist')
    def test_contains_each_key(self):
        for key, value in self.pairs:
            self.assert_(key in self.node)
    def test_does_not_contain_wrong_key(self):
        self.assertFalse('notexist' in self.node)
    def test_is_equal_to_itself(self):
        self.assert_(self.node == self.node)
    def test_iterates_over_all_keys(self):
        self.assertEqual([k for k in self.node],
                         sorted(k for k, v in self.pairs))
    def test_has_correct_length(self):
        self.assertEqual(len(self.node), len(self.pairs))
    def test_has_keys(self):
        self.assertEqual(self.node.keys(), sorted(k for k, v in self.pairs))
    def test_sorts_keys(self):
        self.assertEqual(self.node.keys(), sorted(k for k, v in self.pairs))
    def test_has_values(self):
        self.assertEqual(self.node.values(),
                         [v for k, v in sorted(self.pairs)])
    def test_returns_correct_first_key(self):
        self.assertEqual(self.node.first_key(), 'key1')
    def test_returns_keys_and_values(self):
        self.assertEqual(self.node.keys(), self.keys)
        self.assertEqual(self.node.values(), self.values)
    # add() must keep keys sorted wherever the new key lands:
    # empty node, end, beginning, middle, and replacing an existing key.
    def test_adds_key_value_pair_to_empty_node(self):
        node = larch.nodes.Node(0, [], [])
        node.add('foo', 'bar')
        self.assertEqual(node.keys(), ['foo'])
        self.assertEqual(node.values(), ['bar'])
        self.assertEqual(node['foo'], 'bar')
    def test_adds_key_value_pair_to_end_of_node_of_one_element(self):
        node = larch.nodes.Node(0, ['foo'], ['bar'])
        node.add('foo2', 'bar2')
        self.assertEqual(node.keys(), ['foo', 'foo2'])
        self.assertEqual(node.values(), ['bar', 'bar2'])
        self.assertEqual(node['foo2'], 'bar2')
    def test_adds_key_value_pair_to_beginning_of_node_of_one_element(self):
        node = larch.nodes.Node(0, ['foo'], ['bar'])
        node.add('bar', 'bar')
        self.assertEqual(node.keys(), ['bar', 'foo'])
        self.assertEqual(node.values(), ['bar', 'bar'])
        self.assertEqual(node['bar'], 'bar')
    def test_adds_key_value_pair_to_middle_of_node_of_two_elements(self):
        node = larch.nodes.Node(0, ['bar', 'foo'], ['bar', 'bar'])
        node.add('duh', 'bar')
        self.assertEqual(node.keys(), ['bar', 'duh', 'foo'])
        self.assertEqual(node.values(), ['bar', 'bar', 'bar'])
        self.assertEqual(node['duh'], 'bar')
    def test_add_replaces_value_for_existing_key(self):
        node = larch.nodes.Node(0, ['bar', 'foo'], ['bar', 'bar'])
        node.add('bar', 'xxx')
        self.assertEqual(node.keys(), ['bar', 'foo'])
        self.assertEqual(node.values(), ['xxx', 'bar'])
        self.assertEqual(node['bar'], 'xxx')
    def test_add_resets_cached_size(self):
        node = larch.nodes.Node(0, [], [])
        node.size = 1234
        node.add('foo', 'bar')
        self.assertEqual(node.size, None)
    # remove() must work at every position and fail cleanly for
    # unknown keys.
    def test_removes_first_key(self):
        node = larch.nodes.Node(0, ['bar', 'duh', 'foo'],
                                ['bar', 'bar', 'bar'])
        node.remove('bar')
        self.assertEqual(node.keys(), ['duh', 'foo'])
        self.assertEqual(node.values(), ['bar', 'bar'])
        self.assertRaises(KeyError, node.__getitem__, 'bar')
    def test_removes_last_key(self):
        node = larch.nodes.Node(0, ['bar', 'duh', 'foo'],
                                ['bar', 'bar', 'bar'])
        node.remove('foo')
        self.assertEqual(node.keys(), ['bar', 'duh'])
        self.assertEqual(node.values(), ['bar', 'bar'])
        self.assertRaises(KeyError, node.__getitem__, 'foo')
    def test_removes_middle_key(self):
        node = larch.nodes.Node(0, ['bar', 'duh', 'foo'],
                                ['bar', 'bar', 'bar'])
        node.remove('duh')
        self.assertEqual(node.keys(), ['bar', 'foo'])
        self.assertEqual(node.values(), ['bar', 'bar'])
        self.assertRaises(KeyError, node.__getitem__, 'duh')
    def test_raises_exception_when_removing_unknown_key(self):
        node = larch.nodes.Node(0, ['bar', 'duh', 'foo'],
                                ['bar', 'bar', 'bar'])
        self.assertRaises(KeyError, node.remove, 'yo')
    def test_remove_resets_cached_size(self):
        node = larch.nodes.Node(0, ['foo'], ['bar'])
        node.size = 1234
        node.remove('foo')
        self.assertEqual(node.size, None)
    def test_removes_index_range(self):
        # hi (5) past the end is fine: Python slicing clamps it.
        node = larch.nodes.Node(0, ['bar', 'duh', 'foo'],
                                ['bar', 'bar', 'bar'])
        node.size = 12375654
        node.remove_index_range(1, 5)
        self.assertEqual(node.keys(), ['bar'])
        self.assertEqual(node.values(), ['bar'])
        self.assertEqual(node.size, None)
    def test_finds_keys_in_range(self):
        # The children's keys are 'bar' and 'foo'. We need to test for
        # every combination of minkey and maxkey being less than, equal,
        # or greater than either child key (as long as minkey <= maxkey).
        node = larch.LeafNode(0, ['bar', 'foo'], ['bar', 'foo'])
        find = node.find_keys_in_range
        self.assertEqual(find('aaa', 'aaa'), [])
        self.assertEqual(find('aaa', 'bar'), ['bar'])
        self.assertEqual(find('aaa', 'ccc'), ['bar'])
        self.assertEqual(find('aaa', 'foo'), ['bar', 'foo'])
        self.assertEqual(find('aaa', 'ggg'), ['bar', 'foo'])
        self.assertEqual(find('bar', 'bar'), ['bar'])
        self.assertEqual(find('bar', 'ccc'), ['bar'])
        self.assertEqual(find('bar', 'foo'), ['bar', 'foo'])
        self.assertEqual(find('bar', 'ggg'), ['bar', 'foo'])
        self.assertEqual(find('ccc', 'ccc'), [])
        self.assertEqual(find('ccc', 'foo'), ['foo'])
        self.assertEqual(find('ccc', 'ggg'), ['foo'])
        self.assertEqual(find('foo', 'foo'), ['foo'])
        self.assertEqual(find('foo', 'ggg'), ['foo'])
        self.assertEqual(find('ggg', 'ggg'), [])
    def test_finds_no_potential_range_in_empty_node(self):
        node = larch.LeafNode(0, [], [])
        self.assertEqual(node.find_potential_range('aaa', 'bbb'), (None, None))
    def test_finds_potential_ranges(self):
        # The children's keys are 'bar' and 'foo'. We need to test for
        # every combination of minkey and maxkey being less than, equal,
        # or greater than either child key (as long as minkey <= maxkey).
        node = larch.LeafNode(0, ['bar', 'foo'], ['bar', 'foo'])
        find = node.find_potential_range
        self.assertEqual(find('aaa', 'aaa'), (None, None))
        self.assertEqual(find('aaa', 'bar'), (0, 0))
        self.assertEqual(find('aaa', 'ccc'), (0, 0))
        self.assertEqual(find('aaa', 'foo'), (0, 1))
        self.assertEqual(find('aaa', 'ggg'), (0, 1))
        self.assertEqual(find('bar', 'bar'), (0, 0))
        self.assertEqual(find('bar', 'ccc'), (0, 0))
        self.assertEqual(find('bar', 'foo'), (0, 1))
        self.assertEqual(find('bar', 'ggg'), (0, 1))
        self.assertEqual(find('ccc', 'ccc'), (0, 0))
        self.assertEqual(find('ccc', 'foo'), (0, 1))
        self.assertEqual(find('ccc', 'ggg'), (0, 1))
        self.assertEqual(find('foo', 'foo'), (1, 1))
        self.assertEqual(find('foo', 'ggg'), (1, 1))
        # This one is a bit special. The last key may refer to a
        # child that is an index node, so it _might_ have keys
        # in the desired range.
        self.assertEqual(find('ggg', 'ggg'), (1, 1))
    # Frozen nodes must reject every kind of mutation.
    def test_is_not_frozen(self):
        self.assertEqual(self.node.frozen, False)
    def test_freezing_makes_add_raise_error(self):
        self.node.frozen = True
        self.assertRaises(larch.FrozenNode, self.node.add, 'foo', 'bar')
    def test_freezing_makes_remove_raise_error(self):
        self.node.frozen = True
        self.assertRaises(larch.FrozenNode, self.node.remove, 'foo')
    def test_freezing_makes_remove_index_range_raise_error(self):
        self.node.frozen = True
        self.assertRaises(larch.FrozenNode, self.node.remove_index_range, 0, 1)
class IndexNodeTests(unittest.TestCase):
    '''Tests for ``larch.IndexNode`` child-lookup behaviour.'''
    def setUp(self):
        # An index node whose two children are leaves keyed 'bar'
        # and 'foo'; the index stores the leaves' ids as values.
        self.leaf1 = larch.LeafNode(0, ['bar'], ['bar'])
        self.leaf2 = larch.LeafNode(1, ['foo'], ['foo'])
        self.index_id = 1234
        self.index = larch.IndexNode(self.index_id, ['bar', 'foo'],
                                     [self.leaf1.id, self.leaf2.id])
    def test_find_key_for_child_containing(self):
        # A child's key covers the range up to the next child's key,
        # so 'bar2' maps to the child keyed 'bar', etc.
        find = self.index.find_key_for_child_containing
        self.assertEqual(find('aaa'), None)
        self.assertEqual(find('bar'), 'bar')
        self.assertEqual(find('bar2'), 'bar')
        self.assertEqual(find('foo'), 'foo')
        self.assertEqual(find('foo2'), 'foo')
    def test_returns_none_when_no_child_contains_key(self):
        self.assertEqual(self.index.find_key_for_child_containing('a'), None)
    def test_finds_no_key_when_node_is_empty(self):
        empty = larch.IndexNode(0, [], [])
        self.assertEqual(empty.find_key_for_child_containing('f00'), None)
    def test_finds_no_children_in_range_when_empty(self):
        empty = larch.IndexNode(0, [], [])
        self.assertEqual(empty.find_children_in_range('bar', 'foo'), [])
    def test_finds_children_in_ranges(self):
        # The children's keys are 'bar' and 'foo'. We need to test for
        # every combination of minkey and maxkey being less than, equal,
        # or greater than either child key (as long as minkey <= maxkey).
        find = self.index.find_children_in_range
        bar = self.leaf1.id
        foo = self.leaf2.id
        self.assertEqual(find('aaa', 'aaa'), [])
        self.assertEqual(find('aaa', 'bar'), [bar])
        self.assertEqual(find('aaa', 'ccc'), [bar])
        self.assertEqual(find('aaa', 'foo'), [bar, foo])
        self.assertEqual(find('aaa', 'ggg'), [bar, foo])
        self.assertEqual(find('bar', 'bar'), [bar])
        self.assertEqual(find('bar', 'ccc'), [bar])
        self.assertEqual(find('bar', 'foo'), [bar, foo])
        self.assertEqual(find('bar', 'ggg'), [bar, foo])
        self.assertEqual(find('ccc', 'ccc'), [bar])
        self.assertEqual(find('ccc', 'foo'), [bar, foo])
        self.assertEqual(find('ccc', 'ggg'), [bar, foo])
        self.assertEqual(find('foo', 'foo'), [foo])
        self.assertEqual(find('foo', 'ggg'), [foo])
        self.assertEqual(find('ggg', 'ggg'), [foo])
larch-1.20131130/larch/nodestore.py 0000644 0001750 0001750 00000030744 12246332521 016537 0 ustar jenkins jenkins # Copyright 2010, 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import larch
class NodeMissing(larch.Error):

    '''A node cannot be found from a NodeStore.'''

    def __init__(self, node_store, node_id, error=None, error_msg=''):
        # When an underlying OS error is given, it replaces any
        # caller-supplied message with the errno details.
        if error is not None:
            error_msg = ': %s: %s: %s' % (
                error.errno, error.strerror, error.filename)
        self.msg = 'Node %s cannot be found in the node store %s%s' % (
            hex(node_id), node_store, error_msg)
class NodeTooBig(larch.Error):

    '''User tried to put a node that was too big into the store.'''

    def __init__(self, node, node_size):
        kind = node.__class__.__name__
        self.msg = '%s %s is too big (%d bytes)' % (
            kind, hex(node.id), node_size)
class NodeExists(larch.Error):

    '''User tried to put a node that already exists in the store.'''

    def __init__(self, node_id):
        hex_id = hex(node_id)
        self.msg = 'Node %s is already in the store' % hex_id
class NodeCannotBeModified(larch.Error):

    '''User called start_modification on node that cannot be modified.'''

    def __init__(self, node_id):
        hex_id = hex(node_id)
        self.msg = 'Node %s cannot be modified' % hex_id
class NodeStore(object): # pragma: no cover

    '''Abstract base class for storing nodes externally.

    The ``BTree`` class itself does not handle external storage of nodes.
    Instead, it is given an object that implements the API in this
    class. An actual implementation might keep nodes in memory, or
    store them on disk using a filesystem, or a database.

    Node stores deal with nodes as byte strings: the ``codec``
    encodes them before handing them to the store, and decodes them
    when it gets them from the store.

    Each node has an identifier that is unique within the store.
    The identifier is an integer, and the caller makes the following
    guarantees about it:

    * it is a non-negative integer
    * new nodes are assigned the next consecutive one
    * it is never re-used

    Further, the caller makes the following guarantees about the encoded
    nodes:

    * they have a strict upper size limit
    * the tree attempts to fill nodes as close to the limit as possible

    The size limit is given to the node store at initialization time.
    It is accessible via the ``node_size`` property. Implementations of
    this API must handle that in some suitable way, preferably by
    inheriting from this class and calling its initializer.

    ``self.max_value_size`` gives the maximum size of a value stored
    in a node.

    A node store additionally stores some metadata, as key/value
    pairs, where both key and value is a shortish string. The whole
    pair must fit into a node, but more than one node can be used for
    metadata.

    '''

    def __init__(self, allow_writes, node_size, codec):
        self.allow_writes = allow_writes
        self.node_size = node_size
        self.codec = codec
        # Floor division keeps this an integer on both Python 2 and
        # Python 3; a plain / would yield a float under Python 3.
        self.max_value_size = (node_size // 2) - codec.leaf_header.size

    def max_index_pairs(self):
        '''Max number of index pairs in an index node.'''
        return self.codec.max_index_pairs(self.node_size)

    def set_metadata(self, key, value):
        '''Set a metadata key/value pair.'''

    def get_metadata(self, key):
        '''Return value that corresponds to a key.'''

    def get_metadata_keys(self):
        '''Return list of all metadata keys.'''

    def remove_metadata(self, key):
        '''Remove a metadata key, and its corresponding value.'''

    def save_metadata(self):
        '''Save metadata persistently, if applicable.

        Not all node stores are persistent, and this method is
        not relevant to them. However, if the user does not call
        this method, none of the changes they make will be stored
        persistently even with a persistent store.

        '''

    def put_node(self, node):
        '''Put a new node into the store.'''

    def get_node(self, node_id):
        '''Return a node from the store.

        Raise the ``NodeMissing`` exception if the node is not in the
        store (has never been, or has been removed). Raise other
        errors as suitable.

        '''

    def can_be_modified(self, node):
        '''Can a node be modified?

        A node can be modified only when this store holds the sole
        reference to it.

        '''
        return self.get_refcount(node.id) == 1

    def start_modification(self, node):
        '''Start modification of a node.

        User must call this before modifying a node in place.

        If a node cannot be modified, ``NodeCannotBeModified`` exception
        will be raised.

        '''

    def remove_node(self, node_id):
        '''Remove a node from the store.'''

    def list_nodes(self):
        '''Return list of ids of all nodes in store.'''

    def get_refcount(self, node_id):
        '''Return the reference count for a node.'''

    def set_refcount(self, node_id, refcount):
        '''Set the reference count for a node.'''

    def save_refcounts(self):
        '''Save refcounts to disk.

        This method only applies to node stores that persist.

        '''

    def commit(self):
        '''Make sure all changes are committed to the store.

        Until this is called, there's no guarantee that any of the
        changes since the previous commit are persistent.

        '''
class NodeStoreTests(object): # pragma: no cover
    '''Re-useable tests for ``NodeStore`` implementations.
    The ``NodeStore`` base class can't be usefully instantiated itself.
    Instead you are supposed to sub-class it and implement the API in
    a suitable way for yourself.
    This class implements a number of tests that the API implementation
    must pass. The implementation's own test class should inherit from
    this class, and ``unittest.TestCase``.
    The test sub-class should define a setUp method that sets the following:
    * ``self.ns`` to an instance of the API implementation sub-class
    * ``self.node_size`` to the node size
    Key size (``self.key_bytes``) is always 3.
    '''
    # Fixed key size for every node built by these tests.
    key_bytes = 3
    def assertEqualNodes(self, n1, n2):
        '''Assert that two nodes are equal.
        Equal means same keys, and same values for keys. Nodes can be
        either leaf or index ones.
        '''
        self.assertEqual(sorted(n1.keys()), sorted(n2.keys()))
        for key in n1:
            self.assertEqual(n1[key], n2[key])
    def test_sets_node_size(self):
        self.assertEqual(self.ns.node_size, self.node_size)
    def test_sets_max_value_size(self):
        self.assert_(self.ns.max_value_size > 1)
        self.assert_(self.ns.max_value_size < self.node_size / 2)
    def test_sets_metadata(self):
        self.ns.set_metadata('foo', 'bar')
        self.assert_('foo' in self.ns.get_metadata_keys())
        self.assertEqual(self.ns.get_metadata('foo'), 'bar')
    def test_sets_existing_metadata(self):
        self.ns.set_metadata('foo', 'bar')
        self.ns.set_metadata('foo', 'foobar')
        self.assert_('foo' in self.ns.get_metadata_keys())
        self.assertEqual(self.ns.get_metadata('foo'), 'foobar')
    def test_removes_metadata(self):
        self.ns.set_metadata('foo', 'bar')
        self.ns.remove_metadata('foo')
        self.assert_('foo' not in self.ns.get_metadata_keys())
    def test_sets_several_metadata_keys(self):
        # NOTE(review): dict.iteritems and list-concatenating keys()
        # are Python 2 only idioms; this suite does not run on Python 3.
        old_keys = self.ns.get_metadata_keys()
        pairs = dict(('%d' % i, '%0128d' % i) for i in range(1024))
        for key, value in pairs.iteritems():
            self.ns.set_metadata(key, value)
        self.assertEqual(sorted(self.ns.get_metadata_keys()),
                         sorted(pairs.keys() + old_keys))
        for key, value in pairs.iteritems():
            self.assertEqual(self.ns.get_metadata(key), value)
    def test_raises_error_when_getting_unknown_key(self):
        self.assertRaises(KeyError, self.ns.get_metadata, 'foo')
    def test_raises_error_when_removing_unknown_key(self):
        self.assertRaises(KeyError, self.ns.remove_metadata, 'foo')
    def test_has_no_node_zero_initially(self):
        self.assertRaises(NodeMissing, self.ns.get_node, 0)
    def test_lists_no_nodes_initially(self):
        self.assertEqual(self.ns.list_nodes(), [])
    def test_puts_and_gets_same(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.commit()
        self.assertEqualNodes(self.ns.get_node(0), node)
    def test_put_freezes_node(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.assert_(node.frozen)
    def test_get_freezes_node(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        node2 = self.ns.get_node(0)
        self.assert_(node2.frozen)
    # Modifiability depends on the refcount: exactly 1 means this
    # store holds the sole reference and in-place changes are safe.
    def test_node_not_in_store_can_not_be_modified(self):
        node = larch.LeafNode(0, [], [])
        self.assertFalse(self.ns.can_be_modified(node))
    def test_node_with_refcount_0_can_not_be_modified(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.set_refcount(node.id, 0)
        self.assertFalse(self.ns.can_be_modified(node))
    def test_node_with_refcount_1_can_be_modified(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.set_refcount(node.id, 1)
        self.assertTrue(self.ns.can_be_modified(node))
    def test_node_with_refcount_2_can_not_be_modified(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.set_refcount(node.id, 2)
        self.assertFalse(self.ns.can_be_modified(node))
    def test_unfreezes_node_when_modification_starts(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.set_refcount(node.id, 1)
        self.ns.start_modification(node)
        self.assertFalse(node.frozen)
    def test_removes_node(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.commit()
        self.ns.remove_node(0)
        self.assertRaises(NodeMissing, self.ns.get_node, 0)
        self.assertEqual(self.ns.list_nodes(), [])
    def test_removes_node_from_upload_queue_if_one_exists(self):
        # Same as above, but without commit: the node is still in the
        # upload queue rather than in persistent storage.
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.remove_node(0)
        self.assertRaises(NodeMissing, self.ns.get_node, 0)
        self.assertEqual(self.ns.list_nodes(), [])
    def test_lists_node_zero(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.commit()
        node_ids = self.ns.list_nodes()
        self.assertEqual(node_ids, [node.id])
    def test_put_allows_to_overwrite_a_node(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        node = larch.LeafNode(0, ['foo'], ['bar'])
        self.ns.put_node(node)
        new = self.ns.get_node(0)
        self.assertEqual(new.keys(), ['foo'])
        self.assertEqual(new.values(), ['bar'])
    def test_put_allows_to_overwrite_a_node_after_upload_queue_push(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.commit()
        node = larch.LeafNode(0, ['foo'], ['bar'])
        self.ns.put_node(node)
        self.ns.commit()
        new = self.ns.get_node(0)
        self.assertEqual(new.keys(), ['foo'])
        self.assertEqual(new.values(), ['bar'])
    def test_remove_raises_nodemissing_if_node_does_not_exist(self):
        self.assertRaises(NodeMissing, self.ns.remove_node, 0)
    def test_returns_zero_count_for_unknown_node_id(self):
        self.assertEqual(self.ns.get_refcount(123), 0)
    def test_sets_refcount(self):
        self.ns.set_refcount(0, 123)
        self.assertEqual(self.ns.get_refcount(0), 123)
    def test_updates_refcount(self):
        self.ns.set_refcount(0, 123)
        self.ns.set_refcount(0, 0)
        self.assertEqual(self.ns.get_refcount(0), 0)
larch-1.20131130/larch/nodestore_disk.py 0000644 0001750 0001750 00000023544 12246332521 017551 0 ustar jenkins jenkins # Copyright 2010, 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import ConfigParser
import logging
import os
import StringIO
import struct
import tempfile
import traceback
import tracing
import larch
DIR_DEPTH = 3
DIR_BITS = 12
DIR_SKIP = 13
class FormatProblem(larch.Error): # pragma: no cover

    '''The on-disk data is missing or uses an incompatible format.'''

    def __init__(self, msg):
        self.msg = msg
class LocalFS(object): # pragma: no cover

    '''Access to local filesystem.

    The ``NodeStoreDisk`` class will use a class with this interface
    to do disk operations. This class implements access to the local
    filesystem.

    '''

    def makedirs(self, dirname):
        '''Create directories, similar to os.makedirs.'''
        if not os.path.exists(dirname):
            os.makedirs(dirname)

    def rmdir(self, dirname):
        '''Remove an empty directory.'''
        os.rmdir(dirname)

    def cat(self, filename):
        '''Return contents of a file.'''
        # Use a context manager so the file descriptor is closed
        # deterministically; the previous ``file(filename).read()``
        # form leaked the handle until garbage collection (and the
        # ``file`` builtin does not exist on Python 3).
        with open(filename) as f:
            return f.read()

    def overwrite_file(self, filename, contents):
        '''Write data to disk. File may exist already.

        The write is atomic with respect to readers: data goes to a
        temporary file in the same directory, which is then renamed
        into place, so a partially written file is never observed.

        '''
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        fd, tempname = tempfile.mkstemp(dir=dirname)
        try:
            # Close the descriptor even if the write fails, so a
            # failed overwrite does not leak an open fd.
            os.write(fd, contents)
        finally:
            os.close(fd)
        os.rename(tempname, filename)

    def exists(self, filename):
        '''Does a file exist already?'''
        return os.path.exists(filename)

    def isdir(self, filename):
        '''Does filename exist, and is it a directory?'''
        return os.path.isdir(filename)

    def rename(self, old, new):
        '''Rename a file.'''
        os.rename(old, new)

    def remove(self, filename):
        '''Remove a file.'''
        os.remove(filename)

    def listdir(self, dirname):
        '''Return basenames from directory.'''
        return os.listdir(dirname)
class NodeStoreDisk(larch.NodeStore):
'''An implementation of larch.NodeStore API for on-disk storage.
The caller will specify a directory in which the nodes will be stored.
Each node is stored in its own file, named after the node identifier.
The ``vfs`` optional argument to the initializer can be used to
override filesystem access. By default, the local filesystem is
used, but any class can be substituted.
'''
# The on-disk format version is format_base combined with whatever
# format the codec specifies.
format_base = 1
nodedir = 'nodes'
def __init__(self, allow_writes, node_size, codec, dirname=None,
upload_max=1024, lru_size=500, vfs=None, format=None):
tracing.trace('new NodeStoreDisk: %s', dirname)
assert dirname is not None
if format is not None:
tracing.trace('forcing format_base: %s', format)
self.format_base = format
larch.NodeStore.__init__(
self, allow_writes=allow_writes, node_size=node_size, codec=codec)
self.dirname = dirname
self.metadata_name = os.path.join(dirname, 'metadata')
self.metadata = None
self.rs = larch.RefcountStore(self)
self.cache_size = lru_size
self.cache = larch.LRUCache(self.cache_size)
self.upload_max = upload_max
self.upload_queue = larch.UploadQueue(self._really_put_node,
self.upload_max)
self.vfs = vfs if vfs != None else LocalFS()
self.journal = larch.Journal(allow_writes, self.vfs, dirname)
self.idpath = larch.IdPath(os.path.join(dirname, self.nodedir),
DIR_DEPTH, DIR_BITS, DIR_SKIP)
@property
def format_version(self):
return '%s/%s' % (self.format_base, self.codec.format)
def _load_metadata(self):
if self.metadata is None:
tracing.trace('load metadata')
self.metadata = ConfigParser.ConfigParser()
self.metadata.add_section('metadata')
if self.journal.exists(self.metadata_name):
tracing.trace('metadata file (%s) exists, reading it' %
self.metadata_name)
data = self.journal.cat(self.metadata_name)
f = StringIO.StringIO(data)
self.metadata.readfp(f)
self._verify_metadata()
else:
self.metadata.set('metadata', 'format', self.format_version)
def _verify_metadata(self):
if not self.metadata.has_option('metadata', 'format'):
raise FormatProblem('larch on-disk format missing '
'(old version?): %s' % self.dirname)
format = self.metadata.get('metadata', 'format')
if format != self.format_version:
raise FormatProblem('larch on-disk format is incompatible '
'(is %s, should be %s): %s' %
(format, self.format_version,
self.dirname))
def get_metadata_keys(self):
self._load_metadata()
return self.metadata.options('metadata')
def get_metadata(self, key):
self._load_metadata()
if self.metadata.has_option('metadata', key):
return self.metadata.get('metadata', key)
else:
raise KeyError(key)
def set_metadata(self, key, value):
self._load_metadata()
self.metadata.set('metadata', key, value)
tracing.trace('key=%s value=%s', repr(key), repr(value))
def remove_metadata(self, key):
self._load_metadata()
if self.metadata.has_option('metadata', key):
self.metadata.remove_option('metadata', key)
else:
raise KeyError(key)
def save_metadata(self):
tracing.trace('saving metadata')
self._load_metadata()
f = StringIO.StringIO()
self.metadata.write(f)
self.journal.overwrite_file(self.metadata_name, f.getvalue())
def pathname(self, node_id):
return self.idpath.convert(node_id)
def put_node(self, node):
tracing.trace('putting node %s into cache and upload queue' % node.id)
node.frozen = True
self.cache.add(node.id, node)
self.upload_queue.put(node)
def push_upload_queue(self):
tracing.trace('pushing upload queue')
self.upload_queue.push()
self.cache.log_stats()
self.cache = larch.LRUCache(self.cache_size)
def _really_put_node(self, node):
tracing.trace('really put node %s' % node.id)
encoded_node = self.codec.encode(node)
if len(encoded_node) > self.node_size:
raise larch.NodeTooBig(node, len(encoded_node))
name = self.pathname(node.id)
tracing.trace('node %s to be stored in %s' % (node.id, name))
self.journal.overwrite_file(name, encoded_node)
def get_node(self, node_id):
tracing.trace('getting node %s' % node_id)
node = self.cache.get(node_id)
if node is not None:
tracing.trace('cache hit: %s' % node_id)
return node
node = self.upload_queue.get(node_id)
if node is not None:
tracing.trace('upload queue hit: %s' % node_id)
return node
name = self.pathname(node_id)
tracing.trace('reading node %s from file %s' % (node_id, name))
try:
encoded = self.journal.cat(name)
except (IOError, OSError), e:
logging.debug('Error reading node: %s: %s: %s' %
(e.errno, e.strerror, e.filename or name))
logging.debug(traceback.format_exc())
raise larch.NodeMissing(self.dirname, node_id, error=e)
else:
node = self.codec.decode(encoded)
node.frozen = True
self.cache.add(node.id, node)
return node
def start_modification(self, node):
tracing.trace('start modiyfing node %s' % node.id)
self.upload_queue.remove(node.id)
node.frozen = False
    def remove_node(self, node_id):
        '''Remove a node from cache, upload queue, and disk.
        Raises larch.NodeMissing if the node was neither in the journal
        nor in the upload queue.
        '''
        tracing.trace('removing node %s (incl. cache and upload queue)' %
                      node_id)
        self.cache.remove(node_id)
        got_it = self.upload_queue.remove(node_id)
        name = self.pathname(node_id)
        if self.journal.exists(name):
            self.journal.remove(name)
        elif not got_it:
            raise larch.NodeMissing(
                self.dirname,
                node_id,
                error_msg='attempted to remove node that is not '
                          'in journal or in upload queue')
def list_nodes(self):
queued = self.upload_queue.list_ids()
nodedir = os.path.join(self.dirname, self.nodedir)
uploaded = []
if self.journal.exists(nodedir):
for filename in self.journal.list_files(nodedir):
uploaded.append(int(os.path.basename(filename), 16))
return queued + uploaded
    def get_refcount(self, node_id):
        '''Return a node's reference count (delegates to RefcountStore).'''
        return self.rs.get_refcount(node_id)
    def set_refcount(self, node_id, refcount):
        '''Set a node's reference count (delegates to RefcountStore).'''
        self.rs.set_refcount(node_id, refcount)
    def save_refcounts(self):
        '''Persist all modified reference counts via the RefcountStore.'''
        tracing.trace('saving refcounts')
        self.rs.save_refcounts()
    def commit(self):
        '''Commit pending changes: flush nodes, save metadata, commit journal.
        NOTE(review): refcounts are not saved here; presumably the caller
        saves them separately via save_refcounts — confirm.
        '''
        self.push_upload_queue()
        self.save_metadata()
        self.journal.commit()
larch-1.20131130/larch/nodestore_disk_tests.py 0000644 0001750 0001750 00000010167 12246332521 020770 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
import shutil
import tempfile
import unittest
import larch
import nodestore_disk
class NodeStoreDiskTests(unittest.TestCase, larch.NodeStoreTests):
    # Runs the shared larch.NodeStoreTests suite against the on-disk
    # node store, plus the disk-specific tests below. key_bytes is
    # provided by larch.NodeStoreTests.
    def setUp(self):
        self.node_size = 4096
        self.codec = larch.NodeCodec(self.key_bytes)
        self.tempdir = tempfile.mkdtemp()
        self.ns = self.new_ns()
    def tearDown(self):
        shutil.rmtree(self.tempdir)
    def new_ns(self, format=None):
        # A fresh store over the same directory, so tests can check what
        # persists across store instances.
        return nodestore_disk.NodeStoreDisk(True, self.node_size, self.codec,
                                            dirname=self.tempdir,
                                            format=format)
    def test_metadata_has_format_version(self):
        self.assertEqual(self.ns.get_metadata('format'),
                         self.ns.format_version)
    def test_metadata_format_version_is_persistent(self):
        self.ns.save_metadata()
        ns2 = self.new_ns()
        self.assertEqual(ns2.get_metadata('format'),
                         ns2.format_version)
    def test_refuses_to_open_if_format_version_is_old(self):
        old = self.new_ns(format=0)
        old.save_metadata()
        new = self.new_ns(format=1)
        self.assertRaises(larch.Error, new.get_metadata, 'format')
    def test_refuses_to_open_if_format_version_is_not_there(self):
        self.ns.remove_metadata('format')
        self.ns.save_metadata()
        ns2 = self.new_ns()
        self.assertRaises(larch.Error, ns2.get_metadata, 'format')
    def test_has_persistent_metadata(self):
        self.ns.set_metadata('foo', 'bar')
        self.ns.save_metadata()
        ns2 = self.new_ns()
        self.assertEqual(ns2.get_metadata('foo'), 'bar')
    def test_metadata_does_not_persist_without_saving(self):
        self.ns.set_metadata('foo', 'bar')
        ns2 = self.new_ns()
        self.assertEqual(ns2.get_metadata_keys(), ['format'])
    def test_refcounts_persist(self):
        self.ns.set_refcount(0, 1234)
        # NOTE(review): this sets per_group on the TestCase, not on the
        # refcount store; it looks like a stray/dead line — confirm.
        self.per_group = 2
        self.ns.save_refcounts()
        self.ns.journal.commit()
        ns2 = self.new_ns()
        self.assertEqual(self.ns.get_refcount(0), 1234)
        self.assertEqual(ns2.get_refcount(0), 1234)
    def test_put_refuses_too_large_a_node(self):
        node = larch.LeafNode(0, ['000'], ['x' * (self.node_size + 1)])
        def helper(node):
            self.ns.put_node(node)
            self.ns.commit()
        self.assertRaises(larch.NodeTooBig, helper, node)
    def test_puts_and_gets_same_with_cache_emptied(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        # Replace the cache so get_node must go to queue/disk.
        self.ns.cache = larch.LRUCache(100)
        self.assertEqualNodes(self.ns.get_node(0), node)
    def test_put_uploads_queue_overflow(self):
        self.ns.upload_max = 2
        self.ns.upload_queue.max = self.ns.upload_max
        # One more node than the queue holds forces a flush to disk.
        ids = range(self.ns.upload_max + 1)
        for i in ids:
            node = larch.LeafNode(i, [], [])
            self.ns.put_node(node)
        self.assertEqual(sorted(self.ns.list_nodes()), ids)
        for node_id in ids:
            self.ns.cache.remove(node_id)
            self.assertEqual(self.ns.get_node(node_id).id, node_id)
    def test_gets_node_from_disk(self):
        node = larch.LeafNode(0, [], [])
        self.ns.put_node(node)
        self.ns.commit()
        ns2 = self.new_ns()
        node2 = ns2.get_node(node.id)
        self.assertEqual(node.id, node2.id)
        self.assertEqual(node.keys(), node2.keys())
        self.assertEqual(node.values(), node2.values())
larch-1.20131130/larch/nodestore_memory.py 0000644 0001750 0001750 00000004333 12246332521 020122 0 ustar jenkins jenkins # Copyright 2010, 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import larch
class NodeStoreMemory(larch.NodeStore):
    '''An implementation of the larch.NodeStore API for in-memory storage.
    Everything (nodes, refcounts, metadata) lives in plain dictionaries,
    so nothing survives the process. Intended for demonstration and
    testing only.
    '''
    def __init__(self, allow_writes, node_size, codec):
        larch.NodeStore.__init__(
            self, allow_writes=allow_writes, node_size=node_size, codec=codec)
        self.nodes = {}
        self.refcounts = {}
        self.metadata = {}
    def get_metadata_keys(self):
        '''Return all metadata keys.'''
        return self.metadata.keys()
    def get_metadata(self, key):
        '''Return metadata value for key; KeyError if not set.'''
        return self.metadata[key]
    def set_metadata(self, key, value):
        '''Set a metadata key/value pair.'''
        self.metadata[key] = value
    def remove_metadata(self, key):
        '''Remove a metadata key; KeyError if not set.'''
        del self.metadata[key]
    def put_node(self, node):
        '''Store a node, freezing it against further modification.'''
        node.frozen = True
        self.nodes[node.id] = node
    def get_node(self, node_id):
        '''Return a stored node; larch.NodeMissing if unknown.'''
        try:
            return self.nodes[node_id]
        except KeyError:
            raise larch.NodeMissing(repr(self), node_id)
    def start_modification(self, node):
        '''Allow a frozen node to be modified in place.'''
        node.frozen = False
    def remove_node(self, node_id):
        '''Forget a stored node; larch.NodeMissing if unknown.'''
        try:
            del self.nodes[node_id]
        except KeyError:
            raise larch.NodeMissing(repr(self), node_id)
    def list_nodes(self):
        '''Return ids of all stored nodes.'''
        return self.nodes.keys()
    def get_refcount(self, node_id):
        '''Return a node's refcount; unset counts are zero.'''
        return self.refcounts.get(node_id, 0)
    def set_refcount(self, node_id, refcount):
        '''Set a node's refcount.'''
        self.refcounts[node_id] = refcount
larch-1.20131130/larch/nodestore_memory_tests.py 0000644 0001750 0001750 00000002010 12246332521 021332 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
import larch
import nodestore_memory
class NodeStoreMemoryTests(unittest.TestCase, larch.NodeStoreTests):
    # All actual test methods come from the shared larch.NodeStoreTests
    # mixin; this class only provides the in-memory fixture. key_bytes
    # is provided by larch.NodeStoreTests.
    def setUp(self):
        self.node_size = 4096
        self.codec = larch.NodeCodec(self.key_bytes)
        self.ns = nodestore_memory.NodeStoreMemory(
            allow_writes=True, node_size=self.node_size, codec=self.codec)
larch-1.20131130/larch/refcountstore.py 0000644 0001750 0001750 00000010627 12246332521 017435 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import logging
import os
import StringIO
import struct
import tempfile
import tracing
import larch
def encode_refcounts(refcounts, start_id, how_many, keys):
    '''Pack a group of refcounts into a binary string.
    The format is: start id (64-bit), count of entries (16-bit), then
    ``how_many`` 16-bit counts. Ids in ``keys`` take their count from
    ``refcounts``; all other slots in the group are zero.
    '''
    counts = [0] * how_many
    for key in keys:
        counts[key - start_id] = refcounts[key]
    fmt = '!QH' + 'H' * how_many
    return struct.pack(fmt, start_id, how_many, *counts)
def decode_refcounts(encoded):
    '''Unpack a binary refcount group into (node_id, refcount) pairs.
    Inverse of encode_refcounts: reads the start id and entry count
    from the header, then pairs each id with its 16-bit count.
    '''
    header_size = struct.calcsize('!QH')
    start_id, how_many = struct.unpack('!QH', encoded[:header_size])
    counts = struct.unpack('!' + 'H' * how_many, encoded[header_size:])
    ids = range(start_id, start_id + how_many)
    return zip(ids, counts)
class RefcountStore(object):
    '''Store node reference counts.
    Each node has a reference count, which gets stored on disk.
    Reference counts are grouped into blocks of ``self.per_group`` counts,
    and each group is stored in its own file. This balances the
    per-file overhead with the overhead of keeping a lot of unneeded
    reference counts in memory.
    Only those blocks that are used get loaded into memory. Blocks
    that are full of zeroes are not stored in files, to save disk space.
    '''
    per_group = 2**15
    refcountdir = 'refcounts'
    def __init__(self, node_store):
        self.node_store = node_store
        self.refcounts = dict()
        self.dirty = set()
    def get_refcount(self, node_id):
        '''Return reference count for a given node.
        Counts default to zero: if the node's group has never been
        saved, the count is 0.
        '''
        if node_id not in self.refcounts:
            group = self._load_refcount_group(self._start_id(node_id))
            if group is None:
                self.refcounts[node_id] = 0
            else:
                # Don't clobber counts that were modified in memory but
                # not yet saved.
                for x, count in group:
                    if x not in self.dirty:
                        self.refcounts[x] = count
        return self.refcounts[node_id]
    def set_refcount(self, node_id, refcount):
        '''Set the reference count for a given node.'''
        tracing.trace('setting refcount for %s to %s' % (node_id, refcount))
        self.refcounts[node_id] = refcount
        self.dirty.add(node_id)
    def save_refcounts(self):
        '''Save all modified refcounts.'''
        tracing.trace('saving refcounts (len(dirty) = %s)' %
                      (len(self.dirty)))
        if self.dirty:
            dirname = os.path.join(self.node_store.dirname, self.refcountdir)
            if not self.node_store.journal.exists(dirname):
                self.node_store.journal.makedirs(dirname)
            ids = sorted(self.dirty)
            all_ids_in_memory = set(self.refcounts.keys())
            # Write out every group that overlaps the dirty id range.
            for start_id in range(self._start_id(ids[0]),
                                  self._start_id(ids[-1]) + 1,
                                  self.per_group):
                all_ids_in_group = set(
                    range(start_id, start_id + self.per_group))
                keys = all_ids_in_group.intersection(all_ids_in_memory)
                if keys:
                    encoded = encode_refcounts(
                        self.refcounts, start_id, self.per_group, keys)
                    filename = self._group_filename(start_id)
                    self.node_store.journal.overwrite_file(filename, encoded)
        # We re-initialize these so that they don't grow indefinitely.
        self.refcounts = dict()
        self.dirty = set()
    def _load_refcount_group(self, start_id):
        # Return decoded (id, count) pairs for a group, or None if the
        # group has never been written to disk.
        filename = self._group_filename(start_id)
        if self.node_store.journal.exists(filename):
            encoded = self.node_store.journal.cat(filename)
            return decode_refcounts(encoded)
    def _group_filename(self, start_id):
        # One file per group, named after the group's first id.
        return os.path.join(self.node_store.dirname, self.refcountdir,
                            'refcounts-%d' % start_id)
    def _start_id(self, node_id):
        # Round node_id down to the start of its group. Floor division
        # (//) behaves identically to / for ints under Python 2, and
        # stays correct (no float result) under Python 3.
        return (node_id // self.per_group) * self.per_group
larch-1.20131130/larch/refcountstore_tests.py 0000644 0001750 0001750 00000006756 12246332521 020667 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
import shutil
import tempfile
import unittest
import larch
import larch.nodestore_disk
class DummyNodeStore(object):
    '''Minimal stand-in for a node store, for RefcountStore tests.
    RefcountStore only needs ``dirname`` and a ``journal`` providing a
    few file operations, so this object serves as its own journal.
    '''
    def __init__(self, dirname):
        self.dirname = dirname
        self.journal = self
    def makedirs(self, dirname):
        if not os.path.exists(dirname):
            os.makedirs(dirname)
    def cat(self, filename):
        # open() + context manager instead of the old file() call,
        # which leaked the handle and does not exist in Python 3.
        with open(filename) as f:
            return f.read()
    def overwrite_file(self, filename, contents):
        with open(filename, 'w') as f:
            f.write(contents)
    def exists(self, filename):
        return os.path.exists(filename)
    def rename(self, old, new):
        os.rename(old, new)
    def remove(self, filename):
        os.remove(filename)
class RefcountStoreTests(unittest.TestCase):
    def setUp(self):
        self.dirname = tempfile.mkdtemp()
        self.rs = self.new_rs()
    def tearDown(self):
        shutil.rmtree(self.dirname)
    def new_rs(self):
        # A fresh RefcountStore over the same directory, so tests can
        # check what persists across instances.
        return larch.RefcountStore(DummyNodeStore(self.dirname))
    def test_returns_zero_for_unset_refcount(self):
        self.assertEqual(self.rs.get_refcount(123), 0)
    def test_sets_refcount(self):
        self.rs.set_refcount(123, 1)
        self.assertEqual(self.rs.get_refcount(123), 1)
    def test_updates_refcount(self):
        self.rs.set_refcount(123, 1)
        self.rs.set_refcount(123, 2)
        self.assertEqual(self.rs.get_refcount(123), 2)
    def test_refcounts_are_not_saved_automatically(self):
        self.rs.set_refcount(123, 1)
        rs2 = self.new_rs()
        self.assertEqual(rs2.get_refcount(123), 0)
    def test_saves_refcounts(self):
        self.rs.set_refcount(123, 1)
        self.rs.save_refcounts()
        rs2 = self.new_rs()
        self.assertEqual(rs2.get_refcount(123), 1)
    def test_save_refcounts_works_without_changes(self):
        self.assertEqual(self.rs.save_refcounts(), None)
    def test_refcount_group_encode_decode_round_trip_works(self):
        refs = range(2048)
        for ref in refs:
            self.rs.set_refcount(ref, ref)
        encoded = larch.refcountstore.encode_refcounts(
            self.rs.refcounts, 0, 1024, range(1024))
        decoded = larch.refcountstore.decode_refcounts(encoded)
        self.assertEqual(decoded, [(x, x) for x in refs[:1024]])
    def test_group_returns_correct_start_id_for_node_zero(self):
        self.assertEqual(self.rs._start_id(0), 0)
    def test_group_returns_correct_start_id_for_last_id_in_group(self):
        self.assertEqual(self.rs._start_id(self.rs.per_group - 1), 0)
    def test_group_returns_correct_start_id_for_first_in_second_group(self):
        self.assertEqual(self.rs._start_id(self.rs.per_group),
                         self.rs.per_group)
    def test_group_returns_correct_start_id_for_second_in_second_group(self):
        self.assertEqual(self.rs._start_id(self.rs.per_group + 1),
                         self.rs.per_group)
larch-1.20131130/larch/tree.py 0000644 0001750 0001750 00000053355 12246332521 015477 0 ustar jenkins jenkins # Copyright 2010, 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import bisect
import tracing
import larch
'''A simple B-tree implementation.'''
class KeySizeMismatch(larch.Error):
    '''Raised when a key is not the size this tree requires.'''
    def __init__(self, key, wanted_size):
        actual_size = len(key)
        self.msg = ('Key %s is of wrong length (%d, should be %d)'
                    % (repr(key), actual_size, wanted_size))
class ValueTooLarge(larch.Error):
    '''User tried to use a value that is too large for a node.'''
    def __init__(self, value, max_size):
        self.msg = ('Value %s is too long (%d, max %d)' %
                    (repr(value), len(value), max_size))
class BTree(object):
    '''A balanced search tree (copy-on-write B-tree).
    The tree belongs to a forest. The tree nodes are stored in an
    external node store; see the ``NodeStore`` class.
    ``root_id`` gives the id of the root node of the tree. The
    root node must be unique to this tree, as it is modified in
    place. ``root_id`` may also be ``None``, in which case a
    new node is created automatically to serve as the root node.
    '''
    def __init__(self, forest, node_store, root_id):
        self.forest = forest
        self.node_store = node_store
        self.max_index_length = self.node_store.max_index_pairs()
        # Python 2 integer division: minimum fill is half the maximum.
        self.min_index_length = self.max_index_length / 2
        if root_id is None:
            self.root = None
        else:
            self.root = self._get_node(root_id)
        tracing.trace('init BTree %s with root_id %s' % (self, root_id))
    def _check_key_size(self, key):
        # All keys must be exactly key_bytes long.
        if len(key) != self.node_store.codec.key_bytes:
            raise KeySizeMismatch(key, self.node_store.codec.key_bytes)
    def _check_value_size(self, value):
        # Values must fit within a node.
        if len(value) > self.node_store.max_value_size:
            raise ValueTooLarge(value, self.node_store.max_value_size)
    def _new_id(self):
        '''Generate a new node identifier.'''
        return self.forest.new_id()
    def _new_leaf(self, keys, values):
        '''Create a new leaf node.'''
        node = larch.LeafNode(self._new_id(), keys, values)
        tracing.trace('id=%s' % node.id)
        return node
    def _new_index(self, keys, values):
        '''Create a new index node.'''
        index = larch.IndexNode(self._new_id(), keys, values)
        # The new index references every child, so bump their refcounts.
        for child_id in values:
            self._increment(child_id)
        tracing.trace('id=%s' % index.id)
        return index
    def _set_root(self, new_root):
        '''Replace existing root node.'''
        tracing.trace('new_root.id=%s' % new_root.id)
        if self.root is not None and self.root.id != new_root.id:
            tracing.trace('decrement old root %s' % self.root.id)
            self._decrement(self.root.id)
        self._put_node(new_root)
        self.root = new_root
        tracing.trace('setting node %s refcount to 1' % self.root.id)
        # The root is modified in place, so it must never be shared.
        self.node_store.set_refcount(self.root.id, 1)
    def _get_node(self, node_id):
        '''Return node corresponding to a node id.'''
        return self.node_store.get_node(node_id)
    def _put_node(self, node):
        '''Put node into node store.'''
        tracing.trace('node.id=%s' % node.id)
        return self.node_store.put_node(node)
    def _leaf_size(self, node):
        # Compute the leaf's encoded size, caching it on the node.
        if node.size is None:
            node.size = self.node_store.codec.leaf_size(node.keys(),
                                                        node.values())
        return node.size
    def lookup(self, key):
        '''Return value corresponding to ``key``.
        If the key is not in the tree, raise ``KeyError``.
        '''
        tracing.trace('looking up %s' % repr(key))
        tracing.trace('tree is %s (root id %s)',
                      self, self.root.id if self.root else None)
        self._check_key_size(key)
        node = self.root
        while isinstance(node, larch.IndexNode):
            k = node.find_key_for_child_containing(key)
            # If k is None, then the indexing of node will cause KeyError
            # to be returned, just like we want to. This saves us from
            # having to test for it separately.
            node = self._get_node(node[k])
        if isinstance(node, larch.LeafNode):
            return node[key]
        # Reached only when the tree has no root (self.root is None).
        raise KeyError(key)
    def lookup_range(self, minkey, maxkey):
        '''Return list of (key, value) pairs for all keys in a range.
        ``minkey`` and ``maxkey`` are included in range.
        '''
        tracing.trace('looking up range %s .. %s' %
                      (repr(minkey), repr(maxkey)))
        tracing.trace('tree is %s (root id %s)',
                      self, self.root.id if self.root else None)
        self._check_key_size(minkey)
        self._check_key_size(maxkey)
        if self.root is None:
            return []
        else:
            return [pair
                    for pair in
                    self._lookup_range(self.root.id, minkey, maxkey)]
    def _lookup_range(self, node_id, minkey, maxkey):
        # Generator yielding (key, value) pairs within [minkey, maxkey]
        # from the subtree rooted at node_id.
        node = self._get_node(node_id)
        if isinstance(node, larch.LeafNode):
            for key in node.find_keys_in_range(minkey, maxkey):
                yield key, node[key]
        else:
            assert isinstance(node, larch.IndexNode)
            # NOTE(review): 'result' is unused; left in place to keep
            # this change documentation-only.
            result = []
            for child_id in node.find_children_in_range(minkey, maxkey):
                for pair in self._lookup_range(child_id, minkey, maxkey):
                    yield pair
    def count_range(self, minkey, maxkey):
        '''Return number of keys in range.'''
        tracing.trace('count_range(%s, %s)' % (repr(minkey), repr(maxkey)))
        tracing.trace('tree is %s (root id %s)',
                      self, self.root.id if self.root else None)
        self._check_key_size(minkey)
        self._check_key_size(maxkey)
        if self.root is None:
            return 0
        return self._count_range(self.root.id, minkey, maxkey)
    def _count_range(self, node_id, minkey, maxkey):
        # Recursive helper for count_range.
        node = self._get_node(node_id)
        if isinstance(node, larch.LeafNode):
            return len(list(node.find_keys_in_range(minkey, maxkey)))
        else:
            assert isinstance(node, larch.IndexNode)
            count = 0
            for child_id in node.find_children_in_range(minkey, maxkey):
                count += self._count_range(child_id, minkey, maxkey)
            return count
    def range_is_empty(self, minkey, maxkey):
        '''Is a range empty in the tree?
        This is faster than doing a range lookup for the same range,
        and checking if there are any keys returned.
        '''
        tracing.trace('range_is_empty(%s, %s)' % (repr(minkey), repr(maxkey)))
        tracing.trace('tree is %s (root id %s)',
                      self, self.root.id if self.root else None)
        self._check_key_size(minkey)
        self._check_key_size(maxkey)
        if self.root is None:
            return True
        return self._range_is_empty(self.root.id, minkey, maxkey)
    def _range_is_empty(self, node_id, minkey, maxkey):
        node = self._get_node(node_id)
        if isinstance(node, larch.LeafNode):
            # NOTE(review): if find_keys_in_range returns a generator
            # (as the list() wrapper in _count_range suggests), this
            # comparison is always False — confirm its return type.
            return node.find_keys_in_range(minkey, maxkey) == []
        else:
            assert isinstance(node, larch.IndexNode)
            for child_id in node.find_children_in_range(minkey, maxkey):
                if not self._range_is_empty(child_id, minkey, maxkey):
                    return False
            return True
    def _shadow(self, node):
        '''Shadow a node: make it possible to modify it in-place.'''
        tracing.trace('node.id=%s' % node.id)
        if self.node_store.can_be_modified(node):
            tracing.trace('can be modified in place')
            self.node_store.start_modification(node)
            new = node
        elif isinstance(node, larch.IndexNode):
            tracing.trace('new index node')
            new = self._new_index(node.keys(), node.values())
        else:
            tracing.trace('new leaf node')
            new = self._new_leaf(node.keys(), node.values())
            # The copy has the same encoded size as the original.
            new.size = node.size
        tracing.trace('returning new.id=%s' % new.id)
        return new
    def insert(self, key, value):
        '''Insert a new key/value pair into the tree.
        If the key already existed in the tree, the old value is silently
        forgotten.
        '''
        tracing.trace('key=%s' % repr(key))
        tracing.trace('value=%s' % repr(value))
        tracing.trace('tree is %s (root id %s)',
                      self, self.root.id if self.root else None)
        self._check_key_size(key)
        self._check_value_size(value)
        # Is the tree empty? This needs special casing to keep
        # _insert_into_index simpler.
        if self.root is None or len(self.root) == 0:
            tracing.trace('tree is empty')
            leaf = self._new_leaf([key], [value])
            self._put_node(leaf)
            if self.root is None:
                new_root = self._new_index([key], [leaf.id])
            else:
                new_root = self._shadow(self.root)
                new_root.add(key, leaf.id)
                self._increment(leaf.id)
        else:
            tracing.trace('tree is not empty')
            kids = self._insert_into_index(self.root, key, value)
            # kids contains either one or more index nodes. If one,
            # we use that as the new root. Otherwise, we create a new one,
            # making the tree higher because we must.
            assert len(kids) > 0
            for kid in kids:
                assert type(kid) == larch.IndexNode
            if len(kids) == 1:
                new_root = kids[0]
                tracing.trace('only one kid: id=%s' % new_root.id)
            else:
                keys = [kid.first_key() for kid in kids]
                values = [kid.id for kid in kids]
                new_root = self._new_index(keys, values)
                tracing.trace('create new root: id=%s' % new_root.id)
        self._set_root(new_root)
    def _insert_into_index(self, old_index, key, value):
        '''Insert key, value into an index node.
        Return list of replacement nodes. Might be just the same node,
        or a single new node, or two nodes, one of which might be the
        same node. Note that this method never makes the tree higher,
        that is the job of the caller. If two nodes are returned,
        they are siblings at the same height as the original node.
        '''
        tracing.trace('old_index.id=%s' % old_index.id)
        new_index = self._shadow(old_index)
        child_key = new_index.find_key_for_child_containing(key)
        if child_key is None:
            child_key = new_index.first_key()
        child = self._get_node(new_index[child_key])
        if isinstance(child, larch.IndexNode):
            new_kids = self._insert_into_index(child, key, value)
        else:
            new_kids = self._insert_into_leaf(child, key, value)
        # Replace the old child entry with the replacement node(s),
        # adjusting refcounts as the references change.
        new_index.remove(child_key)
        do_dec = True
        for kid in new_kids:
            new_index.add(kid.first_key(), kid.id)
            if kid.id != child.id:
                self._increment(kid.id)
            else:
                do_dec = False
        if do_dec: # pragma: no cover
            self._decrement(child.id)
        if len(new_index) > self.max_index_length:
            tracing.trace('need to split index node id=%s' % new_index.id)
            # Python 2 integer division: split roughly in half.
            n = len(new_index) / 2
            keys = new_index.keys()[n:]
            values = new_index.values()[n:]
            new = larch.IndexNode(self._new_id(), keys, values)
            tracing.trace('new index node id=%s' % new.id)
            new_index.remove_index_range(n, len(new_index))
            self._put_node(new_index)
            self._put_node(new)
            return [new_index, new]
        else:
            tracing.trace('no need to split index node id=%s' % new_index.id)
            self._put_node(new_index)
            return [new_index]
    def _insert_into_leaf(self, leaf, key, value):
        '''Insert a key/value pair into a leaf node.
        Return value is like for _insert_into_index.
        '''
        tracing.trace('leaf.id=%s' % leaf.id)
        codec = self.node_store.codec
        max_size = self.node_store.node_size
        size = self._leaf_size
        new = self._shadow(leaf)
        old_size = size(new)
        if key in new:
            old_value = new[key]
            new.add(key, value)
            # Update the cached size incrementally instead of re-encoding.
            new.size = codec.leaf_size_delta_replace(old_size, old_value,
                                                     value)
        else:
            new.add(key, value)
            new.size = codec.leaf_size_delta_add(old_size, value)
        if size(new) <= max_size:
            tracing.trace('leaf did not grow too big')
            leaves = [new]
        else:
            tracing.trace('leaf grew too big, splitting')
            keys = new.keys()
            values = new.values()
            # Python 2 integer division: split roughly in half, then
            # rebalance keys until both halves fit.
            n = len(keys) / 2
            new2 = self._new_leaf(keys[n:], values[n:])
            for key in new2:
                new.remove(key)
            if size(new2) > max_size: # pragma: no cover
                while size(new2) > max_size:
                    key = new2.keys()[0]
                    new.add(key, new2[key])
                    new2.remove(key)
            elif size(new) > max_size: # pragma: no cover
                while size(new) > max_size:
                    key = new.keys()[-1]
                    new2.add(key, new[key])
                    new.remove(key)
            leaves = [new, new2]
        for x in leaves:
            self._put_node(x)
        return leaves
    def remove(self, key):
        '''Remove ``key`` and its associated value from tree.
        If key is not in the tree, ``KeyError`` is raised.
        '''
        tracing.trace('key=%s' % repr(key))
        tracing.trace('tree is %s (root id %s)',
                      self, self.root.id if self.root else None)
        self._check_key_size(key)
        if self.root is None:
            tracing.trace('no root')
            raise KeyError(key)
        new_root = self._remove_from_index(self.root, key)
        self._set_root(new_root)
        self._reduce_height()
    def _remove_from_index(self, old_index, key):
        '''Remove key from the subtree rooted at old_index.
        Returns the shadowed replacement for old_index, with empty
        children dropped and small children merged with siblings.
        '''
        tracing.trace('old_index.id=%s' % old_index.id)
        tracing.trace('tree is %s (root id %s)',
                      self, self.root.id if self.root else None)
        child_key = old_index.find_key_for_child_containing(key)
        new_index = self._shadow(old_index)
        child = self._get_node(new_index[child_key])
        if isinstance(child, larch.IndexNode):
            new_kid = self._remove_from_index(child, key)
            new_index.remove(child_key)
            if len(new_kid) > 0:
                self._add_or_merge_index(new_index, new_kid)
            else:
                # Child became empty: drop it and its references.
                if new_kid.id != child.id: # pragma: no cover
                    self._decrement(new_kid.id)
                self._decrement(child.id)
        else:
            assert isinstance(child, larch.LeafNode)
            leaf = self._shadow(child)
            leaf.remove(key)
            self._put_node(leaf)
            new_index.remove(child_key)
            if len(leaf) > 0:
                self._add_or_merge_leaf(new_index, leaf)
            else:
                tracing.trace('new leaf is empty, forgetting it')
                if leaf.id != child.id: # pragma: no cover
                    self._decrement(leaf.id)
                self._decrement(child.id)
        self._put_node(new_index)
        return new_index
    def _add_or_merge_index(self, parent, index):
        # Attach an index child to parent, merging it with a sibling
        # if the combined node would fit.
        self._add_or_merge(parent, index, self._merge_index)
    def _add_or_merge_leaf(self, parent, leaf):
        # As _add_or_merge_index, but for leaf children.
        self._add_or_merge(parent, leaf, self._merge_leaf)
    def _add_or_merge(self, parent, node, merge):
        assert not parent.frozen
        assert node.frozen
        keys = parent.keys()
        key = node.first_key()
        i = bisect.bisect_left(keys, key)
        # Try merging with the left sibling first, then the right one;
        # fall back to adding the node unmerged.
        new_node = None
        if i > 0:
            new_node = merge(parent, node, i-1)
        if new_node is None and i < len(keys):
            new_node = merge(parent, node, i)
        if new_node is None:
            new_node = node
        assert new_node is not None
        self._put_node(new_node)
        parent.add(new_node.first_key(), new_node.id)
        self._increment(new_node.id)
        if new_node != node: # pragma: no cover
            # We made a new node, so get rid of the old one.
            tracing.trace('decrementing unused node id=%s' % node.id)
            self._decrement(node.id)
    def _merge_index(self, parent, node, sibling_index):
        # Merge two index nodes if their combined pair count fits.
        def merge_indexes_p(a, b):
            return len(a) + len(b) <= self.max_index_length
        def add_to_index(n, k, v):
            n.add(k, v)
            # The merged node gains a reference to the child.
            self._increment(v)
        return self._merge_nodes(parent, node, sibling_index,
                                 merge_indexes_p, add_to_index)
    def _merge_leaf(self, parent, node, sibling_index):
        # Merge two leaf nodes if their combined encoded size fits.
        def merge_leaves_p(a, b):
            a_size = self._leaf_size(a)
            b_size = self._leaf_size(b)
            return a_size + b_size <= self.node_store.node_size
        def add_to_leaf(n, k, v):
            n.add(k, v)
        return self._merge_nodes(parent, node, sibling_index,
                                 merge_leaves_p, add_to_leaf)
    def _merge_nodes(self, parent, node, sibling_index, merge_p, add):
        '''Merge node with parent's child at sibling_index, if merge_p
        says the combination fits; return the merged node, or None if
        no merge was done.
        '''
        sibling_key = parent.keys()[sibling_index]
        sibling_id = parent[sibling_key]
        sibling = self._get_node(sibling_id)
        if merge_p(node, sibling):
            tracing.trace('merging nodes %s and %s' % (node.id, sibling.id))
            new_node = self._shadow(node)
            for k in sibling:
                add(new_node, k, sibling[k])
            self._put_node(new_node)
            parent.remove(sibling_key)
            tracing.trace('decrementing now-unused sibling %s' % sibling.id)
            self._decrement(sibling.id)
            return new_node
        else:
            return None
    def remove_range(self, minkey, maxkey):
        '''Remove all keys in the given range.
        Range is inclusive.
        '''
        tracing.trace('minkey=%s maxkey=%s' % (repr(minkey), repr(maxkey)))
        tracing.trace('tree is %s (root id %s)',
                      self, self.root.id if self.root else None)
        self._check_key_size(minkey)
        self._check_key_size(maxkey)
        keys = [k for k, v in self.lookup_range(minkey, maxkey)]
        for key in keys:
            self.remove(key)
    def _reduce_height(self):
        # After removing things, the top of the tree might consist of a
        # list of index nodes with only a single child, which is also
        # an index node. These can and should be removed, for efficiency.
        # Further, since we've modified all of these nodes, they can all
        # be modified in place.
        tracing.trace('start reducing height')
        while len(self.root) == 1:
            tracing.trace('self.root.id=%s' % self.root.id)
            key = self.root.first_key()
            child_id = self.root[key]
            assert self.node_store.get_refcount(self.root.id) == 1
            if self.node_store.get_refcount(child_id) != 1:
                tracing.trace('only child is shared')
                break
            child = self._get_node(child_id)
            if isinstance(child, larch.LeafNode):
                tracing.trace('only child is a leaf node')
                break
            # We can just make the child be the new root node.
            assert type(child) == larch.IndexNode
            # Prevent child from getting removed when parent's refcount
            # gets decremented. set_root will set the refcount to be 1.
            tracing.trace('setting node %s refcount to 2' % child.id)
            self.node_store.set_refcount(child.id, 2)
            self._set_root(child)
        tracing.trace('done reducing height')
    def _increment(self, node_id):
        '''Non-recursively increment refcount for a node.'''
        refcount = self.node_store.get_refcount(node_id)
        refcount += 1
        self.node_store.set_refcount(node_id, refcount)
        tracing.trace('node %s refcount grew to %s' % (node_id, refcount))
    def _decrement(self, node_id):
        '''Recursively, lazily decrement refcounts for a node and children.'''
        tracing.trace('decrementing node %s refcount' % node_id)
        refcount = self.node_store.get_refcount(node_id)
        if refcount > 1:
            refcount -= 1
            self.node_store.set_refcount(node_id, refcount)
            tracing.trace('node %s refcount now %s' % (node_id, refcount))
        else:
            # Last reference gone: remove the node, first releasing any
            # children it references.
            tracing.trace('node %s refcount %s, removing node' %
                          (node_id, refcount))
            node = self._get_node(node_id)
            if isinstance(node, larch.IndexNode):
                tracing.trace('reducing refcounts for children')
                for child_id in node.values():
                    self._decrement(child_id)
            self.node_store.remove_node(node_id)
            self.node_store.set_refcount(node_id, 0)
larch-1.20131130/larch/tree_tests.py 0000644 0001750 0001750 00000062751 12246332521 016721 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import random
import sys
import unittest
import larch
class DummyForest(object):

    '''Minimal stand-in for a larch forest: only hands out node ids.

    Ids are consecutive integers starting at 1; the most recently
    issued id is kept in ``last_id``.

    '''

    def __init__(self):
        self.last_id = 0

    def new_id(self):
        '''Return the next unused node id.'''
        next_id = self.last_id + 1
        self.last_id = next_id
        return next_id
class DummyNodeStore(larch.NodeStoreMemory):

    '''In-memory node store that can also enumerate every stored node id.'''

    def find_nodes(self):
        '''Return the ids of all nodes currently in the store.'''
        return list(self.nodes.keys())
class KeySizeMismatchTests(unittest.TestCase):

    '''The KeySizeMismatch message must mention the key and wanted size.'''

    def setUp(self):
        self.err = larch.KeySizeMismatch('foo', 4)

    def test_error_message_contains_key(self):
        self.assertTrue('foo' in str(self.err))

    def test_error_message_contains_wanted_size(self):
        self.assertTrue('4' in str(self.err))
class ValueTooLargeTests(unittest.TestCase):

    '''The ValueTooLarge message must mention the value and the size cap.'''

    def setUp(self):
        self.err = larch.ValueTooLarge('foobar', 3)

    def test_error_message_contains_value(self):
        self.assertTrue('foobar' in str(self.err))

    def test_error_message_contains_max_size(self):
        self.assertTrue('3' in str(self.err))
class BTreeTests(unittest.TestCase):

    '''Unit tests for larch.BTree, backed by an in-memory dummy store.'''

    def setUp(self):
        # We use a small node size so that all code paths are traversed
        # during testing. Use coverage.py to make sure they do.
        self.codec = larch.NodeCodec(3)
        self.ns = DummyNodeStore(
            allow_writes=True, node_size=64, codec=self.codec)
        self.forest = DummyForest()
        self.tree = larch.BTree(self.forest, self.ns, None)
        # When True, helper methods print debugging dumps of the tree.
        self.dump = False

    def test_shadow_increments_childrens_refcounts(self):
        leaf = self.tree._new_leaf(['foo'], ['bar'])
        index = self.tree._new_index([leaf.first_key()], [leaf.id])
        self.assertEqual(self.ns.get_refcount(leaf.id), 1)
        self.ns.set_refcount(index.id, 2)
        clone = self.tree._shadow(index)
        self.assertEqual(self.ns.get_refcount(leaf.id), 2)

    def test_shadow_returns_new_leaf_if_cannot_be_modified(self):
        node = self.tree._new_leaf(['foo'], ['bar'])
        self.tree._put_node(node)
        self.ns.set_refcount(node.id, 2)
        node2 = self.tree._shadow(node)
        self.assertNotEqual(node2.id, node.id)

    def test_shadow_returns_new_index_if_cannot_be_modified(self):
        node = self.tree._new_index(['foo'], [1])
        self.tree._put_node(node)
        self.ns.set_refcount(node.id, 2)
        node2 = self.tree._shadow(node)
        self.assertNotEqual(node2.id, node.id)

    def test_shadow_returns_same_node_that_can_be_modified(self):
        node = self.tree._new_index(['foo'], [1])
        self.tree._put_node(node)
        self.ns.set_refcount(node.id, 1)
        node2 = self.tree._shadow(node)
        self.assertEqual(node2.id, node.id)

    def test_new_leaf_does_not_put_node_into_store(self):
        leaf = self.tree._new_leaf([], [])
        self.assertRaises(larch.NodeMissing, self.tree._get_node, leaf.id)

    def test_new_index_does_not_put_node_into_store(self):
        index = self.tree._new_index([], [])
        self.assertRaises(larch.NodeMissing, self.tree._get_node, index.id)

    def test_new_index_increments_childrens_refcounts(self):
        leaf = self.tree._new_leaf([], [])
        self.tree._put_node(leaf)
        self.assertEqual(self.ns.get_refcount(leaf.id), 0)
        self.tree._new_index(['foo'], [leaf.id])
        self.assertEqual(self.ns.get_refcount(leaf.id), 1)

    def test_insert_changes_root_id(self):
        self.tree.insert('foo', 'bar')
        self.assertNotEqual(self.tree.root.id, 0)

    def test_is_empty(self):
        self.assertEqual(self.tree.root, None)

    def test_lookup_for_missing_key_raises_error(self):
        self.assertRaises(KeyError, self.tree.lookup, 'foo')

    def test_lookup_with_wrong_size_key_raises_error(self):
        self.assertRaises(larch.KeySizeMismatch, self.tree.lookup, '')

    def test_insert_inserts_key(self):
        self.tree.insert('foo', 'bar')
        self.assertEqual(self.tree.lookup('foo'), 'bar')

    def test_insert_inserts_empty_value(self):
        self.tree.insert('foo', '')
        self.assertEqual(self.tree.lookup('foo'), '')

    def test_insert_replaces_value_for_existing_key(self):
        self.tree.insert('foo', 'foo')
        self.tree.insert('foo', 'bar')
        self.assertEqual(self.tree.lookup('foo'), 'bar')

    def test_insert_with_wrong_size_key_raises_error(self):
        self.assertRaises(larch.KeySizeMismatch, self.tree.insert, '', '')

    def test_insert_with_too_large_value_raises_error(self):
        self.assertRaises(larch.ValueTooLarge, self.tree.insert, 'xxx',
                          'x' * (self.ns.max_value_size + 1))

    def test_remove_from_empty_tree_raises_keyerror(self):
        self.assertRaises(KeyError, self.tree.remove, 'foo')

    def test_remove_of_missing_key_raises_keyerror(self):
        self.tree.insert('bar', 'bar')
        self.assertRaises(KeyError, self.tree.remove, 'foo')

    def test_remove_removes_key(self):
        self.tree.insert('foo', 'bar')
        self.tree.remove('foo')
        self.assertRaises(KeyError, self.tree.lookup, 'foo')

    def get_roots_first_child(self):
        # Helper: return the node that is the first child of the root.
        child_key = self.tree.root.first_key()
        child_id = self.tree.root[child_key]
        return self.ns.get_node(child_id)

    def test_remove_with_wrong_size_key_raises_error(self):
        self.assertRaises(larch.KeySizeMismatch, self.tree.remove, '')

    def keys_are_in_range(self, node, lower, upper, level=0):
        # Helper: recursively verify that every key in the subtree falls
        # in the half-open range [lower, upper) and index keys are sorted.
        indent = 2
        if isinstance(node, larch.LeafNode):
            if self.dump:
                print '%*sleaf keys %s' % (level*indent, '', node.keys())
            for key in node.keys():
                if key < lower or key >= upper:
                    return False
        else:
            keys = node.keys()
            if self.dump: print '%*sin range; index keys = %s' % (level*indent, '', keys), 'lower..upper:', lower, upper
            if keys != sorted(keys):
                return False
            for i, key in enumerate(keys):
                if key < lower or key >= upper:
                    return False
                if i+1 == len(keys):
                    up = upper
                else:
                    up = keys[i+1]
                if self.dump: print '%*sin child, keys should be in %s..%s' % (level*indent, '', key, up)
                if not self.keys_are_in_range(self.tree._get_node(node[key]), key, up, level+1):
                    return False
        return True

    def find_largest_key(self, node):
        # Helper: largest key anywhere in the subtree rooted at node.
        if isinstance(node, larch.LeafNode):
            return max(node.keys())
        else:
            return max(node.keys() +
                       [self.find_largest_key(self.tree._get_node(node[key]))
                        for key in node.keys()])

    def nextkey(self, key):
        # Helper: smallest possible key that sorts strictly after key.
        assert type(key) == str
        if key == '':
            return '\0'
        if key[-1] == '\xff':
            return key + '\0'
        else:
            return key[:-1] + chr(ord(key[-1]) + 1)

    def proper_search_tree(self, node):
        # Helper: True if the subtree satisfies the B-tree search-tree
        # invariant over its own min/max key range.
        if not node.keys():
            return True
        minkey = node.keys()[0]
        maxkey = self.find_largest_key(node)
        if self.dump: print; print 'proper tree', minkey, self.nextkey(maxkey)
        return self.keys_are_in_range(node, minkey, self.nextkey(maxkey))

    def test_insert_many_respects_ordering_requirement(self):
        ints = range(100)
        random.shuffle(ints)
        for i in ints:
            key = '%03d' % i
            value = key
            self.tree.insert(key, value)
            self.assertEqual(self.tree.lookup(key), value)
            self.assert_(self.proper_search_tree(self.tree.root),
                         'key#%d failed' % (1 + ints.index(i)))

    def test_remove_many_works(self):
        ints = range(100)
        random.shuffle(ints)
        for i in ints:
            key = '%03d' % i
            value = key
            self.tree.insert(key, value)
            self.assertEqual(self.tree.lookup(key), value)
            self.tree.remove(key)
            self.assertRaises(KeyError, self.tree.lookup, key)
            # NOTE(review): the message below says 'insert of ...' but it
            # is the remove step being verified here.
            self.assert_(self.proper_search_tree(self.tree.root),
                         msg='insert of %d in %s failed to keep tree ok' %
                         (i, ints))

    def test_reduce_height_makes_tree_lower(self):
        self.tree.insert('foo', 'bar')
        old_root = self.tree.root
        extra_root = self.tree._new_index([old_root.first_key()],
                                          [old_root.id])
        self.tree._set_root(extra_root)
        # Fix old root's refcount, since it got incremented to 2.
        self.ns.set_refcount(old_root.id, 1)
        self.assertEqual(self.tree.root, extra_root)
        self.tree._reduce_height()
        self.assertEqual(self.tree.root, old_root)

    def test_reduce_height_does_not_lower_tree_when_children_are_shared(self):
        self.tree.insert('foo', 'bar')
        old_root = self.tree.root
        extra_root = self.tree._new_index([old_root.first_key()],
                                          [old_root.id])
        self.tree._set_root(extra_root)
        # Make old root's refcount be 2, so it looks like it is shared
        # between trees.
        self.ns.set_refcount(old_root.id, 2)
        self.assertEqual(self.tree.root, extra_root)
        self.tree._reduce_height()
        self.assertEqual(self.tree.root, extra_root)

    def dump_tree(self, node, f=sys.stdout, level=0):
        # Helper: print an indented dump of the subtree, for debugging.
        # Only active when self.dump is True.
        if not self.dump:
            return
        indent = 4
        if isinstance(node, larch.LeafNode):
            f.write('%*sLeaf:' % (level*indent, ''))
            for key in node.keys():
                f.write(' %s=%s' % (key, node[key]))
            f.write('\n')
        else:
            assert isinstance(node, larch.IndexNode)
            f.write('%*sIndex:\n' % (level*indent, ''))
            for key in node.keys():
                f.write('%*s%s:\n' % ((level+1)*indent, '', key))
                # NOTE(review): the recursive call does not forward f, so
                # nested levels always go to sys.stdout — confirm intended.
                self.dump_tree(self.tree._get_node(node[key]), level=level+2)

    def test_insert_many_remove_many_works(self):
        keys = ['%03d' % i for i in range(100)]
        random.shuffle(keys)
        for key in keys:
            self.tree.insert(key, key)
            self.assert_(self.proper_search_tree(self.tree.root))
            if self.dump:
                print
                print
                self.dump_tree(self.tree.root)
                print
        for key in keys:
            if self.dump:
                print
                print 'removing', key
                self.dump_tree(self.tree.root)
            try:
                self.tree.remove(key)
            except:
                # On failure, turn dumping on so the broken tree is shown.
                self.dump = True
                self.dump_tree(self.tree.root)
                ret = self.proper_search_tree(self.tree.root)
                print 'is it?', ret
                raise
            self.assert_(self.proper_search_tree(self.tree.root))
            if self.dump:
                print
                print
        self.assertEqual(self.tree.root.keys(), [])

    def test_remove_merges_leaf_with_left_sibling(self):
        keys = ['%03d' % i for i in range(3)]
        for key in keys:
            self.tree.insert(key, 'x')
        self.assertEqual(self.tree.remove(keys[1]), None)

    def test_persists(self):
        self.tree.insert('foo', 'bar')
        tree2 = larch.BTree(self.forest, self.ns, self.tree.root.id)
        self.assertEqual(tree2.lookup('foo'), 'bar')

    def test_last_node_id_persists(self):
        self.tree.insert('foo', 'bar') # make tree has root
        node1 = self.tree._new_leaf([], [])
        tree2 = larch.BTree(self.forest, self.ns, self.tree.root.id)
        node2 = tree2._new_leaf([], [])
        self.assertEqual(node1.id + 1, node2.id)

    def test_lookup_range_returns_empty_list_if_nothing_found(self):
        self.assertEqual(list(self.tree.lookup_range('bar', 'foo')), [])

    def create_tree_for_range(self):
        # Helper: populate the tree with keys 002, 004, 006, 008.
        for key in ['%03d' % i for i in range(2, 10, 2)]:
            self.tree.insert(key, key)

    def test_lookup_between_keys_raises_keyerror(self):
        self.create_tree_for_range()
        self.assertRaises(KeyError, self.tree.lookup, '000')

    def test_lookup_range_returns_empty_list_if_before_smallest_key(self):
        self.create_tree_for_range()
        self.assertEqual(list(self.tree.lookup_range('000', '001')), [])

    def test_lookup_range_returns_empty_list_if_after_largest_key(self):
        self.create_tree_for_range()
        self.assertEqual(list(self.tree.lookup_range('010', '999')), [])

    def test_lookup_range_returns_empty_list_if_between_keys(self):
        self.create_tree_for_range()
        self.assertEqual(list(self.tree.lookup_range('003', '003')), [])

    def test_lookup_range_returns_single_item_in_range(self):
        self.create_tree_for_range()
        self.assertEqual(list(self.tree.lookup_range('002', '002')),
                         [('002', '002')])

    def test_lookup_range_returns_single_item_in_range_exclusive(self):
        self.create_tree_for_range()
        self.assertEqual(list(self.tree.lookup_range('001', '003')),
                         [('002', '002')])

    def test_lookup_range_returns_two_items_in_range(self):
        self.create_tree_for_range()
        self.assertEqual(sorted(self.tree.lookup_range('002', '004')),
                         [('002', '002'), ('004', '004')])

    def test_lookup_range_returns_all_items_in_range(self):
        self.create_tree_for_range()
        self.assertEqual(sorted(self.tree.lookup_range('000', '999')),
                         [('002', '002'),
                          ('004', '004'),
                          ('006', '006'),
                          ('008', '008')])

    def test_count_range_returns_zero_for_empty_tree(self):
        self.assertEqual(self.tree.count_range('000', '000'), 0)

    def test_count_range_returns_zero_for_empty_range_at_beginning(self):
        self.create_tree_for_range()
        self.assertEqual(self.tree.count_range('000', '000'), 0)

    def test_count_range_returns_zero_for_empty_range_in_middle(self):
        self.create_tree_for_range()
        self.assertEqual(self.tree.count_range('003', '003'), 0)

    def test_count_range_returns_zero_for_empty_range_at_end(self):
        self.create_tree_for_range()
        self.assertEqual(self.tree.count_range('009', '009'), 0)

    def test_count_range_returns_one_for_range_with_one_key(self):
        self.create_tree_for_range()
        self.assertEqual(self.tree.count_range('002', '002'), 1)

    def test_count_range_returns_one_for_range_with_one_key_part_2(self):
        self.create_tree_for_range()
        self.assertEqual(self.tree.count_range('001', '003'), 1)

    def test_count_range_returns_correct_result_for_longer_range(self):
        self.create_tree_for_range()
        self.assertEqual(self.tree.count_range('000', '009'), 4)

    def test_range_is_empty_returns_true_for_empty_tree(self):
        self.assertTrue(self.tree.range_is_empty('bar', 'foo'))

    def test_range_is_empty_works_for_nonempty_tree(self):
        # Exhaustively check range emptiness for every boundary pairing
        # around the stored keys 002/004/006/008.
        self.create_tree_for_range()
        self.assertEqual(self.tree.range_is_empty('000', '000'), True)
        self.assertEqual(self.tree.range_is_empty('000', '001'), True)
        self.assertEqual(self.tree.range_is_empty('000', '002'), False)
        self.assertEqual(self.tree.range_is_empty('000', '003'), False)
        self.assertEqual(self.tree.range_is_empty('000', '004'), False)
        self.assertEqual(self.tree.range_is_empty('000', '005'), False)
        self.assertEqual(self.tree.range_is_empty('000', '006'), False)
        self.assertEqual(self.tree.range_is_empty('000', '007'), False)
        self.assertEqual(self.tree.range_is_empty('000', '008'), False)
        self.assertEqual(self.tree.range_is_empty('000', '009'), False)
        self.assertEqual(self.tree.range_is_empty('000', '999'), False)
        self.assertEqual(self.tree.range_is_empty('001', '001'), True)
        self.assertEqual(self.tree.range_is_empty('001', '002'), False)
        self.assertEqual(self.tree.range_is_empty('001', '003'), False)
        self.assertEqual(self.tree.range_is_empty('001', '004'), False)
        self.assertEqual(self.tree.range_is_empty('001', '005'), False)
        self.assertEqual(self.tree.range_is_empty('001', '006'), False)
        self.assertEqual(self.tree.range_is_empty('001', '007'), False)
        self.assertEqual(self.tree.range_is_empty('001', '008'), False)
        self.assertEqual(self.tree.range_is_empty('001', '009'), False)
        self.assertEqual(self.tree.range_is_empty('001', '999'), False)
        self.assertEqual(self.tree.range_is_empty('002', '002'), False)
        self.assertEqual(self.tree.range_is_empty('002', '003'), False)
        self.assertEqual(self.tree.range_is_empty('002', '004'), False)
        self.assertEqual(self.tree.range_is_empty('002', '005'), False)
        self.assertEqual(self.tree.range_is_empty('002', '006'), False)
        self.assertEqual(self.tree.range_is_empty('002', '007'), False)
        self.assertEqual(self.tree.range_is_empty('002', '008'), False)
        self.assertEqual(self.tree.range_is_empty('002', '009'), False)
        self.assertEqual(self.tree.range_is_empty('002', '999'), False)
        self.assertEqual(self.tree.range_is_empty('003', '003'), True)
        self.assertEqual(self.tree.range_is_empty('003', '004'), False)
        self.assertEqual(self.tree.range_is_empty('003', '005'), False)
        self.assertEqual(self.tree.range_is_empty('003', '006'), False)
        self.assertEqual(self.tree.range_is_empty('003', '007'), False)
        self.assertEqual(self.tree.range_is_empty('003', '008'), False)
        self.assertEqual(self.tree.range_is_empty('003', '009'), False)
        self.assertEqual(self.tree.range_is_empty('003', '999'), False)
        self.assertEqual(self.tree.range_is_empty('004', '004'), False)
        self.assertEqual(self.tree.range_is_empty('004', '005'), False)
        self.assertEqual(self.tree.range_is_empty('004', '006'), False)
        self.assertEqual(self.tree.range_is_empty('004', '007'), False)
        self.assertEqual(self.tree.range_is_empty('004', '008'), False)
        self.assertEqual(self.tree.range_is_empty('004', '009'), False)
        self.assertEqual(self.tree.range_is_empty('004', '999'), False)
        self.assertEqual(self.tree.range_is_empty('005', '005'), True)
        self.assertEqual(self.tree.range_is_empty('005', '006'), False)
        self.assertEqual(self.tree.range_is_empty('005', '007'), False)
        self.assertEqual(self.tree.range_is_empty('005', '008'), False)
        self.assertEqual(self.tree.range_is_empty('005', '009'), False)
        self.assertEqual(self.tree.range_is_empty('005', '999'), False)
        self.assertEqual(self.tree.range_is_empty('006', '006'), False)
        self.assertEqual(self.tree.range_is_empty('006', '007'), False)
        self.assertEqual(self.tree.range_is_empty('006', '008'), False)
        self.assertEqual(self.tree.range_is_empty('006', '009'), False)
        self.assertEqual(self.tree.range_is_empty('006', '999'), False)
        self.assertEqual(self.tree.range_is_empty('007', '007'), True)
        self.assertEqual(self.tree.range_is_empty('007', '008'), False)
        self.assertEqual(self.tree.range_is_empty('007', '009'), False)
        self.assertEqual(self.tree.range_is_empty('007', '999'), False)
        self.assertEqual(self.tree.range_is_empty('008', '008'), False)
        self.assertEqual(self.tree.range_is_empty('008', '009'), False)
        self.assertEqual(self.tree.range_is_empty('008', '999'), False)
        self.assertEqual(self.tree.range_is_empty('009', '009'), True)
        self.assertEqual(self.tree.range_is_empty('009', '999'), True)
        self.assertEqual(self.tree.range_is_empty('999', '999'), True)

    def test_remove_range_removes_everything(self):
        for key in ['%03d' % i for i in range(1000)]:
            self.tree.insert(key, key)
        self.tree.remove_range('000', '999')
        self.assertEqual(list(self.tree.lookup_range('000', '999')), [])

    def test_remove_range_removes_single_key_in_middle(self):
        self.create_tree_for_range()
        self.tree.remove_range('004', '004')
        self.assertEqual(list(self.tree.lookup_range('000', '999')),
                         [('002', '002'),
                          ('006', '006'),
                          ('008', '008')])

    def test_remove_range_removes_from_beginning_of_keys(self):
        self.create_tree_for_range()
        self.tree.remove_range('000', '004')
        self.assertEqual(list(self.tree.lookup_range('000', '999')),
                         [('006', '006'),
                          ('008', '008')])

    def test_remove_range_removes_from_middle_of_keys(self):
        self.create_tree_for_range()
        self.tree.remove_range('003', '007')
        self.assertEqual(list(self.tree.lookup_range('000', '999')),
                         [('002', '002'),
                          ('008', '008')])

    def test_remove_range_removes_from_end_of_keys(self):
        self.create_tree_for_range()
        self.tree.remove_range('007', '009')
        self.assertEqual(list(self.tree.lookup_range('000', '999')),
                         [('002', '002'),
                          ('004', '004'),
                          ('006', '006')])

    def test_remove_range_removes_from_empty_tree(self):
        self.create_tree_for_range()
        self.tree.remove_range('000', '999')
        self.tree.remove_range('007', '009')
        self.assertEqual(list(self.tree.lookup_range('000', '999')), [])

    def test_bug_remove_range_when_only_key_is_larger_than_maxkey(self):
        self.tree.insert('555', '555')
        self.tree.remove_range('000', '111')
        self.assertEqual(list(self.tree.lookup_range('000', '999')),
                         [('555', '555')])
class BTreeDecrementTests(unittest.TestCase):

    '''Tests for BTree._decrement's lazy refcounting behaviour.'''

    def setUp(self):
        # We use a small node size so that all code paths are traversed
        # during testing. Use coverage.py to make sure they do.
        self.codec = larch.NodeCodec(3)
        self.ns = DummyNodeStore(
            allow_writes=True, node_size=64, codec=self.codec)
        self.forest = DummyForest()
        self.tree = larch.BTree(self.forest, self.ns, None)
        # One insert yields a root index node plus one leaf node.
        self.tree.insert('foo', 'bar')

    def test_store_has_two_nodes(self):
        self.assertEqual(len(self.ns.find_nodes()), 2)

    def test_initially_everything_has_refcount_1(self):
        for node_id in self.ns.find_nodes():
            self.assertEqual(self.ns.get_refcount(node_id), 1)

    def test_decrement_removes_everything(self):
        # Refcount 1 means the decrement removes the node and its children.
        self.tree._decrement(self.tree.root.id)
        self.assertEqual(len(self.ns.find_nodes()), 0)

    def test_decrement_does_not_remove_anything(self):
        # Refcount 2 means the node is shared: only the count drops.
        self.ns.set_refcount(self.tree.root.id, 2)
        self.tree._decrement(self.tree.root.id)
        self.assertEqual(len(self.ns.find_nodes()), 2)
class BTreeBalanceTests(unittest.TestCase):

    '''Check that inserts and removes keep the B-tree balanced.'''

    def setUp(self):
        ns = DummyNodeStore(
            allow_writes=True, node_size=4096, codec=larch.NodeCodec(2))
        forest = DummyForest()
        self.tree = larch.BTree(forest, ns, None)
        self.keys = ['%02d' % i for i in range(10)]
        # Depth of the first leaf seen; set lazily by leaves_at_same_depth.
        self.depth = None

    def leaves_at_same_depth(self, node, depth=0):
        # Helper: True if every leaf under node is at the same depth.
        if isinstance(node, larch.LeafNode):
            if self.depth is None:
                self.depth = depth
            return self.depth == depth
        else:
            assert isinstance(node, larch.IndexNode)
            for key in node:
                child = self.tree._get_node(node[key])
                if not self.leaves_at_same_depth(child, depth + 1):
                    return False
            return True

    def indexes_filled_right_amount(self, node, isroot=True):
        # Helper: non-root index nodes must hold between fanout and
        # 2*fanout+1 children.
        # NOTE(review): self.fanout is never assigned anywhere in this
        # class, so reaching the non-root branch would raise
        # AttributeError; with node_size=4096 and only 10 keys the tree
        # apparently never grows a non-root index node — confirm.
        if isinstance(node, larch.IndexNode):
            if not isroot:
                if len(node) < self.fanout or len(node) > 2 * self.fanout + 1:
                    return False
            for key in node:
                child = self.tree._get_node(node[key])
                ok = self.indexes_filled_right_amount(child, isroot=False)
                if not ok:
                    return False
        return True

    def test_insert_puts_every_leaf_at_same_depth(self):
        for key in self.keys:
            self.tree.insert(key, key)
            self.depth = None
            self.assert_(self.leaves_at_same_depth(self.tree.root),
                         'key#%d failed' % (self.keys.index(key) + 1))

    def test_insert_fills_every_index_node_the_right_amount(self):
        self.assert_(self.indexes_filled_right_amount(self.tree.root))
        for key in self.keys:
            self.tree.insert(key, key)
            self.assert_(self.indexes_filled_right_amount(self.tree.root))

    def test_remove_keeps_every_leaf_at_same_depth(self):
        for key in self.keys:
            self.tree.insert(key, key)
        for key in self.keys:
            self.tree.remove(key)
            self.assert_(self.leaves_at_same_depth(self.tree.root))
larch-1.20131130/larch/uploadqueue.py 0000644 0001750 0001750 00000004600 12246332521 017056 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import logging
import os
import StringIO
import struct
import tempfile
import larch
class UploadQueue(object):

    '''Hold nodes briefly before they are really uploaded to the store.

    Nodes are frequently modified or deleted soon after they are
    created, so uploading them immediately would often be wasted work.
    Nodes therefore sit in an LRU queue and are only really uploaded
    when they fall off its end, or when ``push`` is called.

    ``really_put`` is the callable that performs the actual upload.
    ``max_length`` is the maximum number of nodes kept in the queue.

    '''

    def __init__(self, really_put, max_length):
        self.really_put = really_put
        self._max_length = max_length
        self._create_lru()

    def _create_lru(self):
        # The LRU cache invokes _push_oldest whenever it evicts an entry.
        self.lru = larch.LRUCache(
            self._max_length, forget_hook=self._push_oldest)

    def put(self, node):
        '''Put a node into the queue.'''
        self.lru.add(node.id, node)

    def _push_oldest(self, node_id, node):
        # Eviction hook: the node is leaving the queue, so upload it.
        self.really_put(node)

    def push(self):
        '''Upload all nodes in the queue.'''
        while len(self.lru):
            _, evicted = self.lru.remove_oldest()
            self.really_put(evicted)
        self.lru.log_stats()
        self._create_lru()

    def remove(self, node_id):
        '''Remove a node from the queue given its id.'''
        return self.lru.remove(node_id)

    def list_ids(self):
        '''List identifiers of all nodes in the queue.'''
        return self.lru.keys()

    def get(self, node_id):
        '''Return the queued node with ``node_id``, or None.'''
        return self.lru.get(node_id)
larch-1.20131130/larch/uploadqueue_tests.py 0000644 0001750 0001750 00000005660 12246332521 020307 0 ustar jenkins jenkins # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
import shutil
import tempfile
import unittest
import larch
class UploadQueueTests(unittest.TestCase):

    '''Tests for UploadQueue with a queue length of two.'''

    def setUp(self):
        self.max_queue = 2
        # Nodes that have been "really" uploaded end up in self.nodes.
        self.nodes = []
        self.uq = larch.UploadQueue(self.really_put, self.max_queue)
        self.node = larch.LeafNode(1, [], [])

    def really_put(self, node):
        # Stands in for the real upload function.
        self.nodes.append(node)

    def test_has_no_nodes_initially(self):
        self.assertEqual(self.uq.list_ids(), [])

    def test_get_returns_None_for_nonexistent_node(self):
        self.assertEqual(self.uq.get(self.node.id), None)

    def test_puts_node(self):
        self.uq.put(self.node)
        self.assertEqual(self.uq.list_ids(), [self.node.id])
        self.assertEqual(self.uq.get(self.node.id), self.node)

    def test_put_replaces_existing_node(self):
        # Same node id: the newer node wins.
        node2 = larch.LeafNode(1, ['foo'], ['bar'])
        self.uq.put(self.node)
        self.uq.put(node2)
        self.assertEqual(self.uq.get(self.node.id), node2)

    def test_remove_returns_false_for_nonexistent_node(self):
        self.assertEqual(self.uq.remove(self.node.id), False)

    def test_remove_removes_node(self):
        self.uq.put(self.node)
        self.uq.remove(self.node.id)
        self.assertEqual(self.uq.list_ids(), [])
        self.assertEqual(self.uq.get(self.node.id), None)

    def test_does_not_push_first_node(self):
        self.uq.put(self.node)
        self.assertEqual(self.nodes, [])

    def test_does_not_push_second_node(self):
        self.uq.put(self.node)
        self.uq.put(larch.LeafNode(2, [], []))
        self.assertEqual(self.nodes, [])

    def test_pushes_first_node_after_third_is_pushed(self):
        # Queue holds two nodes; the third put evicts the oldest.
        self.uq.put(self.node)
        self.uq.put(larch.LeafNode(2, [], []))
        self.uq.put(larch.LeafNode(3, [], []))
        self.assertEqual(self.nodes, [self.node])

    def test_pushes_oldest_even_if_recently_used(self):
        # get() must not refresh a node's position in the queue.
        self.uq.put(self.node)
        self.uq.put(larch.LeafNode(2, [], []))
        self.uq.get(self.node.id)
        self.uq.put(larch.LeafNode(3, [], []))
        self.assertEqual(self.nodes, [self.node])

    def test_pushes_out_only_node_when_requested(self):
        self.uq.put(self.node)
        self.uq.push()
        self.assertEqual(self.nodes, [self.node])
larch-1.20131130/refcount-speed 0000755 0001750 0001750 00000011604 12246332521 015735 0 ustar jenkins jenkins #!/usr/bin/python
# Copyright 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
# Excercise my B-tree implementation, for simple benchmarking purposes.
# The benchmark gets a location and an operation count as command line
# arguments.
#
# If the location is the empty string, an in-memory node store is used.
# Otherwise it must be a non-existent directory name.
#
# The benchmark will do the given number of insertions into the tree, and
# measure the speed of that. Then it will look up each of those, and measure
# the lookups.
import cliapp
import cProfile
import csv
import gc
import logging
import os
import random
import shutil
import subprocess
import sys
import time
import tracing
import larch
class RefcountSpeedTest(cliapp.Application):

    '''Benchmark larch.refcountstore encode/decode speed (cliapp app).'''

    def add_settings(self):
        '''Declare command line settings (cliapp hook).'''
        self.settings.boolean(['profile'],
            'profile with cProfile?')
        self.settings.boolean(['log-memory-use'], 'log VmRSS?')
        self.settings.string(['trace'],
            'code module in which to do trace logging')
        self.settings.integer(['refs'],
            'how many refs to test with (default is %default)',
            default=2**15)
        self.settings.integer(['times'],
            'how many times to test each op (default is %default)',
            default=1000)

    def process_args(self, args):
        '''Run the benchmark: time encode and decode of a refcount dict.'''
        if self.settings['trace']:
            tracing.trace_add_pattern(self.settings['trace'])

        n = self.settings['refs']
        # Dummy refcounts: key i maps to refcount i.
        refcounts = {}
        for i in xrange(n):
            refcounts[i] = i

        # Helper functions.
        nop = lambda *args: None

        # Calibrate: measure loop overhead so it can be subtracted below.
        looptime = self.measure(nop, 'calibrate')

        num_refcounts = len(refcounts)
        keys = refcounts.keys()
        encode = self.measure(
            lambda: larch.refcountstore.encode_refcounts(
                refcounts, 0, num_refcounts, keys),
            'encode')
        encoded = larch.refcountstore.encode_refcounts(
            refcounts, 0, num_refcounts, keys)
        decode = self.measure(lambda:
            larch.refcountstore.decode_refcounts(encoded),
            'decode')

        # Report
        def speed(result):
            # Operations per second, net of the measured loop overhead.
            return n / (result - looptime)

        def report(label, result):
            print '%-12s: %5.3f s (%8.1f/s)' % \
                (label, result, speed(result))

        print 'refs: %d' % self.settings['refs']
        print 'times: %d' % self.settings['times']
        report('encode', encode)
        report('decode', decode)

    def measure(self, func, profname):
        '''Time repeated calls of func; return elapsed CPU seconds.

        NOTE(review): time.clock() is deprecated in Python 3; on POSIX
        it measures CPU time, not wall-clock time.

        '''

        def log_memory_use(stage):
            if self.settings['log-memory-use']:
                logging.info('%s memory use: %s' % (profname, stage))
                logging.info(' VmRSS: %s KiB' % self.vmrss())
                logging.info(' # objects: %d' % len(gc.get_objects()))
                logging.info(' # garbage: %d' % len(gc.garbage))

        def helper():
            # 'start' is a closure over the variable assigned below; it
            # is set before helper() is ever called.
            n = self.settings['times']
            log_memory_use('at start')
            finished = False
            while not finished:
                for i in xrange(n):
                    func()
                if time.clock() > start:
                    # at least one time unit passed - this is enough
                    finished = True
                else:
                    # Not a single time unit passed: we need more iterations.
                    # Multiply 'times' by 10 and execute the remaining 9 loops.
                    self.settings['times'] *= 10
                    n *= 9
            log_memory_use('after calls')

        print 'measuring', profname
        start = time.clock()
        if self.settings['profile']:
            globaldict = globals().copy()
            localdict = locals().copy()
            cProfile.runctx('helper()', globaldict, localdict,
                '%s.prof' % profname)
        else:
            helper()
        end = time.clock()

        return end - start

    def vmrss(self):
        '''Return resident set size (VmRSS, KiB) of this process.

        NOTE(review): the file is not closed if iteration raises;
        a with-statement would be safer.

        '''
        f = open('/proc/self/status')
        rss = 0
        for line in f:
            if line.startswith('VmRSS'):
                rss = line.split()[1]
        f.close()
        return rss

    def format(self, value):
        '''Format a number for output with no decimals.'''
        return '%.0f' % value
# Script entry point: run the benchmark application.
if __name__ == '__main__':
    RefcountSpeedTest().run()
larch-1.20131130/setup.py 0000644 0001750 0001750 00000004032 12246332521 014573 0 ustar jenkins jenkins # Copyright 2010, 2011, 2012 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from distutils.core import setup
import larch
# Distribution metadata. Note that the version is read from
# larch.__version__, so the package sources must be importable when
# setup.py runs.
setup(
    name='larch',
    version=larch.__version__,
    description='copy-on-write B-tree data structure',
    long_description='''\
An implementation of a particular kind of B-tree, based on research
by Ohad Rodeh. This is the same data structure that btrfs uses, but
in a new, pure-Python implementation.
The distinctive feature of this B-tree is that a node is never (conceptually)
modified. Instead, all updates are done by copy-on-write. This makes it
easy to clone a tree, and modify only the clone, while other processes
access the original tree.
The implementation is generic and flexible, so that you may use it in
a variety of situations. For example, the tree itself does not decide
where its nodes are stored: you provide a class that does that for it.
The library contains two implementations, one for in-memory and one
for on-disk storage.
''',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU General Public License (GPL)',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2',
        'Topic :: Software Development :: Libraries',
    ],
    author='Lars Wirzenius',
    author_email='liw@liw.fi',
    url='http://liw.fi/larch/',
    packages=['larch'],
    scripts=['fsck-larch'],
)
larch-1.20131130/speed-test 0000755 0001750 0001750 00000022574 12246332521 015077 0 ustar jenkins jenkins #!/usr/bin/python
# Copyright 2010, 2011 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
# Excercise my B-tree implementation, for simple benchmarking purposes.
# The benchmark gets a location and nb of keys to use as command line
# arguments --location=LOCATION and --keys=KEYS.
# To debug, one can create a tracing logfile by adding arguments like:
# --trace=refcount --log=refcount.logfile
#
# If the location is the empty string, an in-memory node store is used.
# Otherwise it must be a non-existent directory name.
#
# The benchmark will do the given number of insertions into the tree, and
# measure the speed of that. Then it will look up each of those, and measure
# the lookups.
import cliapp
import cProfile
import csv
import gc
import logging
import os
import random
import shutil
import subprocess
import sys
import time
import tracing
import larch
class SpeedTest(cliapp.Application):

    '''Benchmark insert/lookup/remove operations on a larch B-tree.

    The tree is stored either in memory (empty --location) or in a
    directory named by --location, which must not exist yet.  Each
    benchmark phase is timed in both CPU and wall-clock seconds, with
    the cost of an empty calibration loop subtracted.  Results can
    optionally be appended to a CSV file (--csv) and each phase can be
    profiled with cProfile (--profile).
    '''

    def add_settings(self):
        '''Declare the command line settings this benchmark accepts.'''
        self.settings.boolean(['profile'], 'profile with cProfile?')
        self.settings.boolean(['log-memory-use'], 'log VmRSS?')
        self.settings.string(['trace'],
            'code module in which to do trace logging')
        self.settings.integer(['keys'],
            'how many keys to test with (default is %default)',
            default=1000)
        self.settings.string(['location'],
            'where to store B-tree on disk (in-memory test if not set)')
        self.settings.string(['csv'],
            'append a CSV row to FILE',
            metavar='FILE')

    def process_args(self, args):
        '''Run all benchmark phases and print a report.

        Called by cliapp with the non-option command line arguments
        (which are unused here; everything comes from settings).
        '''
        if self.settings['trace']:
            tracing.trace_add_pattern(self.settings['trace'])

        # Fixed tree geometry used by every benchmark run.
        key_size = 19
        value_size = 128
        node_size = 64*1024

        n = self.settings['keys']
        location = self.settings['location']

        if n is None:
            raise Exception('You must set number of keys with --keys')

        # Empty location means an in-memory node store; otherwise the
        # location must be a not-yet-existing directory, created here.
        if not location:
            forest = larch.open_forest(
                allow_writes=True, key_size=key_size, node_size=node_size,
                node_store=larch.NodeStoreMemory)
        else:
            if os.path.exists(location):
                raise Exception('%s exists already' % location)
            os.mkdir(location)
            forest = larch.open_forest(
                allow_writes=True, key_size=key_size, node_size=node_size,
                dirname=location)

        tree = forest.new_tree()

        # Create list of keys.
        keys = ['%0*d' % (key_size, i) for i in xrange(n)]

        # Build (start, end) key pairs for the range operations.
        ranges = []
        range_len = 10
        for i in range(0, len(keys) - range_len):
            ranges.append((keys[i], keys[i+range_len-1]))

        # Helper functions.
        nop = lambda *args: None

        # Calibrate: time an empty loop over the same items so the loop
        # overhead can be subtracted from the real measurements below.
        looptime = self.measure(keys, nop, nop, 'calibrate')

        # Measure inserts.
        random.shuffle(keys)
        value = 'x' * value_size
        insert = self.measure(keys, lambda key: tree.insert(key, value),
                              forest.commit, 'insert')

        # Measure lookups.
        random.shuffle(keys)
        lookup = self.measure(keys, tree.lookup, nop, 'lookup')

        # Measure range lookups.
        random.shuffle(ranges)
        lookup_range = self.measure(ranges,
            lambda x:
                list(tree.lookup_range(x[0], x[1])),
            nop, 'lookup_range')

        # Measure count of range lookup results.
        len_lookup_range = self.measure(ranges,
            lambda x: len(list(tree.lookup_range(x[0], x[1]))),
            nop, 'len_lookup_range')

        # Measure count range.
        count_range = self.measure(ranges,
            lambda x: tree.count_range(x[0], x[1]),
            nop, 'count_range')

        # Measure inserts into existing tree.
        random.shuffle(keys)
        insert2 = self.measure(keys, lambda key: tree.insert(key, value),
                               forest.commit, 'insert2')

        # Measure removes from tree.
        random.shuffle(keys)
        remove = self.measure(keys, tree.remove, forest.commit, 'remove')

        # Measure remove_range. This requires building a new tree.
        keys.sort()
        for key in keys:
            tree.insert(key, value)
        random.shuffle(ranges)
        remove_range = self.measure(ranges,
            lambda x: tree.remove_range(x[0], x[1]),
            forest.commit, 'remove_range')

        # Report

        def speed(result, i):
            # i selects the component: 0 for CPU time, 1 for wall-clock.
            # The calibration loop time is subtracted first.
            if result[i] == looptime[i]:
                # computer too fast for the number of "keys" used...
                return float("infinity")
            else:
                return n / (result[i] - looptime[i])

        def report(label, result):
            cpu, wall = result
            print '%-16s: %5.3f s (%8.1f/s) CPU; %5.3f s (%8.1f/s) wall' % \
                (label, cpu, speed(result, 0), wall, speed(result, 1))

        print 'location:', location if location else 'memory'
        print 'num_operations: %d' % n
        report('insert', insert)
        report('lookup', lookup)
        report('lookup_range', lookup_range)
        report('len_lookup_range', len_lookup_range)
        report('count_range', count_range)
        report('insert2', insert2)
        report('remove', remove)
        report('remove_range', remove_range)
        if self.settings['profile']:
            print 'View *.prof with ./viewprof for profiling results.'

        if self.settings['csv']:
            self.append_csv(n,
                speed(insert, 0),
                speed(insert2, 0),
                speed(lookup, 0),
                speed(lookup_range, 0),
                speed(remove, 0),
                speed(remove_range, 0))

        # Clean up
        if location:
            shutil.rmtree(location)

    def measure(self, items, func, finalize, profname):
        '''Time calling func(item) for every item, then finalize().

        Returns a (cpu_seconds, wall_seconds) pair for the whole run.
        With --profile, the run goes through cProfile and the profile
        data is written to profname.prof.  With --log-memory-use,
        memory statistics are logged before, between, and after.
        '''

        def log_memory_use(stage):
            if self.settings['log-memory-use']:
                logging.info('%s memory use: %s' % (profname, stage))
                logging.info(' VmRSS: %s KiB' % self.vmrss())
                logging.info(' # objects: %d' % len(gc.get_objects()))
                logging.info(' # garbage: %d' % len(gc.garbage))

        def helper():
            log_memory_use('at start')
            for item in items:
                func(item)
            log_memory_use('after calls')
            finalize()
            log_memory_use('after finalize')

        print 'measuring', profname
        start_time = time.time()
        start = time.clock()
        if self.settings['profile']:
            # runctx needs explicit namespaces so helper() resolves.
            globaldict = globals().copy()
            localdict = locals().copy()
            cProfile.runctx('helper()', globaldict, localdict,
                            '%s.prof' % profname)
        else:
            helper()
        end = time.clock()
        end_time = time.time()

        return end - start, end_time - start_time

    def vmrss(self):
        '''Return this process's resident set size in KiB.

        Parsed from /proc/self/status (Linux only); returned as the
        string from that file, or the integer 0 if no VmRSS line is
        found.
        '''
        f = open('/proc/self/status')
        rss = 0
        for line in f:
            if line.startswith('VmRSS'):
                rss = line.split()[1]
        f.close()
        return rss

    def append_csv(self, keys, insert, insert2, lookup, lookup_range,
                   remove, remove_range):
        '''Append one row of benchmark results to the --csv file.

        Writes a header row first if the file does not exist yet; the
        row is tagged with the bzr revision number when run from a bzr
        checkout.
        '''
        write_title = not os.path.exists(self.settings['csv'])
        f = open(self.settings['csv'], 'a')
        self.writer = csv.writer(f, lineterminator='\n')

        if write_title:
            self.writer.writerow(('revno',
                                  'keys',
                                  'insert (random)',
                                  'insert (seq)',
                                  'lookup',
                                  'lookup_range',
                                  'remove',
                                  'remove_range'))

        # Record the bzr revision, so CSV rows can be compared across
        # versions of the code; '?' when not run from a checkout.
        if os.path.exists('.bzr'):
            p = subprocess.Popen(['bzr', 'revno'], stdout=subprocess.PIPE)
            out, err = p.communicate()
            if p.returncode != 0:
                raise cliapp.AppException('bzr failed')
            revno = out.strip()
        else:
            revno = '?'

        self.writer.writerow((revno,
                              keys,
                              self.format(insert),
                              self.format(insert2),
                              self.format(lookup),
                              self.format(lookup_range),
                              self.format(remove),
                              self.format(remove_range)))

        f.close()

    def format(self, value):
        '''Format a speed value (ops/s) for the CSV file.'''
        return '%.0f' % value
# Entry point: run the benchmark when executed as a script.
if __name__ == '__main__':
    app = SpeedTest()
    app.run()
larch-1.20131130/test-backwards-compatibility 0000755 0001750 0001750 00000011504 12246332521 020576 0 ustar jenkins jenkins #!/usr/bin/python
# Copyright 2012 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
'''Test backwards compatibility of an on-disk B-tree.
This program tests that a Larch on-disk B-tree is backwards compatible
with previous versions, at least to the extent that it can be read from.
This program operates in one of two modes:
* it can generate a new B-tree to be stored as test data for the future
* it can read an existing tree and verify that it can read it right
The generated B-tree is actually a forest, and contains four trees.
The first tree has the following keys:
* key size is 4 bytes
* keys are 0, 1, 2, ..., 1023, converted into binary strings with struct
* values are 0, 1, 2, ..., 1023, converted into text strings with '%d' % i
* node size is 128 bytes
The second tree is a clone of the first one, but with all odd-numbered
keys removed.
The third tree is a clone of the second one, but with all odd-numbered
keys and values added back.
The fourth tree is a clone of the third one, but with all even-numbered
keys removed.
'''
import cliapp
import os
import shutil
import struct
import tarfile
import tempfile
import larch
class BackwardsCompatibilityTester(cliapp.Application):

    '''Generate or verify on-disk B-tree backwards compatibility data.

    The ``generate`` subcommand writes a gzipped tarball of a four-tree
    forest to stdout; ``verify`` reads tarballs named on the command
    line and checks the current larch can still read their contents.
    '''

    # Forest geometry; must stay fixed so old archives remain readable.
    key_size = 4
    node_size = 128
    num_keys = 1024

    keys1 = range(num_keys)

    remove2 = range(1, num_keys, 2)
    # Hoist the removal lists into sets: list membership is O(n) per
    # test, which made the comprehensions below quadratic.  The
    # resulting key lists are identical.
    _removed2 = set(remove2)
    keys2 = [i for i in keys1 if i not in _removed2]

    # NOTE(review): the module docstring says the third tree has the
    # odd-numbered keys added back, but keys3 equals keys2 (even keys
    # only) — the generator re-inserts keys3 and the verifier expects
    # keys3, so both sides agree.  Archived test data depends on this;
    # do not "fix" it without regenerating every stored tarball.
    keys3 = keys2

    remove4 = range(0, num_keys, 2)
    _removed4 = set(remove4)
    keys4 = [i for i in keys3 if i not in _removed4]

    def setup(self):
        # Fresh scratch directory for each run; removed in teardown.
        self.dirname = tempfile.mkdtemp()

    def teardown(self):
        shutil.rmtree(self.dirname)

    def key(self, i):
        '''Return the 4-byte big-endian binary key for integer i.'''
        return struct.pack('!L', i)

    def value(self, i):
        '''Return the value stored under key(i): i as a decimal string.'''
        return '%d' % i

    def cmd_generate(self, args):
        '''Generate a Larch B-tree forest'''
        forest = larch.open_forest(key_size=self.key_size,
                                   node_size=self.node_size,
                                   dirname=self.dirname,
                                   allow_writes=True)

        # First tree: all keys.
        t = forest.new_tree()
        for i in self.keys1:
            t.insert(self.key(i), self.value(i))

        # Second tree: clone with odd keys removed.
        t = forest.new_tree(t)
        for i in self.remove2:
            t.remove(self.key(i))

        # Third tree: clone with keys3 (re-)inserted.
        t = forest.new_tree(t)
        for i in self.keys3:
            t.insert(self.key(i), self.value(i))

        # Fourth tree: clone with even keys removed.
        t = forest.new_tree(t)
        for i in self.remove4:
            t.remove(self.key(i))

        # Commit and make into a tarball on stdout.
        forest.commit()
        tf = tarfile.open(fileobj=self.output, mode='w:gz')
        tf.add(self.dirname, arcname='.')
        tf.close()

    def cmd_verify(self, args):
        '''Verify each tarball named in args contains a readable forest.

        Raises cliapp.AppException on the first discrepancy found.
        '''
        forest_dirname = os.path.join(self.dirname, 'forest')
        for filename in args:
            os.mkdir(forest_dirname)
            tf = tarfile.open(filename)
            # NOTE(review): extractall() on an untrusted tarball can
            # write outside the target directory; only run this on
            # trusted test data.
            tf.extractall(path=forest_dirname)
            tf.close()

            forest = larch.open_forest(dirname=forest_dirname,
                                       allow_writes=False)
            if len(forest.trees) != 4:
                raise cliapp.AppException('Need 4 trees, not %d' %
                                          len(forest.trees))

            self.verify_tree(forest.trees[0], self.keys1)
            self.verify_tree(forest.trees[1], self.keys2)
            self.verify_tree(forest.trees[2], self.keys3)
            self.verify_tree(forest.trees[3], self.keys4)

            shutil.rmtree(forest_dirname)
            self.output.write('%s is OK\n' % filename)

    def verify_tree(self, tree, keys):
        '''Check that tree contains exactly the given integer keys.

        Every expected key must be present with its expected value, in
        order, and the tree must contain no extra keys.
        '''
        minkey = self.key(0)
        maxkey = self.key(2**(8*self.key_size) - 1)
        i = 0
        for key, value in tree.lookup_range(minkey, maxkey):
            # Guard against extra keys explicitly; previously this fell
            # through to an IndexError on keys[i].
            if i >= len(keys):
                raise cliapp.AppException('Unexpected extra key %s' %
                                          repr(key))
            if key != self.key(keys[i]):
                raise cliapp.AppException('Wanted key %s, got %s' %
                                          (keys[i], repr(key)))
            if value != self.value(keys[i]):
                raise cliapp.AppException('Wanted value %s, got %s' %
                                          (keys[i], repr(value)))
            i += 1
        # Previously a truncated tree (missing trailing keys) passed
        # silently; require that every expected key was seen.
        if i != len(keys):
            raise cliapp.AppException('Expected %d keys, found only %d' %
                                      (len(keys), i))
# Run the cliapp application; dispatches to cmd_generate / cmd_verify.
BackwardsCompatibilityTester().run()
larch-1.20131130/test-data/ 0000755 0001750 0001750 00000000000 12246332521 014750 5 ustar jenkins jenkins larch-1.20131130/test-data/format-nodestore1-codec1.tar.gz 0000644 0001750 0001750 00000046201 12246332521 022607 0 ustar jenkins jenkins Otest-data/format-nodestore1-codec1.tar
nS}KHR I%IX}NI$=I$aH$I$I$I$I$I$I$I$Iyϱٻg_=1\}b[s9nUU\3_qZTQ9[a}'g>A[M-[ogyBG<4 ܞ{MNnL51w^wV'XOWn=#rozL/h}:8?l [`kŇߙp?6hBj~9B VO<篚?ofCǂe F#DA r$?_f#bϢW__$_nkx+ ~3i2gZfuEU3˓ǀOh>"~)f!@Y/&_oaA٠m0,M4XyOnw _|?Z@XMkF?pp8ǀciT*h
^W8+Xx# v.h* I@?9ޚdgR?D?D.0|78 |>(gB=r[!? /IT,^T3??wbTS4pz:lhaZVZJ-'9^+~AtB-/L#)lW Hi}c_\Pzhqoo":X<b TH7y:ߞ?ɟ(buq>K)q/U39W?H?%|~ _>i_Z_J륫I⚏Wsϒ88|>YwkEԺUjkWL8G#QPR(-] -N.ί#>#F{x{
8uSa˓u`|?X^*>E?ۢ?.`LEemml -'t
\,N^
/`5՞_\808
|HJ?:}Z455NkE:˭`-l_v_IRx~^Jixc_0Ґo.:oKi +{kO]ϥp8|T`"_Wr_f=ZM#ϝ?p$8
-
U|*5/E(o݊BTRTJTZT|O/A)ˁ
`sٴs9|t?֦\)E;bZVUUڱO<*x*X
<
^,HxjU
r?/"qG{kьO
\
Jfc;puu F![?ޞGONgpWmm//s_xO"h+z(uփf_WR?l`{qi6m|ώ_pLI0jXa#EX'?APVy] `jo*#Ykd{bO}E>UAa=|['%4?8|Q_;vs/ך4b)lp1怖4JLSi]U;J4ֿ[CxrJx,8N ' l>*H WcyO~!\`e(i4 A=?ίSPĨubAq>,˭'k瀵o zFпk9ϭ/
b)O6oL0|Oi??.|\
_`E2XFYH=пjQ?_Hgsm_ˠu6hL;i??1dO,V`k^[,ϗ./b5u\rOKJ迄mSorRR)WRZ*cw-n5Z`mX?}%}}exQzV\_E4{mZ/;^jkhN_f-9ZOFh=U
-ysSz
>N֓h'YVS@XyQoxbOM@
\o++lEݏ'1cOU?c q { /q:iL`;
VY/n_5U[-]-N|ǟ>矖6dS_;&&Ɗ[qsI__qP6(*7TWqO?-C{ǀcqxp8T Zi@;/.a3kc;p+\J+|q{f?GE'Pp28\F?,^(ܠHFOi֟:3ތsoL?8 *hʎv ASw~+w{6h/W//Uko?ME{}:cO6?sN)/ׁ
lsb3`㖿Nj1/qOn%uzz^/?ޢmv:;-N+jrE;:cq`BSѿ|?J/K+u)zk[cOn/">{buRh)cO_5_n*F}~@>H`b4h:(V/9AlF5y/v{=`/X*C;Gu%RxS+`5u[M?ʭ(ߍbPz/F._>}XSu9ϥ_'}1.wA]|m6Y8g%Zj6:Nlbeϭ/P<Ψ}z|;?:+p3}ZϦCXMB+1yO+*KAVRH)yw](3r_91:W*>Q{p~-[p]C!0=?=X-՝[e],s x:lDFYe cx/~= l-RKx8N:kog^U31$A7572WC
ϮK4g)4^,V +'s3፬?Fz
ݞ?Vq;ߒ?zJH%Z 5VSqO7x+=
^B!|Ate%rzaqp{+xׇS^ZGi=:;n3<
.Y 7;Ӿ.[^Hiz1l/sS}~`Np@]㑞ѐp5ph %6]ۗ{g13HoYz<JD{Fqk?rLY|N@֥h}hF5?|7w_YpG'4ˇ뇈釈 >uno_ 6 9JQKU)UiUq_/EUU<'wKgw=^_MU[A gfR?:?QNpȒruH
*z&xN:<>!+lUpNέ;,Ofjr\
JpO{ZoNiA+ӌkp] brSG߷J?kIyJr"%E%w&[;?x\Qkua;.`Uߋ?4twK\ 7x0}!D% wE%ܤdH |Nc_eJ`My ؚ]UZZ@Y}-{W;~n!u?Q<^ o//T?!qg<3=ϭտ9pAx/hMoXx7;~p"8 \Q'
#XduAvq*QςSu:jZ-Y}آWWOudzr1NGo3?>z~u:k^Z|'~õs:y`:GWWWsO/EkoϭP"xb,\xtJ:-M߁ ӿrV9W.(>G&ѿ3s.FE, `)l^`0Xme*_yw~ l\K{uBMlmO5|\~Pm6RֆoZtG!xy(x?8N[^o/tsc&V W"(USV.O}-!BZ7b5u[b5v?EcHp8BiP^JhuJ5Q=`oX1jgr'?@
\WwYYYy)*)^_.Tci}.