genbackupdata_1.6.orig/MANIFEST.in0000644000000000000000000000005111624412016016657 0ustar rootroot00000000000000include genbackupdata.1 include tests.py genbackupdata_1.6.orig/Makefile0000644000000000000000000000053411624412016016567 0ustar rootroot00000000000000all: genbackupdata.1 genbackupdata.1: genbackupdata.1.in genbackupdata ./genbackupdata --generate-manpage=genbackupdata.1.in > genbackupdata.1 check: python -m CoverageTestRunner --ignore-missing-from without-tests ./blackboxtest clean: rm -rf *.py[co] */*.py[co] build dist MANIFEST rm -f blackboxtest.log blackboxtest-genbackupdata.log genbackupdata_1.6.orig/NEWS0000644000000000000000000000143411624412016015626 0ustar rootroot00000000000000NEWS for genbackupdata ====================== Version 1.6, released 2011-08-22 -------------------------------- * Manual page's SYNOPSIS and OPTIONS sections are now generated automatically by cliapp's `--generate-manpage`. Version 1.5, released 2011-06-12 -------------------------------- * Fix genbackupdatalib to work better with pydoc. * Change to use newer cliapp API for adding new settings. This avoids the deprecated old API. Version 1.3.1, released 2011-02-02 ---------------------------------- * Fix setup.py so that the genbackupdatalib Python library is included in the package. Version 1.3, released 2011-02-01 -------------------------------- * Rewrite from scratch, because the old code was hard to test. It was also full of features nobody uses. New code is small. genbackupdata_1.6.orig/README0000644000000000000000000000355111624412016016011 0ustar rootroot00000000000000genbackupdata ============= genbackupdata creates or modifies directory trees in ways that simulate real filesystems sufficiently well for performance testing of backup software. For example, it can create files that are a mix of small text files and big binary files, with the binary files containing random binary junk which compresses badly. This can then be backed up, and later the directory tree can be changed by creating new files, modifying files, or deleting or renaming files. The backup can then be run again. The output is deterministic, such that for a given set of parameters the same output always happens. Thus it is more efficient to distribute genbackupdata and a set of parameters between people who wish to benchmark backup software than distributing very large test sets. Also included are a couple of benchmark programs I've written to measure aspects of the work genbackupdata does: * binaryjunk.py compares various ways of generating random binary data * createfiles.py compares various directory structures when creating lots of new files The home page is: http://braawi.org/genbackupdata/ Legalese: Copyright (C) 2007-2011 Lars Wirzenius This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
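The determinism described in the README above can be illustrated with a minimal sketch. This snippet is not part of the upstream sources; it assumes only the genbackupdatalib API defined later in this package, namely DataGenerator(seed) and its generate(size) method:

import genbackupdatalib

# Two generators built from the same seed yield identical byte streams,
# which is why sharing a parameter set is enough to reproduce a test data set.
g1 = genbackupdatalib.DataGenerator(0)
g2 = genbackupdatalib.DataGenerator(0)
assert g1.generate(4096) == g2.generate(4096)

# A different seed yields a different stream.
g3 = genbackupdatalib.DataGenerator(1)
assert g1.generate(4096) != g3.generate(4096)
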
genbackupdata_1.6.orig/blackboxtest0000755000000000000000000002337511624412016017552 0ustar rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2009, 2010 Lars Wirzenius # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. '''Run some black box tests for genbackupdata.''' import hashlib import logging import os import random import re import shutil import stat import subprocess import sys import tempfile import traceback import unittest class GenbackupdataTestCase(unittest.TestCase): '''Base class for genbackupdata test cases. We use the unittest framework even though these are black box tests, not unit tests. unittest makes implementation of these black box tests convenient, even though that might not be true for all black box tests. This base class provides a fresh environment for each test, and cleans up afterwards. It provides helpers for doing the usual backup operations, and for verifying results. ''' def setUp(self): self.tempdir = tempfile.mkdtemp() self.setUpHook() def setUpHook(self): pass def tearDown(self): self.tearDownHook() shutil.rmtree(self.tempdir) def tearDownHook(self): pass def path(self, *relatives): return os.path.join(self.tempdir, *relatives) def mkdir(self, dirname): abs_dirname = os.path.join(self.tempdir, dirname) os.makedirs(abs_dirname) return abs_dirname def runcmd(self, argv, stderr_ignore=None): '''Run an external command. If the command fails (non-zero exit), raise an exception. If stderr_ignore is not None, it must be a string with a regexp for lines in stderr to ignore. ''' logging.debug('executing %s' % argv) p = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = p.communicate() if stderr_ignore: lines = [line for line in stderr.splitlines() if not re.match(stderr_ignore, line)] stderr = ''.join(lines) sys.stderr.write(stderr) if p.returncode != 0: raise subprocess.CalledProcessError(p.returncode, argv) return stdout def genbackupdata(self, args, stderr_ignore=None): '''Run genbackupdata, with some default arguments.''' return self.runcmd(['./genbackupdata', '--quiet'] + args, stderr_ignore=stderr_ignore) def create_file(self, dirname, relative, contents): '''Create a new file with the desired contents.''' pathname = os.path.join(dirname, relative) logging.debug('creating file %s' % pathname) f = open(pathname, 'w') f.write(contents) f.close() def remove_file(self, root, relative): '''Remove a file.''' pathname = os.path.join(root, relative) logging.debug('removing file %s' % pathname) os.remove(pathname) def create_dir(self, root, pathname): '''Create a new directory, return name.''' fullname = os.path.join(root, pathname) logging.debug('mkdir %s' % fullname) os.makedirs(fullname) return fullname def get_info(self, root, pathname): '''Get the information about a given file. 
Return a tuple (relativepath, stat) where relativepath is the path relative to root, and stat is the result of os.lstat. ''' root_base = os.path.basename(root) del_prefix = root[:-len(root_base)] if pathname == root: return None assert pathname.startswith(root + os.sep), (pathname, root) return pathname[len(root + os.sep):], os.lstat(pathname) def find_everything(self, root): '''Find all filesystem objects inside a directory tree. Return list of (pathname, stat) tuples. The pathname will be relative to the root of the directory tree. The stat tuples will be the result of os.lstat for each pathname. ''' result = [] for dirname, dirnames, filenames in os.walk(root): result.append(self.get_info(root, dirname)) for filename in filenames: pathname = os.path.join(dirname, filename) result.append(self.get_info(root, pathname)) return [x for x in result if x] def apparent_size(self, root): '''Return sum of length of regular files in directory, recursively.''' size = 0 for dirname, subdirs, filenames in os.walk(self.path(root)): for filename in filenames: pathname = self.path(dirname, filename) st = os.lstat(pathname) if stat.S_ISREG(st.st_mode): size += st.st_size return size def checksum(self, pathname): '''Return MD5 checksum for contents of a file.''' s = hashlib.new('md5') f = open(pathname, 'rb') while True: data = f.read(64*1024) if not data: break s.update(data) f.close() return s.hexdigest() def checksums(self, root): '''Return sorted list of (pathname, checksum) pairs for reg. files.''' result = [] prefix = self.path(root) + os.sep for dirname, subdirs, filenames in os.walk(self.path(root)): for filename in filenames: pathname = self.path(dirname, filename) st = os.lstat(pathname) if stat.S_ISREG(st.st_mode): assert pathname.startswith(prefix) relative = pathname[len(prefix):] result.append((relative, self.checksum(pathname))) result.sort() return result def assert_equal_stat_fields(self, filename, stat1, stat2, fieldname): field1 = getattr(stat1, fieldname) field2 = getattr(stat2, fieldname) self.assertEqual(field1, field2, '%s stat field %s difference: %s vs %s' % (filename, fieldname, repr(field1), repr(field2))) def assert_same_stat(self, name, stat1, stat2): '''Are two stat results effectively identical?''' class Fake(object): def __init__(self, stat_result): self.st = stat_result def __getattr__(self, name): if name == 'st_mtime': return int(getattr(self.st, name)) else: return getattr(self.st, name) self.assert_equal_stat_fields(name, stat1, stat2, 'st_blocks') self.assert_equal_stat_fields(name, stat1, stat2, 'st_gid') self.assert_equal_stat_fields(name, stat1, stat2, 'st_mode') self.assert_equal_stat_fields(name, Fake(stat1), Fake(stat2), 'st_mtime') self.assert_equal_stat_fields(name, stat1, stat2, 'st_nlink') self.assert_equal_stat_fields(name, stat1, stat2, 'st_size') self.assert_equal_stat_fields(name, stat1, stat2, 'st_uid') def assert_same_contents(self, relative, root1, root2): '''Verify that file contents has been restored correctly.''' path1 = os.path.join(root1, relative) path2 = os.path.join(root2, relative) self.assertFilesEqual(path1, path2) def assertFileExists(self, path): self.assert_(os.path.exists(path), '%s does not exist' % path) def assertIsRegularFile(self, path): self.assert_(os.path.isfile(path), '%s is not a regular file' % path) def assertFilesEqual(self, path1, path2): '''Verify that file contents are equal.''' self.assertFileExists(path1) self.assertFileExists(path2) self.assertIsRegularFile(path1) self.assertIsRegularFile(path2) f1 = open(path1, 'r') 
f2 = open(path2, 'r') data1 = f1.read() data2 = f2.read() f1.close() f2.close() self.assertEqual(data1, data2, 'contents of %s and %s differ' % (path1, path2)) class GenbackupdataTests(GenbackupdataTestCase): def test_returns_success_with_help_option(self): self.genbackupdata(['--help']) self.assertTrue(True) def test_creates_requested_amount_of_data(self): bytes = 12765 self.genbackupdata([self.path('data'), '--create=%d' % bytes]) self.assertEqual(self.apparent_size('data'), bytes) def test_creates_same_data_every_time(self): size = '10m' # big enough to allow both ample text and binary data self.genbackupdata([self.path('data1'), '--create', size]) self.genbackupdata([self.path('data2'), '--create', size]) sums1 = self.checksums('data1') sums2 = self.checksums('data2') self.assertEqual(len(sums1), len(sums2)) for n in range(1, len(sums1)): self.assertEqual(sums1[:n], sums2[:n]) if __name__ == '__main__': logging.basicConfig(filename='blackboxtest.log', level=logging.DEBUG, format='%(levelname)s: %(message)s') unittest.main() genbackupdata_1.6.orig/debian/0000755000000000000000000000000011624412016016347 5ustar rootroot00000000000000genbackupdata_1.6.orig/debian/changelog0000644000000000000000000000415511624412016020226 0ustar rootroot00000000000000genbackupdata (1.6-1) unstable; urgency=low * First upload to Debian. (Closes: #636445) * New upstream version. * Add missing Depends and Build-Depends on python-ttystatus. * Remove useless debian/pycompat. * Use 3.0 (quilt) format. * Add Homepage header. * Use DEP5 for debian/copyright. * Added missing Build-Depends on python-cliapp. -- Lars Wirzenius Mon, 22 Aug 2011 09:10:28 +0100 genbackupdata (1.5) squeeze; urgency=low * New upstream version. * Bump Standards-Version. No other changes required. -- Lars Wirzenius Sun, 12 Jun 2011 12:55:28 +0100 genbackupdata (1.4) squeeze; urgency=low * debian/control: Add dependency on python-cliapp. -- Lars Wirzenius Wed, 06 Apr 2011 11:02:05 +0100 genbackupdata (1.3.1) squeeze; urgency=low * New upstream release. - include genbackupdatalib in the package -- Lars Wirzenius Wed, 06 Apr 2011 11:02:05 +0100 genbackupdata (1.3) squeeze; urgency=low * New upstream release. This is a rewrite. It is not as functional as the old code. But it is functional enough, and I'm the only known user, so... -- Lars Wirzenius Tue, 01 Feb 2011 21:50:34 +0000 genbackupdata (1.2) squeeze; urgency=low * New upstream release. This release uses a new way to generate binary junk, which is faster than the previous one, and does not result in repetition of existing binary blocks. Obnam is now so good in noticing duplication that it would store a one gigabyte data set from genbackupdata in 132 kilobytes. * Update my e-mail address in Maintainer. -- Lars Wirzenius Thu, 02 Dec 2010 15:17:05 +0000 genbackupdata (1.1.1) squeeze; urgency=low * Upload to my squeeze repository. -- Lars Wirzenius Tue, 18 May 2010 18:54:12 +1200 genbackupdata (1.1) karmic; urgency=low * New upstream version. * Convert packaging to be native, and use dh. -- Lars Wirzenius Sat, 20 Mar 2010 18:44:05 +1300 genbackupdata (1.0-1) unstable; urgency=low * First version. 
-- Lars Wirzenius Fri, 13 Jul 2007 00:33:00 +0300 genbackupdata_1.6.orig/debian/compat0000644000000000000000000000000211624412016017545 0ustar rootroot000000000000007 genbackupdata_1.6.orig/debian/control0000644000000000000000000000172211624412016017754 0ustar rootroot00000000000000Source: genbackupdata Maintainer: Lars Wirzenius Section: devel Priority: optional Standards-Version: 3.9.2 Build-Depends: debhelper (>= 7.3.8), python-all (>= 2.6.6-3~), python-ttystatus, python-cliapp X-Python-Version: >= 2.6 Package: genbackupdata Architecture: all Depends: ${python:Depends}, ${misc:Depends}, python-cliapp (>= 0.9), python-ttystatus Homepage: http://braawi.org/genbackupdata/ Description: generate test data sets for backup software genbackupdata creates or modifies directory trees in ways that simulate real filesystems sufficiently well for performance testing of backup software. For example, it can create files that are a mix of small text files and big binary files, with the binary files containing random binary junk which compresses badly. This can then be backed up, and later the directory tree can be changed by creating new files, modifying files, or deleting or renaming files. The backup can then be run again. genbackupdata_1.6.orig/debian/copyright0000644000000000000000000000167511624412016020313 0ustar rootroot00000000000000Format: http://dep.debian.net/deps/dep5/ Upstream-Name: genbackupdata Upstream-Contact: Lars Wirzenius Source: http://code.liw.fi/genbackupdata/bzr/trunk/ Files: * Copyright: 2007-2011, Lars Wirzenius License: GPL-3+ This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. . This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. . You should have received a copy of the GNU General Public License along with this program. If not, see . . On a Debian system, you can find a copy of GPL version 3 at /usr/share/common-licenses/GPL-3 . genbackupdata_1.6.orig/debian/dirs0000644000000000000000000000002311624412016017226 0ustar rootroot00000000000000usr/share/man/man1 genbackupdata_1.6.orig/debian/rules0000755000000000000000000000015511624412016017430 0ustar rootroot00000000000000#!/usr/bin/make -f %: dh $@ --with=python2 --buildsystem=python_distutils override_dh_auto_build: $(MAKE) genbackupdata_1.6.orig/debian/source/0000755000000000000000000000000011624412016017647 5ustar rootroot00000000000000genbackupdata_1.6.orig/debian/source/format0000644000000000000000000000001411624412016021055 0ustar rootroot000000000000003.0 (quilt) genbackupdata_1.6.orig/genbackupdata0000755000000000000000000000777311624412016017662 0ustar rootroot00000000000000#!/usr/bin/python # Copyright 2011 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program. If not, see . import cliapp import os import sys import ttystatus import genbackupdatalib class GenbackupdataApp(cliapp.Application): def add_settings(self): self.settings.bytesize(['create', 'c'], 'how much data to create (default: %default)') self.settings.bytesize(['file-size'], 'size of one file (default: %default)', default=16*1024) self.settings.bytesize(['chunk-size'], 'generate data in chunks of this size ' '(default: %default)', default=16*1024) self.settings.integer(['depth'], 'depth of directory tree (default: %default)', default=3) self.settings.integer(['max-files'], 'max files/dirs per dir (default: %default)', default=128) self.settings.integer(['seed'], 'seed for random number generator ' '(default: %default)', default=0) self.settings.boolean(['quiet'], 'do not report progress') def process_args(self, args): outputdir = args[0] bytes = self.settings['create'] self.gen = genbackupdatalib.DataGenerator(self.settings['seed']) self.names = genbackupdatalib.NameGenerator(outputdir, self.settings['depth'], self.settings['max-files']) self.setup_ttystatus() self.status['total'] = bytes while bytes > 0: n = min(self.settings['file-size'], bytes) self.create_file(n) bytes -= n self.status.finish() def create_file(self, bytes): '''Generate one output file.''' file_size = self.settings['file-size'] chunk_size = self.settings['chunk-size'] pathname = self.names.new() dirname = os.path.dirname(pathname) if not os.path.exists(dirname): os.makedirs(dirname) f = open(pathname, 'wb') while bytes >= chunk_size: self.write_bytes(f, chunk_size) bytes -= chunk_size if bytes > 0: self.write_bytes(f, bytes) f.close() def write_bytes(self, f, bytes): chunk = self.gen.generate(bytes) f.write(chunk) self.status['written'] += bytes def setup_ttystatus(self): self.status = ttystatus.TerminalStatus(period=0.1) if self.settings['quiet']: self.status.disable() self.status['written'] = 0 self.status['total'] = 0 self.status.add(ttystatus.Literal('Generating: ')) self.status.add(ttystatus.ByteSize('written')) self.status.add(ttystatus.Literal(' of ')) self.status.add(ttystatus.ByteSize('total')) self.status.add(ttystatus.Literal(' ')) self.status.add(ttystatus.PercentDone('written', 'total')) self.status.add(ttystatus.Literal(' (')) self.status.add(ttystatus.ByteSpeed('written')) self.status.add(ttystatus.Literal(')')) if __name__ == '__main__': GenbackupdataApp().run() genbackupdata_1.6.orig/genbackupdata.1.in0000644000000000000000000000551311624412016020411 0ustar rootroot00000000000000.\" Copyright 2007-2011 Lars Wirzenius .\" .\" This program is free software: you can redistribute it and/or modify .\" it under the terms of the GNU General Public License as published by .\" the Free Software Foundation, either version 3 of the License, or .\" (at your option) any later version. .\" .\" This program is distributed in the hope that it will be useful, .\" but WITHOUT ANY WARRANTY; without even the implied warranty of .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the .\" GNU General Public License for more details. .\" .\" You should have received a copy of the GNU General Public License .\" along with this program. If not, see . .\" .TH GENBACKUPDATA 1 .SH NAME genbackupdata \- generate backup test data .SH SYNOPSIS .SH DESCRIPTION .B genbackupdata generates test data sets for performance testing of backup software. It creates a directory tree filled with files of different sizes. 
The total size and the distribution of sizes between small and big are configurable. The program can also modify an existing directory tree by creating new files, and deleting, renaming, or modifying existing files. This can be used to generate test data for successive generations of backups. .PP The program is deterministic: with a given set of parameters (and a given pre-existing directory tree), it always creates the same output. This way, it is possible to reproduce backup tests exactly, without having to distribute the potentially very large test sets. .PP The data set consists of plain files and directories. Files are either small text files or big binary files. Text files contain the "lorem ipsum" stanza, binary files contain randomly generated byte streams. The percentage of file data that is small text or big binary files can be set, as can the sizes of the respective file types. .PP Files and directories are named "fileXXXX" or "dirXXXX", where "XXXX" is a successive integer, separate successions for files and directories. There is an upper limit to how many files a directory may contain. After the file limit is reached, a new sub-directory is created. The first set of files go into the root directory of the test set. .PP You have to give one of the options .BR \-\-create , .BR \-\-delete , .BR \-\-rename , or .BR \-\-modify for the program to do anything. You can, however, give more than one of them, if .I DIR already exists. (Giving the same option more than once means that only the last instance is counted.) .RI ( DIR ) is created if it doesn't exist already. .SH OPTIONS .SH EXAMPLES Create data for the first generation of a backup: .PP .RS genbackupdata \-\-create=10G testdir .RE .PP Modify an existing set of backup data to create a new generation: .PP .RS genbackupdata \-c 5% \-d 2% \-m 5% \-r 0.5% testdir .RE .PP The above command can be run for each new generation. genbackupdata_1.6.orig/genbackupdatalib/0000755000000000000000000000000011624412016020405 5ustar rootroot00000000000000genbackupdata_1.6.orig/genbackupdatalib/__init__.py0000644000000000000000000000141711624412016022521 0ustar rootroot00000000000000# Copyright 2010 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . __version__ = '1.6' from generator import DataGenerator from names import NameGenerator __all__ = locals() genbackupdata_1.6.orig/genbackupdatalib/generator.py0000644000000000000000000000424011624412016022745 0ustar rootroot00000000000000# Copyright 2010 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . import random import struct class DataGenerator(object): '''Generate random binary data.''' # We generate data by using a blob of suitable size. The output # sequence repeats the blob, where each repetition is preceded by # a 64-bit counter. # # We need to be relatively prime with obnam's chunk size, which # defaults to 64 KiB (65536 bytes). This is so that obnam does not # notice a lot of duplicated data, resulting in unrealistically # high amounts of compression in the backup store. # # Ideally, we would not generate any repeating data, but the random # number generator is not fast enough for that. We need to generate # data about as fast as the disk can write it, and the random number # generator is orders of magnitude slower than that. _blob_size = 65521 _blob_size = 1021 def __init__(self, seed): self._random = random.Random(seed) self._blob = self._generate_blob() self._counter = 0 self._buffer = '' def _generate_blob(self): return ''.join(chr(self._random.randint(0, 255)) for i in range(self._blob_size)) def generate(self, size): while size > len(self._buffer): self._buffer += self._generate_more_data() data = self._buffer[:size] self._buffer = self._buffer[size:] return data def _generate_more_data(self): self._counter += 1 return struct.pack('!Q', self._counter) + self._blob genbackupdata_1.6.orig/genbackupdatalib/generator_tests.py0000644000000000000000000000276011624412016024174 0ustar rootroot00000000000000# Copyright 2010 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . import unittest import genbackupdatalib class DataGeneratorTests(unittest.TestCase): def setUp(self): self.g1 = genbackupdatalib.DataGenerator(0) self.g2 = genbackupdatalib.DataGenerator(0) def test_every_generator_returns_same_sequence(self): amount = 1024 self.assertEqual(self.g1.generate(amount), self.g2.generate(amount)) def test_returns_different_sequence_for_different_seed(self): amount = 1024 g3 = genbackupdatalib.DataGenerator(1) self.assertNotEqual(self.g1.generate(amount), g3.generate(amount)) def test_returns_distinct_64k_chunks(self): size = 64 * 1024 chunk1 = self.g1.generate(size) num_chunks = 100 for i in range(num_chunks): self.assertNotEqual(self.g1.generate(size), chunk1) genbackupdata_1.6.orig/genbackupdatalib/names.py0000644000000000000000000000474611624412016022075 0ustar rootroot00000000000000# Copyright 2011 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . import os class NameGenerator(object): '''Generate names for new output files. If the target directory is empty, the sequence of output files is always the same for the same parameters. A directory structure is also generated. The shape of the tree is defined by two parameters: 'max' and 'depth'. 'depth' is the number of levels of subdirectories to create, and 'max' is the maximum number of files/dirs to allow per output directory. Thus, if max is 3 and depth is 2, the output files are: 0/0/0, 0/0/1, 0/0/2, 0/1/0, 0/1/1, etc. If depth is zero, all output files go directly to the target directory, and max is ignored. ''' def __init__(self, dirname, depth, max): self.dirname = dirname self.depth = depth self.max = max self.counter = 0 def _path_tuple(self, n): '''Return tuple for dir/file numbers for nth output file. The last item in the tuple gives the file number, the preceding items the directory numbers. Thus, a tuple (1, 2, 3) would mean path '1/2/3', but it is given as a tuple for easier manipulation. ''' if self.depth == 0: return (n,) else: items = [] for i in range(self.depth): items.append(n % self.max) n /= self.max items.append(n) items.reverse() return tuple(items) def _next_candidate_name(self): items = self._path_tuple(self.counter) self.counter += 1 return os.path.join(self.dirname, *[str(i) for i in items]) def new(self): while True: name = self._next_candidate_name() if not os.path.exists(name): return name genbackupdata_1.6.orig/genbackupdatalib/names_tests.py0000644000000000000000000000604011624412016023304 0ustar rootroot00000000000000# Copyright 2011 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
import os import shutil import tempfile import unittest import genbackupdatalib class NameGeneratorTests(unittest.TestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.depth = 2 self.max = 3 self.names = self.new() def tearDown(self): shutil.rmtree(self.tempdir) def new(self): return genbackupdatalib.NameGenerator(self.tempdir, self.depth, self.max) def test_generates_name_that_is_inside_target_directory(self): name = self.names.new() self.assert_(name.startswith(self.tempdir + os.sep)) def test_generates_different_names_every_time(self): names = set(self.names.new() for i in range(10)) self.assertEqual(len(names), 10) def test_generates_names_that_do_not_exist(self): for i in range(10): name = self.names.new() self.assertFalse(os.path.exists(name)) def test_generates_the_same_sequence_with_every_instance(self): n = 10 first = [self.names.new() for i in range(n)] names2 = self.new() second = [names2.new() for i in range(n)] self.assertEqual(first, second) def test_does_not_generate_names_of_existing_files(self): name = self.names.new() os.makedirs(os.path.dirname(name)) file(name, 'w').close() names2 = self.new() name2 = names2.new() self.assertNotEqual(name, name2) self.assertFalse(os.path.exists(name2)) def test_converts_file_sequence_number_into_right_path_tuple(self): self.assertEqual(self.names._path_tuple(0), (0, 0, 0)) self.assertEqual(self.names._path_tuple(1), (0, 0, 1)) self.assertEqual(self.names._path_tuple(2), (0, 0, 2)) self.assertEqual(self.names._path_tuple(3), (0, 1, 0)) self.assertEqual(self.names._path_tuple(4), (0, 1, 1)) self.assertEqual(self.names._path_tuple(5), (0, 1, 2)) self.assertEqual(self.names._path_tuple(6), (0, 2, 0)) self.assertEqual(self.names._path_tuple(9), (1, 0, 0)) self.assertEqual(self.names._path_tuple(18), (2, 0, 0)) self.assertEqual(self.names._path_tuple(27), (3, 0, 0)) def test_returns_1tuple_for_depth_zero(self): names = genbackupdatalib.NameGenerator(self.tempdir, 0, 1) self.assertEqual(names._path_tuple(42), (42,)) genbackupdata_1.6.orig/generate-speed0000755000000000000000000000400311624412016017740 0ustar rootroot00000000000000#!/usr/bin/python # Copyright 2010 Lars Wirzenius # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
import cProfile import sys import time import genbackupdatalib def measure(repeats, func, arg, do_profile, profname): def helper(): for i in range(repeats): func(arg) print 'measuring', profname start_time = time.time() start = time.clock() if do_profile: globaldict = globals().copy() localdict = locals().copy() cProfile.runctx('helper()', globaldict, localdict, '%s.prof' % profname) else: helper() end = time.clock() end_time = time.time() return end - start, end_time - start_time def nop(size): pass def main(): repeats = int(sys.argv[1]) size1 = int(sys.argv[2]) do_profile = sys.argv[3] == 'yes' looptime = measure(repeats, nop, None, do_profile, 'calibrate') g = genbackupdatalib.DataGenerator(0) result = measure(repeats, g.generate, size1, do_profile, 'generate') def speed(result, i): total_data = repeats * size1 return total_data / (result[i] - looptime[i]) def humansize(size): return '%4.1f MiB/s' % (size / 1024 / 1024) def report(label, result): cpu, wall = result print '%-12s: %5.3f s (%8s)' % \ (label, cpu, humansize(speed(result, 0))) report('generate', result) if __name__ == '__main__': main() genbackupdata_1.6.orig/project.meta0000644000000000000000000000007511624412016017445 0ustar rootroot00000000000000[config] basetgz = /home/pbuilder-tgz/sid-amd64-pristine.tgz genbackupdata_1.6.orig/setup.py0000644000000000000000000000332711624412016016644 0ustar rootroot00000000000000from distutils.core import setup import genbackupdatalib setup(name='genbackupdata', version=genbackupdatalib.__version__, description='Generate test data for backup software', long_description='''\ genbackupdata creates or modifies directory trees in ways that simulate real filesystems sufficiently well for performance testing of backup software. For example, it can create files that are a mix of small text files and big binary files, with the binary files containing random binary junk which compresses badly. This can then be backed up, and later the directory tree can be changed by creating new files, modifying files, or deleting or renaming files. The backup can then be run again. The output is deterministic, such that for a given set of parameters the same output always happens. Thus it is more efficient to distribute genbackupdata and a set of parameters between people who wish to benchmark backup software than distributing very large test sets. ''', author='Lars Wirzenius', author_email='liw@iki.fi', url='http://braawi.org/genbackupdata/', classifiers=[ 'Development Status :: 3 - Alpha', 'Environment :: Console', 'Intended Audience :: Developers', 'License :: OSI Approved :: GNU General Public License (GPL)', 'Natural Language :: English', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Topic :: Software Development :: Testing', 'Topic :: System :: Archiving :: Backup', ], license='GNU General Public License, version 3 or later', packages=['genbackupdatalib'], scripts=['genbackupdata'], data_files=[('share/man/man1', ['genbackupdata.1'])], ) genbackupdata_1.6.orig/without-tests0000644000000000000000000000005211624412016017710 0ustar rootroot00000000000000./setup.py ./genbackupdatalib/__init__.py
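As a closing illustration, the counter-plus-blob scheme described in the comments of genbackupdatalib/generator.py above can be sketched in a few lines of standalone Python. This is an approximation for illustration only, not a copy of the module: each repetition of a (here fake) blob is prefixed with a 64-bit big-endian counter, so no two repetitions of otherwise identical data are byte-for-byte equal.

import struct

BLOB = b'x' * 1021  # stand-in for the module's randomly generated blob

def sample_stream(nrepeats):
    # Each repetition of the blob is preceded by a counter, as in DataGenerator.
    return b''.join(struct.pack('!Q', i + 1) + BLOB for i in range(nrepeats))

first = sample_stream(1)
second = sample_stream(2)[len(first):]
assert first != second  # the counter prefix makes every repetition distinct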