fastimport-0.9.4/0000755000175000017500000000000012356107410014471 5ustar jelmerjelmer00000000000000fastimport-0.9.4/PKG-INFO0000644000175000017500000000042612356107410015570 0ustar jelmerjelmer00000000000000Metadata-Version: 1.0 Name: fastimport Version: 0.9.4 Summary: VCS fastimport/fastexport parser Home-page: https://launchpad.net/python-fastimport Author: Canonical Ltd Author-email: bazaar@lists.canonical.com License: GNU GPL v2 or later Description: UNKNOWN Platform: UNKNOWN fastimport-0.9.4/fastimport/0000755000175000017500000000000012356107410016661 5ustar jelmerjelmer00000000000000fastimport-0.9.4/fastimport/tests/0000755000175000017500000000000012356107410020023 5ustar jelmerjelmer00000000000000fastimport-0.9.4/fastimport/tests/test_dates.py0000644000175000017500000000215612304377714022551 0ustar jelmerjelmer00000000000000# Copyright (C) 2012 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
"""Test parsing of dates.""" from unittest import TestCase from fastimport import ( dates, ) class ParseTzTests(TestCase): def test_parse_tz_utc(self): self.assertEquals(0, dates.parse_tz("+0000")) self.assertEquals(0, dates.parse_tz("-0000")) def test_parse_tz_cet(self): self.assertEquals(3600, dates.parse_tz("+0100")) def test_parse_tz_odd(self): self.assertEquals(1864800, dates.parse_tz("+51800")) fastimport-0.9.4/fastimport/tests/test_errors.py0000644000175000017500000000534112304377714022764 0ustar jelmerjelmer00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
"""Test the Import errors""" from unittest import TestCase from fastimport import ( errors, ) class TestErrors(TestCase): def test_MissingBytes(self): e = errors.MissingBytes(99, 10, 8) self.assertEqual("line 99: Unexpected EOF - expected 10 bytes, found 8", str(e)) def test_MissingTerminator(self): e = errors.MissingTerminator(99, '---') self.assertEqual("line 99: Unexpected EOF - expected '---' terminator", str(e)) def test_InvalidCommand(self): e = errors.InvalidCommand(99, 'foo') self.assertEqual("line 99: Invalid command 'foo'", str(e)) def test_MissingSection(self): e = errors.MissingSection(99, 'foo', 'bar') self.assertEqual("line 99: Command foo is missing section bar", str(e)) def test_BadFormat(self): e = errors.BadFormat(99, 'foo', 'bar', 'xyz') self.assertEqual("line 99: Bad format for section bar in " "command foo: found 'xyz'", str(e)) def test_InvalidTimezone(self): e = errors.InvalidTimezone(99, 'aa:bb') self.assertEqual('aa:bb', e.timezone) self.assertEqual('', e.reason) self.assertEqual("line 99: Timezone 'aa:bb' could not be converted.", str(e)) e = errors.InvalidTimezone(99, 'aa:bb', 'Non-numeric hours') self.assertEqual('aa:bb', e.timezone) self.assertEqual(' Non-numeric hours', e.reason) self.assertEqual("line 99: Timezone 'aa:bb' could not be converted." 
" Non-numeric hours", str(e)) def test_UnknownDateFormat(self): e = errors.UnknownDateFormat('aaa') self.assertEqual("Unknown date format 'aaa'", str(e)) def test_MissingHandler(self): e = errors.MissingHandler('foo') self.assertEqual("Missing handler for command foo", str(e)) def test_UnknownFeature(self): e = errors.UnknownFeature('aaa') self.assertEqual("Unknown feature 'aaa' - try a later importer or " "an earlier data format", str(e)) fastimport-0.9.4/fastimport/tests/test_helpers.py0000644000175000017500000000362312304377714023113 0ustar jelmerjelmer00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
"""Test the helper functions.""" import unittest from fastimport import ( helpers, ) class TestCommonDirectory(unittest.TestCase): def test_no_paths(self): c = helpers.common_directory(None) self.assertEqual(c, None) c = helpers.common_directory([]) self.assertEqual(c, None) def test_one_path(self): c = helpers.common_directory(['foo']) self.assertEqual(c, '') c = helpers.common_directory(['foo/']) self.assertEqual(c, 'foo/') c = helpers.common_directory(['foo/bar']) self.assertEqual(c, 'foo/') def test_two_paths(self): c = helpers.common_directory(['foo', 'bar']) self.assertEqual(c, '') c = helpers.common_directory(['foo/', 'bar']) self.assertEqual(c, '') c = helpers.common_directory(['foo/', 'foo/bar']) self.assertEqual(c, 'foo/') c = helpers.common_directory(['foo/bar/x', 'foo/bar/y']) self.assertEqual(c, 'foo/bar/') c = helpers.common_directory(['foo/bar/aa_x', 'foo/bar/aa_y']) self.assertEqual(c, 'foo/bar/') def test_lots_of_paths(self): c = helpers.common_directory(['foo/bar/x', 'foo/bar/y', 'foo/bar/z']) self.assertEqual(c, 'foo/bar/') fastimport-0.9.4/fastimport/tests/test_commands.py0000644000175000017500000003677312356107233023260 0ustar jelmerjelmer00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
"""Test how Commands are displayed""" from unittest import TestCase from fastimport import ( commands, ) class TestBlobDisplay(TestCase): def test_blob(self): c = commands.BlobCommand("1", "hello world") self.assertEqual("blob\nmark :1\ndata 11\nhello world", repr(c)) def test_blob_no_mark(self): c = commands.BlobCommand(None, "hello world") self.assertEqual("blob\ndata 11\nhello world", repr(c)) class TestCheckpointDisplay(TestCase): def test_checkpoint(self): c = commands.CheckpointCommand() self.assertEqual("checkpoint", repr(c)) class TestCommitDisplay(TestCase): def test_commit(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, "release v1.0", ":aaa", None, None) self.assertEqual( "commit refs/heads/master\n" "mark :bbb\n" "committer Joe Wong 1234567890 -0600\n" "data 12\n" "release v1.0\n" "from :aaa", repr(c)) def test_commit_unicode_committer(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) name = u'\u013d\xf3r\xe9m \xcdp\u0161\xfam' name_utf8 = name.encode('utf8') committer = (name, 'test@example.com', 1234567890, -6 * 3600) c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, "release v1.0", ":aaa", None, None) self.assertEqual( "commit refs/heads/master\n" "mark :bbb\n" "committer %s 1234567890 -0600\n" "data 12\n" "release v1.0\n" "from :aaa" % (name_utf8,), repr(c)) def test_commit_no_mark(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) c = commands.CommitCommand("refs/heads/master", None, None, committer, "release v1.0", ":aaa", None, None) self.assertEqual( "commit refs/heads/master\n" "committer Joe Wong 1234567890 -0600\n" "data 12\n" "release v1.0\n" "from :aaa", repr(c)) def test_commit_no_from(self): # user tuple is (name, email, 
secs-since-epoch, secs-offset-from-utc) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, "release v1.0", None, None, None) self.assertEqual( "commit refs/heads/master\n" "mark :bbb\n" "committer Joe Wong 1234567890 -0600\n" "data 12\n" "release v1.0", repr(c)) def test_commit_with_author(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) author = ('Sue Wong', 'sue@example.com', 1234565432, -6 * 3600) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) c = commands.CommitCommand("refs/heads/master", "bbb", author, committer, "release v1.0", ":aaa", None, None) self.assertEqual( "commit refs/heads/master\n" "mark :bbb\n" "author Sue Wong 1234565432 -0600\n" "committer Joe Wong 1234567890 -0600\n" "data 12\n" "release v1.0\n" "from :aaa", repr(c)) def test_commit_with_merges(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) c = commands.CommitCommand("refs/heads/master", "ddd", None, committer, "release v1.0", ":aaa", [':bbb', ':ccc'], None) self.assertEqual( "commit refs/heads/master\n" "mark :ddd\n" "committer Joe Wong 1234567890 -0600\n" "data 12\n" "release v1.0\n" "from :aaa\n" "merge :bbb\n" "merge :ccc", repr(c)) def test_commit_with_filecommands(self): file_cmds = iter([ commands.FileDeleteCommand('readme.txt'), commands.FileModifyCommand('NEWS', 0100644, None, 'blah blah blah'), ]) # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, "release v1.0", ":aaa", None, file_cmds) self.assertEqual( "commit refs/heads/master\n" "mark :bbb\n" "committer Joe Wong 1234567890 -0600\n" "data 12\n" "release v1.0\n" "from :aaa\n" "D readme.txt\n" "M 644 inline NEWS\n" "data 14\n" "blah blah blah", 
repr(c)) def test_commit_with_more_authors(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) author = ('Sue Wong', 'sue@example.com', 1234565432, -6 * 3600) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) more_authors = [ ('Al Smith', 'al@example.com', 1234565432, -6 * 3600), ('Bill Jones', 'bill@example.com', 1234565432, -6 * 3600), ] c = commands.CommitCommand("refs/heads/master", "bbb", author, committer, "release v1.0", ":aaa", None, None, more_authors=more_authors) self.assertEqual( "commit refs/heads/master\n" "mark :bbb\n" "author Sue Wong 1234565432 -0600\n" "author Al Smith 1234565432 -0600\n" "author Bill Jones 1234565432 -0600\n" "committer Joe Wong 1234567890 -0600\n" "data 12\n" "release v1.0\n" "from :aaa", repr(c)) def test_commit_with_properties(self): # user tuple is (name, email, secs-since-epoch, secs-offset-from-utc) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) properties = { u'greeting': u'hello', u'planet': u'world', } c = commands.CommitCommand("refs/heads/master", "bbb", None, committer, "release v1.0", ":aaa", None, None, properties=properties) self.assertEqual( "commit refs/heads/master\n" "mark :bbb\n" "committer Joe Wong 1234567890 -0600\n" "data 12\n" "release v1.0\n" "from :aaa\n" "property greeting 5 hello\n" "property planet 5 world", repr(c)) class TestCommitCopy(TestCase): def setUp(self): super(TestCommitCopy, self).setUp() file_cmds = iter([ commands.FileDeleteCommand('readme.txt'), commands.FileModifyCommand('NEWS', 0100644, None, 'blah blah blah'), ]) committer = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) self.c = commands.CommitCommand( "refs/heads/master", "bbb", None, committer, "release v1.0", ":aaa", None, file_cmds) def test_simple_copy(self): c2 = self.c.copy() self.assertFalse(self.c is c2) self.assertEqual(repr(self.c), repr(c2)) def test_replace_attr(self): c2 = self.c.copy(mark='ccc') self.assertEqual( repr(self.c).replace('mark :bbb', 
'mark :ccc'), repr(c2)) def test_invalid_attribute(self): self.assertRaises(TypeError, self.c.copy, invalid=True) class TestFeatureDisplay(TestCase): def test_feature(self): c = commands.FeatureCommand("dwim") self.assertEqual("feature dwim", repr(c)) def test_feature_with_value(self): c = commands.FeatureCommand("dwim", "please") self.assertEqual("feature dwim=please", repr(c)) class TestProgressDisplay(TestCase): def test_progress(self): c = commands.ProgressCommand("doing foo") self.assertEqual("progress doing foo", repr(c)) class TestResetDisplay(TestCase): def test_reset(self): c = commands.ResetCommand("refs/tags/v1.0", ":xxx") self.assertEqual("reset refs/tags/v1.0\nfrom :xxx\n", repr(c)) def test_reset_no_from(self): c = commands.ResetCommand("refs/remotes/origin/master", None) self.assertEqual("reset refs/remotes/origin/master", repr(c)) class TestTagDisplay(TestCase): def test_tag(self): # tagger tuple is (name, email, secs-since-epoch, secs-offset-from-utc) tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) c = commands.TagCommand("refs/tags/v1.0", ":xxx", tagger, "create v1.0") self.assertEqual( "tag refs/tags/v1.0\n" "from :xxx\n" "tagger Joe Wong 1234567890 -0600\n" "data 11\n" "create v1.0", repr(c)) def test_tag_no_from(self): tagger = ('Joe Wong', 'joe@example.com', 1234567890, -6 * 3600) c = commands.TagCommand("refs/tags/v1.0", None, tagger, "create v1.0") self.assertEqual( "tag refs/tags/v1.0\n" "tagger Joe Wong 1234567890 -0600\n" "data 11\n" "create v1.0", repr(c)) class TestFileModifyDisplay(TestCase): def test_filemodify_file(self): c = commands.FileModifyCommand("foo/bar", 0100644, ":23", None) self.assertEqual("M 644 :23 foo/bar", repr(c)) def test_filemodify_file_executable(self): c = commands.FileModifyCommand("foo/bar", 0100755, ":23", None) self.assertEqual("M 755 :23 foo/bar", repr(c)) def test_filemodify_file_internal(self): c = commands.FileModifyCommand("foo/bar", 0100644, None, "hello world") self.assertEqual("M 644 
inline foo/bar\ndata 11\nhello world", repr(c)) def test_filemodify_symlink(self): c = commands.FileModifyCommand("foo/bar", 0120000, None, "baz") self.assertEqual("M 120000 inline foo/bar\ndata 3\nbaz", repr(c)) def test_filemodify_treeref(self): c = commands.FileModifyCommand("tree-info", 0160000, "revision-id-info", None) self.assertEqual("M 160000 revision-id-info tree-info", repr(c)) class TestFileDeleteDisplay(TestCase): def test_filedelete(self): c = commands.FileDeleteCommand("foo/bar") self.assertEqual("D foo/bar", repr(c)) class TestFileCopyDisplay(TestCase): def test_filecopy(self): c = commands.FileCopyCommand("foo/bar", "foo/baz") self.assertEqual("C foo/bar foo/baz", repr(c)) def test_filecopy_quoted(self): # Check the first path is quoted if it contains spaces c = commands.FileCopyCommand("foo/b a r", "foo/b a z") self.assertEqual('C "foo/b a r" foo/b a z', repr(c)) class TestFileRenameDisplay(TestCase): def test_filerename(self): c = commands.FileRenameCommand("foo/bar", "foo/baz") self.assertEqual("R foo/bar foo/baz", repr(c)) def test_filerename_quoted(self): # Check the first path is quoted if it contains spaces c = commands.FileRenameCommand("foo/b a r", "foo/b a z") self.assertEqual('R "foo/b a r" foo/b a z', repr(c)) class TestFileDeleteAllDisplay(TestCase): def test_filedeleteall(self): c = commands.FileDeleteAllCommand() self.assertEqual("deleteall", repr(c)) class TestNotesDisplay(TestCase): def test_noteonly(self): c = commands.NoteModifyCommand('foo', "A basic note") self.assertEqual('N inline :foo\ndata 12\nA basic note', repr(c)) def test_notecommit(self): committer = ("Ed Mund", 'ed@example.org', 1234565432, 0) commits = [ commands.CommitCommand( ref='refs/heads/master', mark='1', author=committer, committer=committer, message="test\n", from_=None, merges=[], file_iter=[ commands.FileModifyCommand('bar', 0100644, None, '') ]), commands.CommitCommand( ref='refs/notes/commits', mark=None, author=None, committer=committer, message="Notes 
added by 'git notes add'\n", from_=None, merges=[], file_iter=[ commands.NoteModifyCommand('1', "Test note\n") ]), commands.CommitCommand( ref='refs/notes/test', mark=None, author=None, committer=committer, message="Notes added by 'git notes add'\n", from_=None, merges=[], file_iter=[ commands.NoteModifyCommand('1', "Test test\n") ]) ] self.assertEqual( """commit refs/heads/master mark :1 author %(user)s committer %(user)s data 5 test M 644 inline bar data 0 commit refs/notes/commits committer %(user)s data 31 Notes added by 'git notes add' N inline :1 data 10 Test note commit refs/notes/test committer %(user)s data 31 Notes added by 'git notes add' N inline :1 data 10 Test test """ % { 'user': '%s <%s> %d %+05d' % committer, }, ''.join(map(repr, commits))) class TestPathChecking(TestCase): def test_filemodify_path_checking(self): self.assertRaises(ValueError, commands.FileModifyCommand, "", 0100644, None, "text") self.assertRaises(ValueError, commands.FileModifyCommand, None, 0100644, None, "text") def test_filedelete_path_checking(self): self.assertRaises(ValueError, commands.FileDeleteCommand, "") self.assertRaises(ValueError, commands.FileDeleteCommand, None) def test_filerename_path_checking(self): self.assertRaises(ValueError, commands.FileRenameCommand, "", "foo") self.assertRaises(ValueError, commands.FileRenameCommand, None, "foo") self.assertRaises(ValueError, commands.FileRenameCommand, "foo", "") self.assertRaises(ValueError, commands.FileRenameCommand, "foo", None) def test_filecopy_path_checking(self): self.assertRaises(ValueError, commands.FileCopyCommand, "", "foo") self.assertRaises(ValueError, commands.FileCopyCommand, None, "foo") self.assertRaises(ValueError, commands.FileCopyCommand, "foo", "") self.assertRaises(ValueError, commands.FileCopyCommand, "foo", None) fastimport-0.9.4/fastimport/tests/test_filter_processor.py0000644000175000017500000004677512304377714025054 0ustar jelmerjelmer00000000000000# Copyright (C) 2009 Canonical Ltd # # This 
program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Test FilterProcessor""" from cStringIO import StringIO from unittest import TestCase from fastimport import ( parser, ) from fastimport.processors import ( filter_processor, ) # A sample input stream containing all (top level) import commands _SAMPLE_ALL = \ """blob mark :1 data 4 foo commit refs/heads/master mark :2 committer Joe 1234567890 +1000 data 14 Initial import M 644 :1 COPYING checkpoint progress first import done reset refs/remote/origin/master from :2 tag v0.1 from :2 tagger Joe 1234567890 +1000 data 12 release v0.1 """ # A sample input stream creating the following tree: # # NEWS # doc/README.txt # doc/index.txt _SAMPLE_WITH_DIR = \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 doc/README.txt blob mark :2 data 17 Life is good ... commit refs/heads/master mark :101 committer a 1234798653 +0000 data 8 test ing from :100 M 644 :2 NEWS blob mark :3 data 19 Welcome! 
my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :101 M 644 :3 doc/README.txt M 644 :4 doc/index.txt """ class TestCaseWithFiltering(TestCase): def assertFiltering(self, input, params, expected): outf = StringIO() proc = filter_processor.FilterProcessor( params=params) proc.outf = outf s = StringIO(input) p = parser.ImportParser(s) proc.process(p.iter_commands) out = outf.getvalue() self.assertEquals(expected, out) class TestNoFiltering(TestCaseWithFiltering): def test_params_not_given(self): self.assertFiltering(_SAMPLE_ALL, None, _SAMPLE_ALL) def test_params_are_none(self): params = {'include_paths': None, 'exclude_paths': None} self.assertFiltering(_SAMPLE_ALL, params, _SAMPLE_ALL) class TestIncludePaths(TestCaseWithFiltering): def test_file_in_root(self): # Things to note: # * only referenced blobs are retained # * from clause is dropped from the first command params = {'include_paths': ['NEWS']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :2 data 17 Life is good ... commit refs/heads/master mark :101 committer a 1234798653 +0000 data 8 test ing M 644 :2 NEWS """) def test_file_in_subdir(self): # Additional things to note: # * new root: path is now index.txt, not doc/index.txt # * other files changed in matching commits are excluded params = {'include_paths': ['doc/index.txt']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing M 644 :4 index.txt """) def test_file_with_changes(self): # Additional things to note: # * from updated to reference parents in the output params = {'include_paths': ['doc/README.txt']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! 
my friend commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt """) def test_subdir(self): params = {'include_paths': ['doc/']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt M 644 :4 index.txt """) def test_multiple_files_in_subdir(self): # The new root should be the subdrectory params = {'include_paths': ['doc/README.txt', 'doc/index.txt']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt M 644 :4 index.txt """) class TestExcludePaths(TestCaseWithFiltering): def test_file_in_root(self): params = {'exclude_paths': ['NEWS']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 doc/README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 doc/README.txt M 644 :4 doc/index.txt """) def test_file_in_subdir(self): params = {'exclude_paths': ['doc/README.txt']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :2 data 17 Life is good ... 
commit refs/heads/master mark :101 committer a 1234798653 +0000 data 8 test ing M 644 :2 NEWS blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :101 M 644 :4 doc/index.txt """) def test_subdir(self): params = {'exclude_paths': ['doc/']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :2 data 17 Life is good ... commit refs/heads/master mark :101 committer a 1234798653 +0000 data 8 test ing M 644 :2 NEWS """) def test_multple_files(self): params = {'exclude_paths': ['doc/index.txt', 'NEWS']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 doc/README.txt blob mark :3 data 19 Welcome! my friend commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 doc/README.txt """) class TestIncludeAndExcludePaths(TestCaseWithFiltering): def test_included_dir_and_excluded_file(self): params = {'include_paths': ['doc/'], 'exclude_paths': ['doc/index.txt']} self.assertFiltering(_SAMPLE_WITH_DIR, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! 
my friend commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt """) # A sample input stream creating the following tree: # # NEWS # doc/README.txt # doc/index.txt # # It then renames doc/README.txt => doc/README _SAMPLE_WITH_RENAME_INSIDE = _SAMPLE_WITH_DIR + \ """commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 R doc/README.txt doc/README """ # A sample input stream creating the following tree: # # NEWS # doc/README.txt # doc/index.txt # # It then renames doc/README.txt => README _SAMPLE_WITH_RENAME_TO_OUTSIDE = _SAMPLE_WITH_DIR + \ """commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 R doc/README.txt README """ # A sample input stream creating the following tree: # # NEWS # doc/README.txt # doc/index.txt # # It then renames NEWS => doc/NEWS _SAMPLE_WITH_RENAME_TO_INSIDE = _SAMPLE_WITH_DIR + \ """commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 R NEWS doc/NEWS """ class TestIncludePathsWithRenames(TestCaseWithFiltering): def test_rename_all_inside(self): # These rename commands ought to be kept but adjusted for the new root params = {'include_paths': ['doc/']} self.assertFiltering(_SAMPLE_WITH_RENAME_INSIDE, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt M 644 :4 index.txt commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 R README.txt README """) def test_rename_to_outside(self): # These rename commands become deletes params = {'include_paths': ['doc/']} self.assertFiltering(_SAMPLE_WITH_RENAME_TO_OUTSIDE, params, \ """blob mark :1 data 9 Welcome! 
commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt M 644 :4 index.txt commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 D README.txt """) def test_rename_to_inside(self): # This ought to create a new file but doesn't yet params = {'include_paths': ['doc/']} self.assertFiltering(_SAMPLE_WITH_RENAME_TO_INSIDE, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt M 644 :4 index.txt """) # A sample input stream creating the following tree: # # NEWS # doc/README.txt # doc/index.txt # # It then copies doc/README.txt => doc/README _SAMPLE_WITH_COPY_INSIDE = _SAMPLE_WITH_DIR + \ """commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 C doc/README.txt doc/README """ # A sample input stream creating the following tree: # # NEWS # doc/README.txt # doc/index.txt # # It then copies doc/README.txt => README _SAMPLE_WITH_COPY_TO_OUTSIDE = _SAMPLE_WITH_DIR + \ """commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 C doc/README.txt README """ # A sample input stream creating the following tree: # # NEWS # doc/README.txt # doc/index.txt # # It then copies NEWS => doc/NEWS _SAMPLE_WITH_COPY_TO_INSIDE = _SAMPLE_WITH_DIR + \ """commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 C NEWS doc/NEWS """ class TestIncludePathsWithCopies(TestCaseWithFiltering): def test_copy_all_inside(self): # These copy commands ought to be kept but adjusted 
for the new root params = {'include_paths': ['doc/']} self.assertFiltering(_SAMPLE_WITH_COPY_INSIDE, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt M 644 :4 index.txt commit refs/heads/master mark :103 committer d 1234798653 +0000 data 10 move intro from :102 C README.txt README """) def test_copy_to_outside(self): # This can be ignored params = {'include_paths': ['doc/']} self.assertFiltering(_SAMPLE_WITH_COPY_TO_OUTSIDE, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt M 644 :4 index.txt """) def test_copy_to_inside(self): # This ought to create a new file but doesn't yet params = {'include_paths': ['doc/']} self.assertFiltering(_SAMPLE_WITH_COPY_TO_INSIDE, params, \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test M 644 :1 README.txt blob mark :3 data 19 Welcome! my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 M 644 :3 README.txt M 644 :4 index.txt """) # A sample input stream with deleteall's creating the following tree: # # NEWS # doc/README.txt # doc/index.txt _SAMPLE_WITH_DELETEALL = \ """blob mark :1 data 9 Welcome! commit refs/heads/master mark :100 committer a 1234798653 +0000 data 4 test deleteall M 644 :1 doc/README.txt blob mark :3 data 19 Welcome! 
my friend blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 deleteall M 644 :3 doc/README.txt M 644 :4 doc/index.txt """ class TestIncludePathsWithDeleteAll(TestCaseWithFiltering): def test_deleteall(self): params = {'include_paths': ['doc/index.txt']} self.assertFiltering(_SAMPLE_WITH_DELETEALL, params, \ """blob mark :4 data 11 == Docs == commit refs/heads/master mark :102 committer d 1234798653 +0000 data 8 test ing from :100 deleteall M 644 :4 index.txt """) _SAMPLE_WITH_TAGS = _SAMPLE_WITH_DIR + \ """tag v0.1 from :100 tagger d 1234798653 +0000 data 12 release v0.1 tag v0.2 from :102 tagger d 1234798653 +0000 data 12 release v0.2 """ class TestIncludePathsWithTags(TestCaseWithFiltering): def test_tag_retention(self): # If a tag references a commit with a parent we kept, # keep the tag but adjust 'from' accordingly. # Otherwise, delete the tag command. params = {'include_paths': ['NEWS']} self.assertFiltering(_SAMPLE_WITH_TAGS, params, \ """blob mark :2 data 17 Life is good ... commit refs/heads/master mark :101 committer a 1234798653 +0000 data 8 test ing M 644 :2 NEWS tag v0.2 from :101 tagger d 1234798653 +0000 data 12 release v0.2 """) _SAMPLE_WITH_RESETS = _SAMPLE_WITH_DIR + \ """reset refs/heads/foo reset refs/heads/bar from :102 """ class TestIncludePathsWithResets(TestCaseWithFiltering): def test_reset_retention(self): # Resets init'ing a branch (without a from) are passed through. # If a reset references a commit with a parent we kept, # keep the reset but adjust 'from' accordingly. params = {'include_paths': ['NEWS']} self.assertFiltering(_SAMPLE_WITH_RESETS, params, \ """blob mark :2 data 17 Life is good ... 
commit refs/heads/master mark :101 committer a 1234798653 +0000 data 8 test ing M 644 :2 NEWS reset refs/heads/foo reset refs/heads/bar from :101 """) # A sample input stream containing empty commit _SAMPLE_EMPTY_COMMIT = \ """blob mark :1 data 4 foo commit refs/heads/master mark :2 committer Joe 1234567890 +1000 data 14 Initial import M 644 :1 COPYING commit refs/heads/master mark :3 committer Joe 1234567890 +1000 data 12 empty commit """ # A sample input stream containing unresolved from and merge references _SAMPLE_FROM_MERGE_COMMIT = \ """blob mark :1 data 4 foo commit refs/heads/master mark :3 committer Joe 1234567890 +1000 data 6 import M 644 :1 COPYING blob mark :2 data 4 bar commit refs/heads/master mark :4 committer Joe 1234567890 +1000 data 19 unknown from commit from :999 M 644 :2 data/DATA blob mark :99 data 4 bar commit refs/heads/master mark :5 committer Joe 1234567890 +1000 data 12 merge commit from :3 merge :4 merge :1001 M 644 :99 data/DATA2 """ class TestSquashEmptyCommitsFlag(TestCaseWithFiltering): def test_squash_empty_commit(self): params = {'include_paths': None, 'exclude_paths': None} self.assertFiltering(_SAMPLE_EMPTY_COMMIT, params, \ """blob mark :1 data 4 foo commit refs/heads/master mark :2 committer Joe 1234567890 +1000 data 14 Initial import M 644 :1 COPYING """) def test_keep_empty_commit(self): params = {'include_paths': None, 'exclude_paths': None, 'squash_empty_commits': False} self.assertFiltering(_SAMPLE_EMPTY_COMMIT, params, _SAMPLE_EMPTY_COMMIT) def test_squash_unresolved_references(self): params = {'include_paths': None, 'exclude_paths': None} self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, \ """blob mark :1 data 4 foo commit refs/heads/master mark :3 committer Joe 1234567890 +1000 data 6 import M 644 :1 COPYING blob mark :2 data 4 bar commit refs/heads/master mark :4 committer Joe 1234567890 +1000 data 19 unknown from commit from :999 M 644 :2 data/DATA blob mark :99 data 4 bar commit refs/heads/master mark :5 
committer Joe 1234567890 +1000 data 12 merge commit from :3 merge :4 merge :1001 M 644 :99 data/DATA2 """) def test_keep_unresolved_from_and_merge(self): params = {'include_paths': None, 'exclude_paths': None, 'squash_empty_commits': False} self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, _SAMPLE_FROM_MERGE_COMMIT) def test_with_excludes(self): params = {'include_paths': None, 'exclude_paths': ['data/DATA'], 'squash_empty_commits': False} self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, \ """blob mark :1 data 4 foo commit refs/heads/master mark :3 committer Joe 1234567890 +1000 data 6 import M 644 :1 COPYING commit refs/heads/master mark :4 committer Joe 1234567890 +1000 data 19 unknown from commit from :999 blob mark :99 data 4 bar commit refs/heads/master mark :5 committer Joe 1234567890 +1000 data 12 merge commit from :3 merge :4 merge :1001 M 644 :99 data/DATA2 """) def test_with_file_includes(self): params = {'include_paths': ['COPYING', 'data/DATA2'], 'exclude_paths': None, 'squash_empty_commits': False} self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, \ """blob mark :1 data 4 foo commit refs/heads/master mark :3 committer Joe 1234567890 +1000 data 6 import M 644 :1 COPYING commit refs/heads/master mark :4 committer Joe 1234567890 +1000 data 19 unknown from commit from :999 blob mark :99 data 4 bar commit refs/heads/master mark :5 committer Joe 1234567890 +1000 data 12 merge commit from :3 merge :4 merge :1001 M 644 :99 data/DATA2 """ ) def test_with_directory_includes(self): params = {'include_paths': ['data/'], 'exclude_paths': None, 'squash_empty_commits': False} self.assertFiltering(_SAMPLE_FROM_MERGE_COMMIT, params, \ """commit refs/heads/master mark :3 committer Joe 1234567890 +1000 data 6 import blob mark :2 data 4 bar commit refs/heads/master mark :4 committer Joe 1234567890 +1000 data 19 unknown from commit from :999 M 644 :2 DATA blob mark :99 data 4 bar commit refs/heads/master mark :5 committer Joe 1234567890 +1000 data 12 
merge commit from :3 merge :4 merge :1001 M 644 :99 DATA2 """) fastimport-0.9.4/fastimport/tests/test_parser.py0000644000175000017500000002706112356107233022741 0ustar jelmerjelmer00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Test the Import parsing""" import StringIO import time import unittest from fastimport import ( commands, errors, parser, ) class TestLineBasedParser(unittest.TestCase): def test_push_line(self): s = StringIO.StringIO("foo\nbar\nbaz\n") p = parser.LineBasedParser(s) self.assertEqual('foo', p.next_line()) self.assertEqual('bar', p.next_line()) p.push_line('bar') self.assertEqual('bar', p.next_line()) self.assertEqual('baz', p.next_line()) self.assertEqual(None, p.next_line()) def test_read_bytes(self): s = StringIO.StringIO("foo\nbar\nbaz\n") p = parser.LineBasedParser(s) self.assertEqual('fo', p.read_bytes(2)) self.assertEqual('o\nb', p.read_bytes(3)) self.assertEqual('ar', p.next_line()) # Test that the line buffer is ignored p.push_line('bar') self.assertEqual('baz', p.read_bytes(3)) # Test missing bytes self.assertRaises(errors.MissingBytes, p.read_bytes, 10) def test_read_until(self): # TODO return s = StringIO.StringIO("foo\nbar\nbaz\nabc\ndef\nghi\n") p = parser.LineBasedParser(s) self.assertEqual('foo\nbar', p.read_until('baz')) self.assertEqual('abc', p.next_line()) # Test that the line buffer is ignored p.push_line('abc') 
self.assertEqual('def', p.read_until('ghi')) # Test missing terminator self.assertRaises(errors.MissingTerminator, p.read_until('>>>')) # Sample text _sample_import_text = """ progress completed # Test blob formats blob mark :1 data 4 aaaablob data 5 bbbbb # Commit formats commit refs/heads/master mark :2 committer bugs bunny now data 14 initial import M 644 inline README data 18 Welcome from bugs commit refs/heads/master committer now data 13 second commit from :2 M 644 inline README data 23 Welcome from bugs, etc. # Miscellaneous checkpoint progress completed # Test a commit without sub-commands (bug #351717) commit refs/heads/master mark :3 author now committer now data 20 first commit, empty # Test a commit with a heredoc-style (delimited_data) messsage (bug #400960) commit refs/heads/master mark :4 author now committer now data < now committer now data 15 submodule test M 160000 rev-id tree-id # Test features feature whatever feature foo=bar # Test commit with properties commit refs/heads/master mark :6 committer now data 18 test of properties property p1 property p2 5 hohum property p3 16 alpha beta gamma property p4 8 whatever # Test a commit with multiple authors commit refs/heads/master mark :7 author Fluffy now author Daffy now author Donald now committer now data 17 multi-author test """ _timefunc = time.time class TestImportParser(unittest.TestCase): def setUp(self): self.fake_time = 42.0123 time.time = lambda: self.fake_time def tearDown(self): time.time = _timefunc del self.fake_time def test_iter_commands(self): s = StringIO.StringIO(_sample_import_text) p = parser.ImportParser(s) result = [] for cmd in p.iter_commands(): result.append(cmd) if cmd.name == 'commit': for fc in cmd.iter_files(): result.append(fc) self.assertEqual(len(result), 17) cmd1 = result.pop(0) self.assertEqual('progress', cmd1.name) self.assertEqual('completed', cmd1.message) cmd2 = result.pop(0) self.assertEqual('blob', cmd2.name) self.assertEqual('1', cmd2.mark) 
self.assertEqual(':1', cmd2.id) self.assertEqual('aaaa', cmd2.data) self.assertEqual(4, cmd2.lineno) cmd3 = result.pop(0) self.assertEqual('blob', cmd3.name) self.assertEqual('@7', cmd3.id) self.assertEqual(None, cmd3.mark) self.assertEqual('bbbbb', cmd3.data) self.assertEqual(7, cmd3.lineno) cmd4 = result.pop(0) self.assertEqual('commit', cmd4.name) self.assertEqual('2', cmd4.mark) self.assertEqual(':2', cmd4.id) self.assertEqual('initial import', cmd4.message) self.assertEqual(('bugs bunny', 'bugs@bunny.org', self.fake_time, 0), cmd4.committer) # namedtuple attributes self.assertEqual('bugs bunny', cmd4.committer.name) self.assertEqual('bugs@bunny.org', cmd4.committer.email) self.assertEqual(self.fake_time, cmd4.committer.timestamp) self.assertEqual(0, cmd4.committer.timezone) self.assertEqual(None, cmd4.author) self.assertEqual(11, cmd4.lineno) self.assertEqual('refs/heads/master', cmd4.ref) self.assertEqual(None, cmd4.from_) self.assertEqual([], cmd4.merges) file_cmd1 = result.pop(0) self.assertEqual('filemodify', file_cmd1.name) self.assertEqual('README', file_cmd1.path) self.assertEqual(0100644, file_cmd1.mode) self.assertEqual('Welcome from bugs\n', file_cmd1.data) cmd5 = result.pop(0) self.assertEqual('commit', cmd5.name) self.assertEqual(None, cmd5.mark) self.assertEqual('@19', cmd5.id) self.assertEqual('second commit', cmd5.message) self.assertEqual(('', 'bugs@bunny.org', self.fake_time, 0), cmd5.committer) self.assertEqual(None, cmd5.author) self.assertEqual(19, cmd5.lineno) self.assertEqual('refs/heads/master', cmd5.ref) self.assertEqual(':2', cmd5.from_) self.assertEqual([], cmd5.merges) file_cmd2 = result.pop(0) self.assertEqual('filemodify', file_cmd2.name) self.assertEqual('README', file_cmd2.path) self.assertEqual(0100644, file_cmd2.mode) self.assertEqual('Welcome from bugs, etc.', file_cmd2.data) cmd6 = result.pop(0) self.assertEqual(cmd6.name, 'checkpoint') cmd7 = result.pop(0) self.assertEqual('progress', cmd7.name) self.assertEqual('completed', 
cmd7.message) cmd = result.pop(0) self.assertEqual('commit', cmd.name) self.assertEqual('3', cmd.mark) self.assertEqual(None, cmd.from_) cmd = result.pop(0) self.assertEqual('commit', cmd.name) self.assertEqual('4', cmd.mark) self.assertEqual('Commit with heredoc-style message\n', cmd.message) cmd = result.pop(0) self.assertEqual('commit', cmd.name) self.assertEqual('5', cmd.mark) self.assertEqual('submodule test\n', cmd.message) file_cmd1 = result.pop(0) self.assertEqual('filemodify', file_cmd1.name) self.assertEqual('tree-id', file_cmd1.path) self.assertEqual(0160000, file_cmd1.mode) self.assertEqual("rev-id", file_cmd1.dataref) cmd = result.pop(0) self.assertEqual('feature', cmd.name) self.assertEqual('whatever', cmd.feature_name) self.assertEqual(None, cmd.value) cmd = result.pop(0) self.assertEqual('feature', cmd.name) self.assertEqual('foo', cmd.feature_name) self.assertEqual('bar', cmd.value) cmd = result.pop(0) self.assertEqual('commit', cmd.name) self.assertEqual('6', cmd.mark) self.assertEqual('test of properties', cmd.message) self.assertEqual({ 'p1': None, 'p2': u'hohum', 'p3': u'alpha\nbeta\ngamma', 'p4': u'whatever', }, cmd.properties) cmd = result.pop(0) self.assertEqual('commit', cmd.name) self.assertEqual('7', cmd.mark) self.assertEqual('multi-author test', cmd.message) self.assertEqual('', cmd.committer[0]) self.assertEqual('bugs@bunny.org', cmd.committer[1]) self.assertEqual('Fluffy', cmd.author[0]) self.assertEqual('fluffy@bunny.org', cmd.author[1]) self.assertEqual('Daffy', cmd.more_authors[0][0]) self.assertEqual('daffy@duck.org', cmd.more_authors[0][1]) self.assertEqual('Donald', cmd.more_authors[1][0]) self.assertEqual('donald@duck.org', cmd.more_authors[1][1]) def test_done_feature_missing_done(self): s = StringIO.StringIO("""feature done """) p = parser.ImportParser(s) cmds = p.iter_commands() self.assertEquals("feature", cmds.next().name) self.assertRaises(errors.PrematureEndOfStream, cmds.next) def test_done_with_feature(self): s = 
StringIO.StringIO("""feature done done more data """) p = parser.ImportParser(s) cmds = p.iter_commands() self.assertEquals("feature", cmds.next().name) self.assertRaises(StopIteration, cmds.next) def test_done_without_feature(self): s = StringIO.StringIO("""done more data """) p = parser.ImportParser(s) cmds = p.iter_commands() self.assertEquals([], list(cmds)) class TestStringParsing(unittest.TestCase): def test_unquote(self): s = r'hello \"sweet\" wo\\r\tld' self.assertEquals(r'hello "sweet" wo\r' + "\tld", parser._unquote_c_string(s)) class TestPathPairParsing(unittest.TestCase): def test_path_pair_simple(self): p = parser.ImportParser("") self.assertEqual(['foo', 'bar'], p._path_pair("foo bar")) def test_path_pair_spaces_in_first(self): p = parser.ImportParser("") self.assertEqual(['foo bar', 'baz'], p._path_pair('"foo bar" baz')) class TestTagParsing(unittest.TestCase): def test_tagger_with_email(self): p = parser.ImportParser(StringIO.StringIO( "tag refs/tags/v1.0\n" "from :xxx\n" "tagger Joe Wong 1234567890 -0600\n" "data 11\n" "create v1.0")) cmds = list(p.iter_commands()) self.assertEquals(1, len(cmds)) self.assertTrue(isinstance(cmds[0], commands.TagCommand)) self.assertEquals(cmds[0].tagger, ('Joe Wong', 'joe@example.com', 1234567890.0, -21600)) def test_tagger_no_email_strict(self): p = parser.ImportParser(StringIO.StringIO( "tag refs/tags/v1.0\n" "from :xxx\n" "tagger Joe Wong\n" "data 11\n" "create v1.0")) self.assertRaises(errors.BadFormat, list, p.iter_commands()) def test_tagger_no_email_not_strict(self): p = parser.ImportParser(StringIO.StringIO( "tag refs/tags/v1.0\n" "from :xxx\n" "tagger Joe Wong\n" "data 11\n" "create v1.0"), strict=False) cmds = list(p.iter_commands()) self.assertEquals(1, len(cmds)) self.assertTrue(isinstance(cmds[0], commands.TagCommand)) self.assertEquals(cmds[0].tagger[:2], ('Joe Wong', None)) fastimport-0.9.4/fastimport/tests/__init__.py0000644000175000017500000000226312266552053022146 0ustar jelmerjelmer00000000000000# 
__init__.py -- The tests for python-fastimport # Copyright (C) 2010 Canonical, Ltd. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; version 2 # of the License or (at your option) any later version of # the License. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Tests for fastimport.""" import unittest def test_suite(): names = [ 'test_commands', 'test_dates', 'test_errors', 'test_filter_processor', 'test_helpers', 'test_parser', ] module_names = ['fastimport.tests.' + name for name in names] result = unittest.TestSuite() loader = unittest.TestLoader() suite = loader.loadTestsFromNames(module_names) result.addTests(suite) return result fastimport-0.9.4/fastimport/errors.py0000644000175000017500000001613412206661612020557 0ustar jelmerjelmer00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
"""Exception classes for fastimport""" # Prefix to messages to show location information _LOCATION_FMT = "line %(lineno)d: " # ImportError is heavily based on BzrError class ImportError(StandardError): """The base exception class for all import processing exceptions.""" _fmt = "Unknown Import Error" def __init__(self, msg=None, **kwds): StandardError.__init__(self) if msg is not None: self._preformatted_string = msg else: self._preformatted_string = None for key, value in kwds.items(): setattr(self, key, value) def _format(self): s = getattr(self, '_preformatted_string', None) if s is not None: # contains a preformatted message return s try: fmt = self._fmt if fmt: d = dict(self.__dict__) s = fmt % d # __str__() should always return a 'str' object # never a 'unicode' object. return s except (AttributeError, TypeError, NameError, ValueError, KeyError), e: return 'Unprintable exception %s: dict=%r, fmt=%r, error=%r' \ % (self.__class__.__name__, self.__dict__, getattr(self, '_fmt', None), e) def __unicode__(self): u = self._format() if isinstance(u, str): # Try decoding the str using the default encoding. u = unicode(u) elif not isinstance(u, unicode): # Try to make a unicode object from it, because __unicode__ must # return a unicode object. u = unicode(u) return u def __str__(self): s = self._format() if isinstance(s, unicode): s = s.encode('utf8') else: # __str__ must return a str. 
s = str(s) return s def __repr__(self): return '%s(%s)' % (self.__class__.__name__, str(self)) def __eq__(self, other): if self.__class__ is not other.__class__: return NotImplemented return self.__dict__ == other.__dict__ class ParsingError(ImportError): """The base exception class for all import processing exceptions.""" _fmt = _LOCATION_FMT + "Unknown Import Parsing Error" def __init__(self, lineno): ImportError.__init__(self) self.lineno = lineno class MissingBytes(ParsingError): """Raised when EOF encountered while expecting to find more bytes.""" _fmt = (_LOCATION_FMT + "Unexpected EOF - expected %(expected)d bytes," " found %(found)d") def __init__(self, lineno, expected, found): ParsingError.__init__(self, lineno) self.expected = expected self.found = found class MissingTerminator(ParsingError): """Raised when EOF encountered while expecting to find a terminator.""" _fmt = (_LOCATION_FMT + "Unexpected EOF - expected '%(terminator)s' terminator") def __init__(self, lineno, terminator): ParsingError.__init__(self, lineno) self.terminator = terminator class InvalidCommand(ParsingError): """Raised when an unknown command found.""" _fmt = (_LOCATION_FMT + "Invalid command '%(cmd)s'") def __init__(self, lineno, cmd): ParsingError.__init__(self, lineno) self.cmd = cmd class MissingSection(ParsingError): """Raised when a section is required in a command but not present.""" _fmt = (_LOCATION_FMT + "Command %(cmd)s is missing section %(section)s") def __init__(self, lineno, cmd, section): ParsingError.__init__(self, lineno) self.cmd = cmd self.section = section class BadFormat(ParsingError): """Raised when a section is formatted incorrectly.""" _fmt = (_LOCATION_FMT + "Bad format for section %(section)s in " "command %(cmd)s: found '%(text)s'") def __init__(self, lineno, cmd, section, text): ParsingError.__init__(self, lineno) self.cmd = cmd self.section = section self.text = text class InvalidTimezone(ParsingError): """Raised when converting a string timezone to a 
seconds offset.""" _fmt = (_LOCATION_FMT + "Timezone %(timezone)r could not be converted.%(reason)s") def __init__(self, lineno, timezone, reason=None): ParsingError.__init__(self, lineno) self.timezone = timezone if reason: self.reason = ' ' + reason else: self.reason = '' class PrematureEndOfStream(ParsingError): """Raised when the 'done' feature was specified but missing.""" _fmt = (_LOCATION_FMT + "Stream end before 'done' command") def __init__(self, lineno): ParsingError.__init__(self, lineno) class UnknownDateFormat(ImportError): """Raised when an unknown date format is given.""" _fmt = ("Unknown date format '%(format)s'") def __init__(self, format): ImportError.__init__(self) self.format = format class MissingHandler(ImportError): """Raised when a processor can't handle a command.""" _fmt = ("Missing handler for command %(cmd)s") def __init__(self, cmd): ImportError.__init__(self) self.cmd = cmd class UnknownParameter(ImportError): """Raised when an unknown parameter is passed to a processor.""" _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s") def __init__(self, param, knowns): ImportError.__init__(self) self.param = param self.knowns = knowns class BadRepositorySize(ImportError): """Raised when the repository has an incorrect number of revisions.""" _fmt = ("Bad repository size - %(found)d revisions found, " "%(expected)d expected") def __init__(self, expected, found): ImportError.__init__(self) self.expected = expected self.found = found class BadRestart(ImportError): """Raised when the import stream and id-map do not match up.""" _fmt = ("Bad restart - attempted to skip commit %(commit_id)s " "but matching revision-id is unknown") def __init__(self, commit_id): ImportError.__init__(self) self.commit_id = commit_id class UnknownFeature(ImportError): """Raised when an unknown feature is given in the input stream.""" _fmt = ("Unknown feature '%(feature)s' - try a later importer or " "an earlier data format") def __init__(self, feature): 
ImportError.__init__(self) self.feature = feature fastimport-0.9.4/fastimport/parser.py0000644000175000017500000005171712356107233020545 0ustar jelmerjelmer00000000000000# Copyright (C) 2008-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Parser of import data into command objects. In order to reuse existing front-ends, the stream format is a subset of the one used by git-fast-import (as of the 1.5.4 release of git at least). The grammar is: stream ::= cmd*; cmd ::= new_blob | new_commit | new_tag | reset_branch | checkpoint | progress ; new_blob ::= 'blob' lf mark? file_content; file_content ::= data; new_commit ::= 'commit' sp ref_str lf mark? ('author' sp name '<' email '>' when lf)? 'committer' sp name '<' email '>' when lf commit_msg ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? 
('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)* file_change* lf?; commit_msg ::= data; file_change ::= file_clr | file_del | file_rnm | file_cpy | file_obm | file_inm; file_clr ::= 'deleteall' lf; file_del ::= 'D' sp path_str lf; file_rnm ::= 'R' sp path_str sp path_str lf; file_cpy ::= 'C' sp path_str sp path_str lf; file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; file_inm ::= 'M' sp mode sp 'inline' sp path_str lf data; new_tag ::= 'tag' sp tag_str lf 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf 'tagger' sp name '<' email '>' when lf tag_msg; tag_msg ::= data; reset_branch ::= 'reset' sp ref_str lf ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? lf?; checkpoint ::= 'checkpoint' lf lf?; progress ::= 'progress' sp not_lf* lf lf?; # note: the first idnum in a stream should be 1 and subsequent # idnums should not have gaps between values as this will cause # the stream parser to reserve space for the gapped values. An # idnum can be updated in the future to a new object by issuing # a new mark directive with the old idnum. # mark ::= 'mark' sp idnum lf; data ::= (delimited_data | exact_data) lf?; # note: delim may be any string but must not contain lf. # data_line may contain any data but must not be exactly # delim. The lf after the final data_line is included in # the data. delimited_data ::= 'data' sp '<<' delim lf (data_line lf)* delim lf; # note: declen indicates the length of binary_data in bytes. # declen does not include the lf preceeding the binary data. # exact_data ::= 'data' sp declen lf binary_data; # note: quoted strings are C-style quoting supporting \c for # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn # is the signed byte value in octal. Note that the only # characters which must actually be escaped to protect the # stream formatting is: \, " and LF. Otherwise these values # are UTF8. 
# ref_str ::= ref; sha1exp_str ::= sha1exp; tag_str ::= tag; path_str ::= path | '"' quoted(path) '"' ; mode ::= '100644' | '644' | '100755' | '755' | '120000' ; declen ::= # unsigned 32 bit value, ascii base10 notation; bigint ::= # unsigned integer value, ascii base10 notation; binary_data ::= # file content, not interpreted; when ::= raw_when | rfc2822_when; raw_when ::= ts sp tz; rfc2822_when ::= # Valid RFC 2822 date and time; sp ::= # ASCII space character; lf ::= # ASCII newline (LF) character; # note: a colon (':') must precede the numerical value assigned to # an idnum. This is to distinguish it from a ref or tag name as # GIT does not permit ':' in ref or tag strings. # idnum ::= ':' bigint; path ::= # GIT style file path, e.g. "a/b/c"; ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT"; tag ::= # GIT tag name, e.g. "FIREFOX_1_5"; sha1exp ::= # Any valid GIT SHA1 expression; hexsha1 ::= # SHA1 in hexadecimal format; # note: name and email are UTF8 strings, however name must not # contain '<' or lf and email must not contain any of the # following: '<', '>', lf. # name ::= # valid GIT author/committer name; email ::= # valid GIT author/committer email; ts ::= # time since the epoch in seconds, ascii base10 notation; tz ::= # GIT style timezone; # note: comments may appear anywhere in the input, except # within a data command. Any form of the data command # always escapes the related input from comment processing. # # In case it is not clear, the '#' that starts the comment # must be the first character on that the line (an lf have # preceeded it). # comment ::= '#' not_lf* lf; not_lf ::= # Any byte that is not ASCII newline (LF); """ import collections import re import sys from fastimport import ( commands, dates, errors, ) ## Stream parsing ## class LineBasedParser(object): def __init__(self, input): """A Parser that keeps track of line numbers. 
:param input: the file-like object to read from """ self.input = input self.lineno = 0 # Lines pushed back onto the input stream self._buffer = [] def abort(self, exception, *args): """Raise an exception providing line number information.""" raise exception(self.lineno, *args) def readline(self): """Get the next line including the newline or '' on EOF.""" self.lineno += 1 if self._buffer: return self._buffer.pop() else: return self.input.readline() def next_line(self): """Get the next line without the newline or None on EOF.""" line = self.readline() if line: return line[:-1] else: return None def push_line(self, line): """Push line back onto the line buffer. :param line: the line with no trailing newline """ self.lineno -= 1 self._buffer.append(line + "\n") def read_bytes(self, count): """Read a given number of bytes from the input stream. Throws MissingBytes if the bytes are not found. Note: This method does not read from the line buffer. :return: a string """ result = self.input.read(count) found = len(result) self.lineno += result.count("\n") if found != count: self.abort(errors.MissingBytes, count, found) return result def read_until(self, terminator): """Read the input stream until the terminator is found. Throws MissingTerminator if the terminator is not found. Note: This method does not read from the line buffer. :return: the bytes read up to but excluding the terminator. """ lines = [] term = terminator + '\n' while True: line = self.input.readline() if line == term: break else: lines.append(line) return ''.join(lines) # Regular expression used for parsing. (Note: The spec states that the name # part should be non-empty but git-fast-export doesn't always do that so # the first bit is \w*, not \w+.) Also git-fast-import code says the # space before the email is optional. 
_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)') _WHO_RE = re.compile(r'([^<]*)<(.*)>') class ImportParser(LineBasedParser): def __init__(self, input, verbose=False, output=sys.stdout, user_mapper=None, strict=True): """A Parser of import commands. :param input: the file-like object to read from :param verbose: display extra information of not :param output: the file-like object to write messages to (YAGNI?) :param user_mapper: if not None, the UserMapper used to adjust user-ids for authors, committers and taggers. :param strict: Raise errors on strictly invalid data """ LineBasedParser.__init__(self, input) self.verbose = verbose self.output = output self.user_mapper = user_mapper self.strict = strict # We auto-detect the date format when a date is first encountered self.date_parser = None self.features = {} def warning(self, msg): sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg)) def iter_commands(self): """Iterator returning ImportCommand objects.""" while True: line = self.next_line() if line is None: if 'done' in self.features: raise errors.PrematureEndOfStream(self.lineno) break elif len(line) == 0 or line.startswith('#'): continue # Search for commands in order of likelihood elif line.startswith('commit '): yield self._parse_commit(line[len('commit '):]) elif line.startswith('blob'): yield self._parse_blob() elif line.startswith('done'): break elif line.startswith('progress '): yield commands.ProgressCommand(line[len('progress '):]) elif line.startswith('reset '): yield self._parse_reset(line[len('reset '):]) elif line.startswith('tag '): yield self._parse_tag(line[len('tag '):]) elif line.startswith('checkpoint'): yield commands.CheckpointCommand() elif line.startswith('feature'): yield self._parse_feature(line[len('feature '):]) else: self.abort(errors.InvalidCommand, line) def iter_file_commands(self): """Iterator returning FileCommand objects. If an invalid file command is found, the line is silently pushed back and iteration ends. 
""" while True: line = self.next_line() if line is None: break elif len(line) == 0 or line.startswith('#'): continue # Search for file commands in order of likelihood elif line.startswith('M '): yield self._parse_file_modify(line[2:]) elif line.startswith('D '): path = self._path(line[2:]) yield commands.FileDeleteCommand(path) elif line.startswith('R '): old, new = self._path_pair(line[2:]) yield commands.FileRenameCommand(old, new) elif line.startswith('C '): src, dest = self._path_pair(line[2:]) yield commands.FileCopyCommand(src, dest) elif line.startswith('deleteall'): yield commands.FileDeleteAllCommand() else: self.push_line(line) break def _parse_blob(self): """Parse a blob command.""" lineno = self.lineno mark = self._get_mark_if_any() data = self._get_data('blob') return commands.BlobCommand(mark, data, lineno) def _parse_commit(self, ref): """Parse a commit command.""" lineno = self.lineno mark = self._get_mark_if_any() author = self._get_user_info('commit', 'author', False) more_authors = [] while True: another_author = self._get_user_info('commit', 'author', False) if another_author is not None: more_authors.append(another_author) else: break committer = self._get_user_info('commit', 'committer') message = self._get_data('commit', 'message') from_ = self._get_from() merges = [] while True: merge = self._get_merge() if merge is not None: # while the spec suggests it's illegal, git-fast-export # outputs multiple merges on the one line, e.g. 
# merge :x :y :z these_merges = merge.split(" ") merges.extend(these_merges) else: break properties = {} while True: name_value = self._get_property() if name_value is not None: name, value = name_value properties[name] = value else: break return commands.CommitCommand(ref, mark, author, committer, message, from_, merges, list(self.iter_file_commands()), lineno=lineno, more_authors=more_authors, properties=properties) def _parse_feature(self, info): """Parse a feature command.""" parts = info.split("=", 1) name = parts[0] if len(parts) > 1: value = self._path(parts[1]) else: value = None self.features[name] = value return commands.FeatureCommand(name, value, lineno=self.lineno) def _parse_file_modify(self, info): """Parse a filemodify command within a commit. :param info: a string in the format "mode dataref path" (where dataref might be the hard-coded literal 'inline'). """ params = info.split(' ', 2) path = self._path(params[2]) mode = self._mode(params[0]) if params[1] == 'inline': dataref = None data = self._get_data('filemodify') else: dataref = params[1] data = None return commands.FileModifyCommand(path, mode, dataref, data) def _parse_reset(self, ref): """Parse a reset command.""" from_ = self._get_from() return commands.ResetCommand(ref, from_) def _parse_tag(self, name): """Parse a tag command.""" from_ = self._get_from('tag') tagger = self._get_user_info('tag', 'tagger', accept_just_who=True) message = self._get_data('tag', 'message') return commands.TagCommand(name, from_, tagger, message) def _get_mark_if_any(self): """Parse a mark section.""" line = self.next_line() if line.startswith('mark :'): return line[len('mark :'):] else: self.push_line(line) return None def _get_from(self, required_for=None): """Parse a from section.""" line = self.next_line() if line is None: return None elif line.startswith('from '): return line[len('from '):] elif required_for: self.abort(errors.MissingSection, required_for, 'from') else: self.push_line(line) return None 
def _get_merge(self): """Parse a merge section.""" line = self.next_line() if line is None: return None elif line.startswith('merge '): return line[len('merge '):] else: self.push_line(line) return None def _get_property(self): """Parse a property section.""" line = self.next_line() if line is None: return None elif line.startswith('property '): return self._name_value(line[len('property '):]) else: self.push_line(line) return None def _get_user_info(self, cmd, section, required=True, accept_just_who=False): """Parse a user section.""" line = self.next_line() if line.startswith(section + ' '): return self._who_when(line[len(section + ' '):], cmd, section, accept_just_who=accept_just_who) elif required: self.abort(errors.MissingSection, cmd, section) else: self.push_line(line) return None def _get_data(self, required_for, section='data'): """Parse a data section.""" line = self.next_line() if line.startswith('data '): rest = line[len('data '):] if rest.startswith('<<'): return self.read_until(rest[2:]) else: size = int(rest) read_bytes = self.read_bytes(size) # optional LF after data. next = self.input.readline() self.lineno += 1 if len(next) > 1 or next != "\n": self.push_line(next[:-1]) return read_bytes else: self.abort(errors.MissingSection, required_for, section) def _who_when(self, s, cmd, section, accept_just_who=False): """Parse who and when information from a string. :return: a tuple of (name,email,timestamp,timezone). name may be the empty string if only an email address was given. 
""" match = _WHO_AND_WHEN_RE.search(s) if match: datestr = match.group(3).lstrip() if self.date_parser is None: # auto-detect the date format if len(datestr.split(' ')) == 2: format = 'raw' elif datestr == 'now': format = 'now' else: format = 'rfc2822' self.date_parser = dates.DATE_PARSERS_BY_NAME[format] try: when = self.date_parser(datestr, self.lineno) except ValueError: print "failed to parse datestr '%s'" % (datestr,) raise name = match.group(1) email = match.group(2) else: match = _WHO_RE.search(s) if accept_just_who and match: # HACK around missing time # TODO: output a warning here when = dates.DATE_PARSERS_BY_NAME['now']('now') name = match.group(1) email = match.group(2) elif self.strict: self.abort(errors.BadFormat, cmd, section, s) else: name = s email = None when = dates.DATE_PARSERS_BY_NAME['now']('now') if len(name) > 0: if name[-1] == " ": name = name[:-1] # While it shouldn't happen, some datasets have email addresses # which contain unicode characters. See bug 338186. We sanitize # the data at this level just in case. 
if self.user_mapper: name, email = self.user_mapper.map_name_and_email(name, email) return Authorship(name, email, when[0], when[1]) def _name_value(self, s): """Parse a (name,value) tuple from 'name value-length value'.""" parts = s.split(' ', 2) name = parts[0] if len(parts) == 1: value = None else: size = int(parts[1]) value = parts[2] still_to_read = size - len(value) if still_to_read > 0: read_bytes = self.read_bytes(still_to_read) value += "\n" + read_bytes[:still_to_read - 1] value = value.decode('utf8') return (name, value) def _path(self, s): """Parse a path.""" if s.startswith('"'): if s[-1] != '"': self.abort(errors.BadFormat, '?', '?', s) else: return _unquote_c_string(s[1:-1]) return s def _path_pair(self, s): """Parse two paths separated by a space.""" # TODO: handle a space in the first path if s.startswith('"'): parts = s[1:].split('" ', 1) else: parts = s.split(' ', 1) if len(parts) != 2: self.abort(errors.BadFormat, '?', '?', s) elif parts[1].startswith('"') and parts[1].endswith('"'): parts[1] = parts[1][1:-1] elif parts[1].startswith('"') or parts[1].endswith('"'): self.abort(errors.BadFormat, '?', '?', s) return map(_unquote_c_string, parts) def _mode(self, s): """Check file mode format and parse into an int. :return: mode as integer """ # Note: Output from git-fast-export slightly different to spec if s in ['644', '100644', '0100644']: return 0100644 elif s in ['755', '100755', '0100755']: return 0100755 elif s in ['040000', '0040000']: return 040000 elif s in ['120000', '0120000']: return 0120000 elif s in ['160000', '0160000']: return 0160000 else: self.abort(errors.BadFormat, 'filemodify', 'mode', s) def _unquote_c_string(s): """replace C-style escape sequences (\n, \", etc.) 
with real chars.""" # HACK: Python strings are close enough return s.decode('string_escape', 'replace') Authorship = collections.namedtuple('Authorship', 'name email timestamp timezone') fastimport-0.9.4/fastimport/dates.py0000644000175000017500000000434012356107233020337 0ustar jelmerjelmer00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Date parsing routines. Each routine represents a date format that can be specified in a stream using the date-format feature. The return value is timestamp,timezone where * timestamp is seconds since epoch * timezone is the offset from UTC in seconds. """ import time from fastimport import errors def parse_raw(s, lineno=0): """Parse a date from a raw string. The format must be exactly "seconds-since-epoch offset-utc". See the spec for details. """ timestamp_str, timezone_str = s.split(' ', 1) timestamp = float(timestamp_str) try: timezone = parse_tz(timezone_str) except ValueError: raise errors.InvalidTimezone(lineno, timezone_str) return timestamp, timezone def parse_tz(tz): """Parse a timezone specification in the [+|-]HHMM format. :return: the timezone offset in seconds. 
""" # from git_repository.py in bzr-git if tz[0] not in ('+', '-'): raise ValueError(tz) sign = {'+': +1, '-': -1}[tz[0]] hours = int(tz[1:-2]) minutes = int(tz[-2:]) return sign * 60 * (60 * hours + minutes) def parse_rfc2822(s, lineno=0): """Parse a date from a rfc2822 string. See the spec for details. """ raise NotImplementedError(parse_rfc2822) def parse_now(s, lineno=0): """Parse a date from a string. The format must be exactly "now". See the spec for details. """ return time.time(), 0 # Lookup tabel of date parsing routines DATE_PARSERS_BY_NAME = { 'raw': parse_raw, 'rfc2822': parse_rfc2822, 'now': parse_now, } fastimport-0.9.4/fastimport/helpers.py0000644000175000017500000000541012347666005020707 0ustar jelmerjelmer00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Miscellaneous useful stuff.""" def _common_path_and_rest(l1, l2, common=[]): # From http://code.activestate.com/recipes/208993/ if len(l1) < 1: return (common, l1, l2) if len(l2) < 1: return (common, l1, l2) if l1[0] != l2[0]: return (common, l1, l2) return _common_path_and_rest(l1[1:], l2[1:], common+[l1[0]]) def common_path(path1, path2): """Find the common bit of 2 paths.""" return ''.join(_common_path_and_rest(path1, path2)[0]) def common_directory(paths): """Find the deepest common directory of a list of paths. 
:return: if no paths are provided, None is returned; if there is no common directory, '' is returned; otherwise the common directory with a trailing / is returned. """ import posixpath def get_dir_with_slash(path): if path == '' or path.endswith('/'): return path else: dirname, basename = posixpath.split(path) if dirname == '': return dirname else: return dirname + '/' if not paths: return None elif len(paths) == 1: return get_dir_with_slash(paths[0]) else: common = common_path(paths[0], paths[1]) for path in paths[2:]: common = common_path(common, path) return get_dir_with_slash(common) def is_inside(dir, fname): """True if fname is inside dir. The parameters should typically be passed to osutils.normpath first, so that . and .. and repeated slashes are eliminated, and the separators are canonical for the platform. The empty string as a dir name is taken as top-of-tree and matches everything. """ # XXX: Most callers of this can actually do something smarter by # looking at the inventory if dir == fname: return True if dir == '': return True if dir[-1] != '/': dir += '/' return fname.startswith(dir) def is_inside_any(dir_list, fname): """True if fname is inside any of given dirs.""" for dirname in dir_list: if is_inside(dirname, fname): return True return False fastimport-0.9.4/fastimport/processor.py0000644000175000017500000001400312356107233021253 0ustar jelmerjelmer00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Processor for fast-import commands. This module provides the skeleton of a fast-import backend. To import from a fast-import stream to your version-control system: - derive a class from the abstract ImportProcessor class and implement the *_helper methods. - parse a fast-import stream into a sequence of commands, for example using the helpers from the parser module. - pass that command sequence to the process method of your processor. See git-fast-import.1 for the meaning of each command and the processors package for examples. """ import sys import time import errors class ImportProcessor(object): """Base class for fast-import stream processors. Subclasses should override the pre_*, post_* and *_handler methods as appropriate. """ known_params = [] def __init__(self, params=None, verbose=False, outf=None): if outf is None: self.outf = sys.stdout else: self.outf = outf self.verbose = verbose if params is None: self.params = {} else: self.params = params self.validate_parameters() # Handlers can set this to request exiting cleanly without # iterating through the remaining commands self.finished = False def validate_parameters(self): """Validate that the parameters are correctly specified.""" for p in self.params: if p not in self.known_params: raise errors.UnknownParameter(p, self.known_params) def process(self, command_iter): """Import data into Bazaar by processing a stream of commands. 
:param command_iter: an iterator providing commands """ self._process(command_iter) def _process(self, command_iter): self.pre_process() for cmd in command_iter(): try: handler = getattr(self.__class__, cmd.name + "_handler") except KeyError: raise errors.MissingHandler(cmd.name) else: self.pre_handler(cmd) handler(self, cmd) self.post_handler(cmd) if self.finished: break self.post_process() def warning(self, msg, *args): """Output a warning but timestamp it.""" pass def debug(self, mgs, *args): """Output a debug message.""" pass def _time_of_day(self): """Time of day as a string.""" # Note: this is a separate method so tests can patch in a fixed value return time.strftime("%H:%M:%S") def pre_process(self): """Hook for logic at start of processing.""" pass def post_process(self): """Hook for logic at end of processing.""" pass def pre_handler(self, cmd): """Hook for logic before each handler starts.""" pass def post_handler(self, cmd): """Hook for logic after each handler finishes.""" pass def progress_handler(self, cmd): """Process a ProgressCommand.""" raise NotImplementedError(self.progress_handler) def blob_handler(self, cmd): """Process a BlobCommand.""" raise NotImplementedError(self.blob_handler) def checkpoint_handler(self, cmd): """Process a CheckpointCommand.""" raise NotImplementedError(self.checkpoint_handler) def commit_handler(self, cmd): """Process a CommitCommand.""" raise NotImplementedError(self.commit_handler) def reset_handler(self, cmd): """Process a ResetCommand.""" raise NotImplementedError(self.reset_handler) def tag_handler(self, cmd): """Process a TagCommand.""" raise NotImplementedError(self.tag_handler) def feature_handler(self, cmd): """Process a FeatureCommand.""" raise NotImplementedError(self.feature_handler) class CommitHandler(object): """Base class for commit handling. Subclasses should override the pre_*, post_* and *_handler methods as appropriate. 
""" def __init__(self, command): self.command = command def process(self): self.pre_process_files() for fc in self.command.iter_files(): try: handler = getattr(self.__class__, fc.name[4:] + "_handler") except KeyError: raise errors.MissingHandler(fc.name) else: handler(self, fc) self.post_process_files() def warning(self, msg, *args): """Output a warning but add context.""" pass def pre_process_files(self): """Prepare for committing.""" pass def post_process_files(self): """Save the revision.""" pass def modify_handler(self, filecmd): """Handle a filemodify command.""" raise NotImplementedError(self.modify_handler) def delete_handler(self, filecmd): """Handle a filedelete command.""" raise NotImplementedError(self.delete_handler) def copy_handler(self, filecmd): """Handle a filecopy command.""" raise NotImplementedError(self.copy_handler) def rename_handler(self, filecmd): """Handle a filerename command.""" raise NotImplementedError(self.rename_handler) def deleteall_handler(self, filecmd): """Handle a filedeleteall command.""" raise NotImplementedError(self.deleteall_handler) fastimport-0.9.4/fastimport/processors/0000755000175000017500000000000012356107410021063 5ustar jelmerjelmer00000000000000fastimport-0.9.4/fastimport/processors/filter_processor.py0000644000175000017500000002635512206661612025037 0ustar jelmerjelmer00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
"""Import processor that filters the input (and doesn't import).""" from fastimport import ( commands, helpers, processor, ) import stat class FilterProcessor(processor.ImportProcessor): """An import processor that filters the input to include/exclude objects. No changes to the current repository are made. Here are the supported parameters: * include_paths - a list of paths that commits must change in order to be kept in the output stream * exclude_paths - a list of paths that should not appear in the output stream * squash_empty_commits - if set to False, squash commits that don't have any changes after the filter has been applied """ known_params = [ 'include_paths', 'exclude_paths', 'squash_empty_commits' ] def pre_process(self): self.includes = self.params.get('include_paths') self.excludes = self.params.get('exclude_paths') self.squash_empty_commits = bool( self.params.get('squash_empty_commits', True)) # What's the new root, if any self.new_root = helpers.common_directory(self.includes) # Buffer of blobs until we know we need them: mark -> cmd self.blobs = {} # These are the commits we've squashed so far self.squashed_commits = set() # Map of commit-id to list of parents self.parents = {} def pre_handler(self, cmd): self.command = cmd # Should this command be included in the output or not? self.keep = False # Blobs to dump into the output before dumping the command itself self.referenced_blobs = [] def post_handler(self, cmd): if not self.keep: return # print referenced blobs and the command for blob_id in self.referenced_blobs: self._print_command(self.blobs[blob_id]) self._print_command(self.command) def progress_handler(self, cmd): """Process a ProgressCommand.""" # These always pass through self.keep = True def blob_handler(self, cmd): """Process a BlobCommand.""" # These never pass through directly. We buffer them and only # output them if referenced by an interesting command. 
self.blobs[cmd.id] = cmd self.keep = False def checkpoint_handler(self, cmd): """Process a CheckpointCommand.""" # These always pass through self.keep = True def commit_handler(self, cmd): """Process a CommitCommand.""" # These pass through if they meet the filtering conditions interesting_filecmds = self._filter_filecommands(cmd.iter_files) if interesting_filecmds or not self.squash_empty_commits: # If all we have is a single deleteall, skip this commit if len(interesting_filecmds) == 1 and isinstance( interesting_filecmds[0], commands.FileDeleteAllCommand): pass else: # Remember just the interesting file commands self.keep = True cmd.file_iter = iter(interesting_filecmds) # Record the referenced blobs for fc in interesting_filecmds: if isinstance(fc, commands.FileModifyCommand): if (fc.dataref is not None and not stat.S_ISDIR(fc.mode)): self.referenced_blobs.append(fc.dataref) # Update from and merges to refer to commits in the output cmd.from_ = self._find_interesting_from(cmd.from_) cmd.merges = self._find_interesting_merges(cmd.merges) else: self.squashed_commits.add(cmd.id) # Keep track of the parents if cmd.from_ and cmd.merges: parents = [cmd.from_] + cmd.merges elif cmd.from_: parents = [cmd.from_] else: parents = None if cmd.mark is not None: self.parents[":" + cmd.mark] = parents def reset_handler(self, cmd): """Process a ResetCommand.""" if cmd.from_ is None: # We pass through resets that init a branch because we have to # assume the branch might be interesting. 
self.keep = True else: # Keep resets if they indirectly reference something we kept cmd.from_ = self._find_interesting_from(cmd.from_) self.keep = cmd.from_ is not None def tag_handler(self, cmd): """Process a TagCommand.""" # Keep tags if they indirectly reference something we kept cmd.from_ = self._find_interesting_from(cmd.from_) self.keep = cmd.from_ is not None def feature_handler(self, cmd): """Process a FeatureCommand.""" feature = cmd.feature_name if feature not in commands.FEATURE_NAMES: self.warning("feature %s is not supported - parsing may fail" % (feature,)) # These always pass through self.keep = True def _print_command(self, cmd): """Wrapper to avoid adding unnecessary blank lines.""" text = repr(cmd) self.outf.write(text) if not text.endswith("\n"): self.outf.write("\n") def _filter_filecommands(self, filecmd_iter): """Return the filecommands filtered by includes & excludes. :return: a list of FileCommand objects """ if self.includes is None and self.excludes is None: return list(filecmd_iter()) # Do the filtering, adjusting for the new_root result = [] for fc in filecmd_iter(): if (isinstance(fc, commands.FileModifyCommand) or isinstance(fc, commands.FileDeleteCommand)): if self._path_to_be_kept(fc.path): fc.path = self._adjust_for_new_root(fc.path) else: continue elif isinstance(fc, commands.FileDeleteAllCommand): pass elif isinstance(fc, commands.FileRenameCommand): fc = self._convert_rename(fc) elif isinstance(fc, commands.FileCopyCommand): fc = self._convert_copy(fc) else: self.warning("cannot handle FileCommands of class %s - ignoring", fc.__class__) continue if fc is not None: result.append(fc) return result def _path_to_be_kept(self, path): """Does the given path pass the filtering criteria?""" if self.excludes and (path in self.excludes or helpers.is_inside_any(self.excludes, path)): return False if self.includes: return (path in self.includes or helpers.is_inside_any(self.includes, path)) return True def _adjust_for_new_root(self, path): 
"""Adjust a path given the new root directory of the output.""" if self.new_root is None: return path elif path.startswith(self.new_root): return path[len(self.new_root):] else: return path def _find_interesting_parent(self, commit_ref): while True: if commit_ref not in self.squashed_commits: return commit_ref parents = self.parents.get(commit_ref) if not parents: return None commit_ref = parents[0] def _find_interesting_from(self, commit_ref): if commit_ref is None: return None return self._find_interesting_parent(commit_ref) def _find_interesting_merges(self, commit_refs): if commit_refs is None: return None merges = [] for commit_ref in commit_refs: parent = self._find_interesting_parent(commit_ref) if parent is not None: merges.append(parent) if merges: return merges else: return None def _convert_rename(self, fc): """Convert a FileRenameCommand into a new FileCommand. :return: None if the rename is being ignored, otherwise a new FileCommand based on the whether the old and new paths are inside or outside of the interesting locations. """ old = fc.old_path new = fc.new_path keep_old = self._path_to_be_kept(old) keep_new = self._path_to_be_kept(new) if keep_old and keep_new: fc.old_path = self._adjust_for_new_root(old) fc.new_path = self._adjust_for_new_root(new) return fc elif keep_old: # The file has been renamed to a non-interesting location. # Delete it! old = self._adjust_for_new_root(old) return commands.FileDeleteCommand(old) elif keep_new: # The file has been renamed into an interesting location # We really ought to add it but we don't currently buffer # the contents of all previous files and probably never want # to. Maybe fast-import-info needs to be extended to # remember all renames and a config file can be passed # into here ala fast-import? self.warning("cannot turn rename of %s into an add of %s yet" % (old, new)) return None def _convert_copy(self, fc): """Convert a FileCopyCommand into a new FileCommand. 
        :return: None if the copy is being ignored, otherwise a
            new FileCommand based on the whether the source and destination
            paths are inside or outside of the interesting locations.
        """
        src = fc.src_path
        dest = fc.dest_path
        keep_src = self._path_to_be_kept(src)
        keep_dest = self._path_to_be_kept(dest)
        if keep_src and keep_dest:
            fc.src_path = self._adjust_for_new_root(src)
            fc.dest_path = self._adjust_for_new_root(dest)
            return fc
        elif keep_src:
            # The file has been copied to a non-interesting location.
            # Ignore it!
            return None
        elif keep_dest:
            # The file has been copied into an interesting location
            # We really ought to add it but we don't currently buffer
            # the contents of all previous files and probably never want
            # to. Maybe fast-import-info needs to be extended to
            # remember all copies and a config file can be passed
            # into here ala fast-import?
            self.warning("cannot turn copy of %s into an add of %s yet" %
                (src, dest))
            return None
fastimport-0.9.4/fastimport/processors/query_processor.py0000644000175000017500000000576412206661612024710 0ustar  jelmerjelmer00000000000000# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Import processor that queries the input (and doesn't import)."""


from fastimport import (
    commands,
    processor,
    )


class QueryProcessor(processor.ImportProcessor):
    """An import processor that queries the input.

    No changes to the current repository are made.
    """

    known_params = commands.COMMAND_NAMES + commands.FILE_COMMAND_NAMES + \
        ['commit-mark']

    def __init__(self, params=None, verbose=False):
        processor.ImportProcessor.__init__(self, params, verbose)
        # Map of command name -> list of fields to show (None = all fields)
        self.parsed_params = {}
        # Mark of the single commit to dump, if 'commit-mark' was given
        self.interesting_commit = None
        self._finished = False
        if params:
            if 'commit-mark' in params:
                self.interesting_commit = params['commit-mark']
                del params['commit-mark']
            for name, value in params.iteritems():
                if value == 1:
                    # All fields
                    fields = None
                else:
                    fields = value.split(',')
                self.parsed_params[name] = fields

    def pre_handler(self, cmd):
        """Hook for logic before each handler starts."""
        if self._finished:
            return
        if self.interesting_commit and cmd.name == 'commit':
            # Only the requested commit is printed; everything else skipped.
            if cmd.mark == self.interesting_commit:
                print cmd.to_string()
                self._finished = True
            return
        if self.parsed_params.has_key(cmd.name):
            fields = self.parsed_params[cmd.name]
            str = cmd.dump_str(fields, self.parsed_params, self.verbose)
            print "%s" % (str,)

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        pass

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        pass

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        pass

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        pass

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        pass

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        pass

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            self.warning("feature %s is not supported - parsing may fail"
                % (feature,))
fastimport-0.9.4/fastimport/processors/__init__.py0000644000175000017500000000000012206661612023165 0ustar  jelmerjelmer00000000000000fastimport-0.9.4/fastimport/commands.py0000644000175000017500000003513712356107233021050 0ustar  jelmerjelmer00000000000000# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""fast-import command classes.

These objects are used by the parser to represent the content of
a fast-import stream.
"""

import stat

# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes
# one extra character. Set this variable to True to work-around it. It only
# happens when renaming a file whose name contains spaces and/or quotes, and
# the symptom is:
#   % git-fast-import
#   fatal: Missing space after source: R "file 1.txt" file 2.txt
# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584
GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False

# Lists of command names
COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'feature', 'progress',
    'reset', 'tag']
FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
    'filedeleteall']

# Feature names
MULTIPLE_AUTHORS_FEATURE = "multiple-authors"
COMMIT_PROPERTIES_FEATURE = "commit-properties"
EMPTY_DIRS_FEATURE = "empty-directories"
FEATURE_NAMES = [
    MULTIPLE_AUTHORS_FEATURE,
    COMMIT_PROPERTIES_FEATURE,
    EMPTY_DIRS_FEATURE,
    ]


class ImportCommand(object):
    """Base class for import commands."""

    def __init__(self, name):
        self.name = name
        # List of field names not to display
        self._binary = []

    def __str__(self):
        return repr(self)

    def dump_str(self, names=None, child_lists=None, verbose=False):
        """Dump fields as a string.

        For debugging.
        :param names: the list of fields to include or
            None for all public fields
        :param child_lists: dictionary of child command names to
            fields for that child command to include
        :param verbose: if True, prefix each line with the command class and
            display fields as a dictionary; if False, dump just the field
            values with tabs between them
        """
        interesting = {}
        if names is None:
            fields = [k for k in self.__dict__.keys() if not k.startswith('_')]
        else:
            fields = names
        for field in fields:
            value = self.__dict__.get(field)
            # Fields listed in _binary hold raw content; elide them.
            if field in self._binary and value is not None:
                value = '(...)'
            interesting[field] = value
        if verbose:
            return "%s: %s" % (self.__class__.__name__, interesting)
        else:
            return "\t".join([repr(interesting[k]) for k in fields])


class BlobCommand(ImportCommand):

    def __init__(self, mark, data, lineno=0):
        ImportCommand.__init__(self, 'blob')
        self.mark = mark
        self.data = data
        self.lineno = lineno
        # Provide a unique id in case the mark is missing
        if mark is None:
            self.id = '@%d' % lineno
        else:
            self.id = ':' + mark
        self._binary = ['data']

    def __repr__(self):
        if self.mark is None:
            mark_line = ""
        else:
            mark_line = "\nmark :%s" % self.mark
        return "blob%s\ndata %d\n%s" % (mark_line, len(self.data), self.data)


class CheckpointCommand(ImportCommand):

    def __init__(self):
        ImportCommand.__init__(self, 'checkpoint')

    def __repr__(self):
        return "checkpoint"


class CommitCommand(ImportCommand):

    def __init__(self, ref, mark, author, committer, message, from_,
        merges, file_iter, lineno=0, more_authors=None, properties=None):
        ImportCommand.__init__(self, 'commit')
        self.ref = ref
        self.mark = mark
        self.author = author
        self.committer = committer
        self.message = message
        self.from_ = from_
        self.merges = merges
        self.file_iter = file_iter
        self.more_authors = more_authors
        self.properties = properties
        self.lineno = lineno
        self._binary = ['file_iter']
        # Provide a unique id in case the mark is missing
        if mark is None:
            self.id = '@%d' % lineno
        else:
            self.id = ':%s' % mark

    def copy(self, **kwargs):
        # Materialise file_iter first so both the original and the copy can
        # iterate the files more than once.
        if not isinstance(self.file_iter, list):
            self.file_iter = list(self.file_iter)

        # Copy every public field except the derived ones ('id', 'name'),
        # then apply the overrides.
        fields = dict((k, v)
                      for k, v in self.__dict__.iteritems()
                      if k not in ('id', 'name')
                      if not k.startswith('_'))
        fields.update(kwargs)

        return CommitCommand(**fields)

    def __repr__(self):
        return self.to_string(include_file_contents=True)

    def __str__(self):
        return self.to_string(include_file_contents=False)

    def to_string(self, use_features=True, include_file_contents=False):
        # Render the commit in fast-import stream syntax.
        if self.mark is None:
            mark_line = ""
        else:
            mark_line = "\nmark :%s" % self.mark
        if self.author is None:
            author_section = ""
        else:
            author_section = "\nauthor %s" % format_who_when(self.author)
            if use_features and self.more_authors:
                for author in self.more_authors:
                    author_section += "\nauthor %s" % format_who_when(author)
        committer = "committer %s" % format_who_when(self.committer)
        if self.message is None:
            msg_section = ""
        else:
            msg = self.message
            msg_section = "\ndata %d\n%s" % (len(msg), msg)
        if self.from_ is None:
            from_line = ""
        else:
            from_line = "\nfrom %s" % self.from_
        if self.merges is None:
            merge_lines = ""
        else:
            merge_lines = "".join(["\nmerge %s" % (m,)
                for m in self.merges])
        if use_features and self.properties:
            property_lines = []
            # Sort properties by name for deterministic output.
            for name in sorted(self.properties):
                value = self.properties[name]
                property_lines.append("\n" + format_property(name, value))
            properties_section = "".join(property_lines)
        else:
            properties_section = ""
        if self.file_iter is None:
            filecommands = ""
        else:
            if include_file_contents:
                format_str = "\n%r"
            else:
                format_str = "\n%s"
            filecommands = "".join([format_str % (c,)
                for c in self.iter_files()])
        return "commit %s%s%s\n%s%s%s%s%s%s" % (self.ref, mark_line,
            author_section, committer, msg_section, from_line, merge_lines,
            properties_section, filecommands)

    def dump_str(self, names=None, child_lists=None, verbose=False):
        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
        for f in self.iter_files():
            if child_lists is None:
                continue
            try:
                child_names = child_lists[f.name]
            except KeyError:
                continue
            result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
        return '\n'.join(result)

    def iter_files(self):
        """Iterate over files."""
        # file_iter may be a callable or an iterator
        if callable(self.file_iter):
            return self.file_iter()
        return iter(self.file_iter)


class FeatureCommand(ImportCommand):

    def __init__(self, feature_name, value=None, lineno=0):
        ImportCommand.__init__(self, 'feature')
        self.feature_name = feature_name
        self.value = value
        self.lineno = lineno

    def __repr__(self):
        if self.value is None:
            value_text = ""
        else:
            value_text = "=%s" % self.value
        return "feature %s%s" % (self.feature_name, value_text)


class ProgressCommand(ImportCommand):

    def __init__(self, message):
        ImportCommand.__init__(self, 'progress')
        self.message = message

    def __repr__(self):
        return "progress %s" % (self.message,)


class ResetCommand(ImportCommand):

    def __init__(self, ref, from_):
        ImportCommand.__init__(self, 'reset')
        self.ref = ref
        self.from_ = from_

    def __repr__(self):
        if self.from_ is None:
            from_line = ""
        else:
            # According to git-fast-import(1), the extra LF is optional here;
            # however, versions of git up to 1.5.4.3 had a bug by which the LF
            # was needed. Always emit it, since it doesn't hurt and maintains
            # compatibility with older versions.
            # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab
            from_line = "\nfrom %s\n" % self.from_
        return "reset %s%s" % (self.ref, from_line)


class TagCommand(ImportCommand):

    def __init__(self, id, from_, tagger, message):
        ImportCommand.__init__(self, 'tag')
        self.id = id
        self.from_ = from_
        self.tagger = tagger
        self.message = message

    def __repr__(self):
        if self.from_ is None:
            from_line = ""
        else:
            from_line = "\nfrom %s" % self.from_
        if self.tagger is None:
            tagger_line = ""
        else:
            tagger_line = "\ntagger %s" % format_who_when(self.tagger)
        if self.message is None:
            msg_section = ""
        else:
            msg = self.message
            msg_section = "\ndata %d\n%s" % (len(msg), msg)
        return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section)


class FileCommand(ImportCommand):
    """Base class for file commands."""
    pass


class FileModifyCommand(FileCommand):

    def __init__(self, path, mode, dataref, data):
        # Either dataref or data should be null
        FileCommand.__init__(self, 'filemodify')
        self.path = check_path(path)
        self.mode = mode
        self.dataref = dataref
        self.data = data
        self._binary = ['data']

    def __repr__(self):
        return self.to_string(include_file_contents=True)

    def __str__(self):
        return self.to_string(include_file_contents=False)

    def _format_mode(self, mode):
        # Map the stored octal mode back to the stream's textual form.
        if mode in (0755, 0100755):
            return "755"
        elif mode in (0644, 0100644):
            return "644"
        elif mode == 040000:
            return "040000"
        elif mode == 0120000:
            return "120000"
        elif mode == 0160000:
            return "160000"
        else:
            raise AssertionError("Unknown mode %o" % mode)

    def to_string(self, include_file_contents=False):
        datastr = ""
        if stat.S_ISDIR(self.mode):
            # Directories have no data; '-' is the stream placeholder.
            dataref = '-'
        elif self.dataref is None:
            dataref = "inline"
            if include_file_contents:
                datastr = "\ndata %d\n%s" % (len(self.data), self.data)
        else:
            dataref = "%s" % (self.dataref,)
        path = format_path(self.path)
        return "M %s %s %s%s" % (self._format_mode(self.mode), dataref, path, datastr)


class FileDeleteCommand(FileCommand):

    def __init__(self, path):
FileCommand.__init__(self, 'filedelete') self.path = check_path(path) def __repr__(self): return "D %s" % (format_path(self.path),) class FileCopyCommand(FileCommand): def __init__(self, src_path, dest_path): FileCommand.__init__(self, 'filecopy') self.src_path = check_path(src_path) self.dest_path = check_path(dest_path) def __repr__(self): return "C %s %s" % ( format_path(self.src_path, quote_spaces=True), format_path(self.dest_path)) class FileRenameCommand(FileCommand): def __init__(self, old_path, new_path): FileCommand.__init__(self, 'filerename') self.old_path = check_path(old_path) self.new_path = check_path(new_path) def __repr__(self): return "R %s %s" % ( format_path(self.old_path, quote_spaces=True), format_path(self.new_path)) class FileDeleteAllCommand(FileCommand): def __init__(self): FileCommand.__init__(self, 'filedeleteall') def __repr__(self): return "deleteall" class NoteModifyCommand(FileCommand): def __init__(self, from_, data): super(NoteModifyCommand, self).__init__('notemodify') self.from_ = from_ self.data = data self._binary = ['data'] def __str__(self): return "N inline :%s" % self.from_ def __repr__(self): return "%s\ndata %d\n%s" % (self, len(self.data), self.data) def check_path(path): """Check that a path is legal. 
:return: the path if all is OK :raise ValueError: if the path is illegal """ if path is None or path == '' or path[0] == "/": raise ValueError("illegal path '%s'" % path) if type(path) != str: raise TypeError("illegale type for path '%r'" % path) return path def format_path(p, quote_spaces=False): """Format a path in utf8, quoting it if necessary.""" if '\n' in p: import re p = re.sub('\n', '\\n', p) quote = True else: quote = p[0] == '"' or (quote_spaces and ' ' in p) if quote: extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or '' p = '"%s"%s' % (p, extra) return p def format_who_when(fields): """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string.""" offset = fields[3] if offset < 0: offset_sign = '-' offset = abs(offset) else: offset_sign = '+' offset_hours = offset / 3600 offset_minutes = offset / 60 - offset_hours * 60 offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes) name = fields[0] if name == '': sep = '' else: sep = ' ' if isinstance(name, unicode): name = name.encode('utf8') email = fields[1] if isinstance(email, unicode): email = email.encode('utf8') result = "%s%s<%s> %d %s" % (name, sep, email, fields[2], offset_str) return result def format_property(name, value): """Format the name and value (both unicode) of a property as a string.""" utf8_name = name.encode('utf8') if value is not None: utf8_value = value.encode('utf8') result = "property %s %d %s" % (utf8_name, len(utf8_value), utf8_value) else: result = "property %s" % (utf8_name,) return result fastimport-0.9.4/fastimport/__init__.py0000644000175000017500000000225512356107233021001 0ustar jelmerjelmer00000000000000# Copyright (C) 2008-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Fastimport file format parser and generator This is a Python parser for git's fast-import format. It was originally developed for bzr-fastimport but has been extracted so it can be used by other projects. Use it like so: import fastimport.processor import fastimport.parser class ImportProcessor(fastimport.processor.ImportProcessor): ... parser = fastimport.parser.ImportParser(sys.stdin) processor = ImportProcessor(...) processor.process(parser.parse()) """ __version__ = (0, 9, 4) fastimport-0.9.4/setup.py0000755000175000017500000000064612356107265016224 0ustar jelmerjelmer00000000000000#!/usr/bin/env python from distutils.core import setup version = "0.9.4" setup(name="fastimport", description="VCS fastimport/fastexport parser", version=version, author="Canonical Ltd", author_email="bazaar@lists.canonical.com", license="GNU GPL v2 or later", url="https://launchpad.net/python-fastimport", packages=['fastimport', 'fastimport.tests', 'fastimport.processors'])