GitPython-0.3.2.RC1/0000755000175100017510000000000011605055665013104 5ustar byronbyronGitPython-0.3.2.RC1/git/0000755000175100017510000000000011605055665013667 5ustar byronbyronGitPython-0.3.2.RC1/git/diff.py0000644000175100017510000002417411575507662015166 0ustar byronbyron# diff.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import re from objects.blob import Blob from objects.util import mode_str_to_int from exc import GitCommandError from gitdb.util import hex_to_bin __all__ = ('Diffable', 'DiffIndex', 'Diff') class Diffable(object): """Common interface for all object that can be diffed against another object of compatible type. :note: Subclasses require a repo member as it is the case for Object instances, for practical reasons we do not derive from Object.""" __slots__ = tuple() # standin indicating you want to diff against the index class Index(object): pass def _process_diff_args(self, args): """ :return: possibly altered version of the given args list. Method is called right before git command execution. Subclasses can use it to alter the behaviour of the superclass""" return args def diff(self, other=Index, paths=None, create_patch=False, **kwargs): """Creates diffs between two items being trees, trees and index or an index and the working tree. :param other: Is the item to compare us with. If None, we will be compared to the working tree. If Treeish, it will be compared against the respective tree If Index ( type ), it will be compared against the index. It defaults to Index to assure the method will not by-default fail on bare repositories. :param paths: is a list of paths or a single path to limit the diff to. It will only include at least one of the givne path or paths. :param create_patch: If True, the returned Diff contains a detailed patch that if applied makes the self to other. Patches are somwhat costly as blobs have to be read and diffed. :param kwargs: Additional arguments passed to git-diff, such as R=True to swap both sides of the diff. :return: git.DiffIndex :note: Rename detection will only work if create_patch is True. On a bare repository, 'other' needs to be provided as Index or as as Tree/Commit, or a git command error will occour""" args = list() args.append( "--abbrev=40" ) # we need full shas args.append( "--full-index" ) # get full index paths, not only filenames if create_patch: args.append("-p") args.append("-M") # check for renames else: args.append("--raw") if paths is not None and not isinstance(paths, (tuple,list)): paths = [ paths ] if other is not None and other is not self.Index: args.insert(0, other) if other is self.Index: args.insert(0, "--cached") args.insert(0,self) # paths is list here or None if paths: args.append("--") args.extend(paths) # END paths handling kwargs['as_process'] = True proc = self.repo.git.diff(*self._process_diff_args(args), **kwargs) diff_method = Diff._index_from_raw_format if create_patch: diff_method = Diff._index_from_patch_format index = diff_method(self.repo, proc.stdout) status = proc.wait() return index class DiffIndex(list): """Implements an Index for diffs, allowing a list of Diffs to be queried by the diff properties. 
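A short usage sketch (``repo`` is assumed to be an existing non-bare ``Repo`` instance; paths are illustrative)::

    index = repo.head.commit.diff()          # diff HEAD's tree against the index
    for diff in index.iter_change_type('M'):
        print diff.a_blob.path               # path of each modified blob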
The class improves the diff handling convenience""" # change type invariant identifying possible ways a blob can have changed # A = Added # D = Deleted # R = Renamed # M = modified change_type = ("A", "D", "R", "M") def iter_change_type(self, change_type): """ :return: iterator yielding Diff instances that match the given change_type :param change_type: Member of DiffIndex.change_type, namely: * 'A' for added paths * 'D' for deleted paths * 'R' for renamed paths * 'M' for paths with modified data""" if change_type not in self.change_type: raise ValueError( "Invalid change type: %s" % change_type ) for diff in self: if change_type == "A" and diff.new_file: yield diff elif change_type == "D" and diff.deleted_file: yield diff elif change_type == "R" and diff.renamed: yield diff elif change_type == "M" and diff.a_blob and diff.b_blob and diff.a_blob != diff.b_blob: yield diff # END for each diff class Diff(object): """A Diff contains diff information between two Trees. It contains two sides a and b of the diff; members are prefixed with "a" and "b" respectively to indicate that. Diffs keep information about the changed blob objects, the file mode, renames, deletions and new files. There are a few cases where None has to be expected as member variable value: ``New File``:: a_mode is None a_blob is None ``Deleted File``:: b_mode is None b_blob is None ``Working Tree Blobs`` When comparing to working trees, the working tree blob will have a null hexsha as a corresponding object does not yet exist. The mode will be null as well. The path, however, will be available. If it is listed in a diff, the working tree version of the file must be different from the version in the index or tree, and hence it has been modified.""" # precompiled regex re_header = re.compile(r""" #^diff[ ]--git [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n ^rename[ ]from[ ](?P<rename_from>\S+)\n ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? (?:^old[ ]mode[ ](?P<old_mode>\d+)\n ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? 
""", re.VERBOSE | re.MULTILINE) # can be used for comparisons NULL_HEX_SHA = "0"*40 NULL_BIN_SHA = "\0"*20 __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", "rename_from", "rename_to", "diff") def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, b_mode, new_file, deleted_file, rename_from, rename_to, diff): self.a_mode = a_mode self.b_mode = b_mode if self.a_mode: self.a_mode = mode_str_to_int(self.a_mode) if self.b_mode: self.b_mode = mode_str_to_int(self.b_mode) if a_blob_id is None: self.a_blob = None else: self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path) if b_blob_id is None: self.b_blob = None else: self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path) self.new_file = new_file self.deleted_file = deleted_file # be clear and use None instead of empty strings self.rename_from = rename_from or None self.rename_to = rename_to or None self.diff = diff def __eq__(self, other): for name in self.__slots__: if getattr(self, name) != getattr(other, name): return False # END for each name return True def __ne__(self, other): return not ( self == other ) def __hash__(self): return hash(tuple(getattr(self,n) for n in self.__slots__)) def __str__(self): h = "%s" if self.a_blob: h %= self.a_blob.path elif self.b_blob: h %= self.b_blob.path msg = '' l = None # temp line ll = 0 # line length for b,n in zip((self.a_blob, self.b_blob), ('lhs', 'rhs')): if b: l = "\n%s: %o | %s" % (n, b.mode, b.hexsha) else: l = "\n%s: None" % n # END if blob is not None ll = max(len(l), ll) msg += l # END for each blob # add headline h += '\n' + '='*ll if self.deleted_file: msg += '\nfile deleted in rhs' if self.new_file: msg += '\nfile added in rhs' if self.rename_from: msg += '\nfile renamed from %r' % self.rename_from if self.rename_to: msg += '\nfile renamed to %r' % self.rename_to if self.diff: msg += '\n---' msg += self.diff msg += '\n---' # END diff info return h + msg @property def renamed(self): """:returns: True if the blob of our diff has been renamed""" return self.rename_from != self.rename_to @classmethod def _index_from_patch_format(cls, repo, stream): """Create a new DiffIndex from the given text which must be in patch format :param repo: is the repository we are operating on - it is required :param stream: result of 'git diff' as a stream (supporting file protocol) :return: git.DiffIndex """ # for now, we have to bake the stream text = stream.read() index = DiffIndex() diff_header = cls.re_header.match for diff in ('\n' + text).split('\ndiff --git')[1:]: header = diff_header(diff) a_path, b_path, similarity_index, rename_from, rename_to, \ old_mode, new_mode, new_file_mode, deleted_file_mode, \ a_blob_id, b_blob_id, b_mode = header.groups() new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, new_file, deleted_file, rename_from, rename_to, diff[header.end():])) return index @classmethod def _index_from_raw_format(cls, repo, stream): """Create a new DiffIndex from the given stream which must be in raw format. 
:note: This format is inherently incapable of detecting renames, hence we only modify, delete and add files :return: git.DiffIndex""" # handles # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M .gitignore index = DiffIndex() for line in stream: if not line.startswith(":"): continue # END its not a valid diff line old_mode, new_mode, a_blob_id, b_blob_id, change_type, path = line[1:].split(None, 5) path = path.strip() a_path = path b_path = path deleted_file = False new_file = False # NOTE: We cannot conclude from the existance of a blob to change type # as diffs with the working do not have blobs yet if change_type == 'D': b_blob_id = None deleted_file = True elif change_type == 'A': a_blob_id = None new_file = True # END add/remove handling diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode, new_file, deleted_file, None, None, '') index.append(diff) # END for each line return index GitPython-0.3.2.RC1/git/index/0000755000175100017510000000000011605055665014776 5ustar byronbyronGitPython-0.3.2.RC1/git/index/base.py0000644000175100017510000011622711575507662016300 0ustar byronbyron# index.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import tempfile import os import sys import subprocess import glob from cStringIO import StringIO from stat import S_ISLNK from typ import ( BaseIndexEntry, IndexEntry, ) from util import ( TemporaryFileSwap, post_clear_cache, default_index, git_working_dir ) import git.objects import git.diff as diff from git.exc import ( GitCommandError, CheckoutError ) from git.objects import ( Blob, Submodule, Tree, Object, Commit, ) from git.objects.util import Serializable from git.util import ( IndexFileSHA1Writer, LazyMixin, LockedFD, join_path_native, file_contents_ro, to_native_path_linux, to_native_path ) from fun import ( entry_key, write_cache, read_cache, aggressive_tree_merge, write_tree_from_cache, stat_mode_to_index_mode, S_IFGITLINK ) from gitdb.base import IStream from gitdb.db import MemoryDB from gitdb.util import to_bin_sha from itertools import izip __all__ = ( 'IndexFile', 'CheckoutError' ) class IndexFile(LazyMixin, diff.Diffable, Serializable): """ Implements an Index that can be manipulated using a native implementation in order to save git command function calls wherever possible. It provides custom merging facilities allowing to merge without actually changing your index or your working tree. This way you can perform own test-merges based on the index only without having to deal with the working copy. This is useful in case of partial working trees. ``Entries`` The index contains an entries dict whose keys are tuples of type IndexEntry to facilitate access. You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: index.entries[index.entry_key(index_entry_instance)] = index_entry_instance Make sure you use index.write() once you are done manipulating the index directly before operating on it using the git command""" __slots__ = ("repo", "version", "entries", "_extension_data", "_file_path") _VERSION = 2 # latest version we support S_IFGITLINK = S_IFGITLINK # a submodule def __init__(self, repo, file_path=None): """Initialize this Index instance, optionally from the given ``file_path``. If no file_path is given, we will be created from the current index file. 
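For example (a minimal sketch; the alternate path is hypothetical)::

    index = IndexFile(repo)                          # wraps the default .git/index
    index = IndexFile(repo, '/tmp/alternate_index')  # wraps a custom index file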
If a stream is not given, the stream will be initialized from the current repository's index on demand.""" self.repo = repo self.version = self._VERSION self._extension_data = '' self._file_path = file_path or self._index_path() def _set_cache_(self, attr): if attr == "entries": # read the current index # try memory map for speed lfd = LockedFD(self._file_path) try: fd = lfd.open(write=False, stream=False) except OSError: lfd.rollback() # in new repositories, there may be no index, which means we are empty self.entries = dict() return # END exception handling # Here it comes: on windows in python 2.5, memory maps aren't closed properly # Hence we are in trouble if we try to delete a file that is memory mapped, # which happens during read-tree. # In this case, we will just read the memory in directly. # Its insanely bad ... I am disappointed ! allow_mmap = (os.name != 'nt' or sys.version_info[1] > 5) stream = file_contents_ro(fd, stream=True, allow_mmap=allow_mmap) try: self._deserialize(stream) finally: lfd.rollback() # The handles will be closed on desctruction # END read from default index on demand else: super(IndexFile, self)._set_cache_(attr) def _index_path(self): return join_path_native(self.repo.git_dir, "index") @property def path(self): """ :return: Path to the index file we are representing """ return self._file_path def _delete_entries_cache(self): """Safely clear the entries cache so it can be recreated""" try: del(self.entries) except AttributeError: # fails in python 2.6.5 with this exception pass # END exception handling #{ Serializable Interface def _deserialize(self, stream): """Initialize this instance with index values read from the given stream""" self.version, self.entries, self._extension_data, conten_sha = read_cache(stream) return self def _entries_sorted(self): """:return: list of entries, in a sorted fashion, first by path, then by stage""" entries_sorted = self.entries.values() entries_sorted.sort(key=lambda e: (e.path, e.stage)) # use path/stage as sort key return entries_sorted def _serialize(self, stream, ignore_tree_extension_data=False): entries = self._entries_sorted() write_cache(entries, stream, (ignore_tree_extension_data and None) or self._extension_data) return self #} END serializable interface def write(self, file_path = None, ignore_tree_extension_data=False): """Write the current state to our file path or to the given one :param file_path: If None, we will write to our stored file path from which we have been initialized. Otherwise we write to the given file path. Please note that this will change the file_path of this index to the one you gave. :param ignore_tree_extension_data: If True, the TREE type extension data read in the index will not be written to disk. Use this if you have altered the index and would like to use git-write-tree afterwards to create a tree representing your written changes. If this data is present in the written index, git-write-tree will instead write the stored/cached tree. Alternatively, use IndexFile.write_tree() to handle this case automatically :return: self""" # make sure we have our entries read before getting a write lock # else it would be done when streaming. 
This can happen # if one doesn't change the index, but writes it right away self.entries lfd = LockedFD(file_path or self._file_path) stream = lfd.open(write=True, stream=True) self._serialize(stream, ignore_tree_extension_data) lfd.commit() # make sure we represent what we have written if file_path is not None: self._file_path = file_path @post_clear_cache @default_index def merge_tree(self, rhs, base=None): """Merge the given rhs treeish into the current index, possibly taking a common base treeish into account. As opposed to the from_tree_ method, this allows you to use an already existing tree as the left side of the merge :param rhs: treeish reference pointing to the 'other' side of the merge. :param base: optional treeish reference pointing to the common base of 'rhs' and this index which equals lhs :return: self ( containing the merge and possibly unmerged entries in case of conflicts ) :raise GitCommandError: If there is a merge conflict. The error will be raised at the first conflicting path. If you want to have proper merge resolution to be done by yourself, you have to commit the changed index ( or make a valid tree from it ) and retry with a three-way index.from_tree call. """ # -i : ignore working tree status # --aggressive : handle more merge cases # -m : do an actual merge args = ["--aggressive", "-i", "-m"] if base is not None: args.append(base) args.append(rhs) self.repo.git.read_tree(args) return self @classmethod def new(cls, repo, *tree_sha): """ Merge the given treeish revisions into a new index which is returned. This method behaves like git-read-tree --aggressive when doing the merge. :param repo: The repository treeish are located in. :param tree_sha: 20 byte or 40 byte tree sha or tree objects :return: New IndexFile instance. Its path will be undefined. If you intend to write such a merged Index, supply an alternate file_path to its 'write' method.""" base_entries = aggressive_tree_merge(repo.odb, [to_bin_sha(str(t)) for t in tree_sha]) inst = cls(repo) # convert to entries dict entries = dict(izip(((e.path, e.stage) for e in base_entries), (IndexEntry.from_base(e) for e in base_entries))) inst.entries = entries return inst @classmethod def from_tree(cls, repo, *treeish, **kwargs): """Merge the given treeish revisions into a new index which is returned. The original index will remain unaltered :param repo: The repository treeish are located in. :param treeish: One, two or three Tree Objects, Commits or 40 byte hexshas. The result changes according to the amount of trees. If 1 Tree is given, it will just be read into a new index If 2 Trees are given, they will be merged into a new index using a two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other' one. It behaves like a fast-forward. If 3 Trees are given, a 3-way merge will be performed with the first tree being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree, tree 3 is the 'other' one :param kwargs: Additional arguments passed to git-read-tree :return: New IndexFile instance. It will point to a temporary index location which does not exist anymore. If you intend to write such a merged Index, supply an alternate file_path to its 'write' method. :note: In the three-way merge case, --aggressive will be specified to automatically resolve more cases in a commonly correct manner. Specify trivial=True as kwarg to override that. 
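A hedged sketch of a two-way merge into a temporary index (the output path is illustrative)::

    base = repo.commit('HEAD~1')
    index = IndexFile.from_tree(repo, base, repo.head.commit)
    index.write('/tmp/merged_index')  # supply a path explicitly, see note above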
As the underlying git-read-tree command takes into account the current index, it will be temporarily moved out of the way to assure there are no unsuspected interferences.""" if len(treeish) == 0 or len(treeish) > 3: raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish)) arg_list = list() # ignore that working tree and index possibly are out of date if len(treeish)>1: # drop unmerged entries when reading our index and merging arg_list.append("--reset") # handle non-trivial cases the way a real merge does arg_list.append("--aggressive") # END merge handling # tmp file created in git home directory to be sure renaming # works - /tmp/ dirs could be on another device tmp_index = tempfile.mktemp('','',repo.git_dir) arg_list.append("--index-output=%s" % tmp_index) arg_list.extend(treeish) # move current index out of the way - otherwise the merge may fail # as it considers existing entries. moving it essentially clears the index. # Unfortunately there is no 'soft' way to do it. # The TemporaryFileSwap assure the original file get put back index_handler = TemporaryFileSwap(join_path_native(repo.git_dir, 'index')) try: repo.git.read_tree(*arg_list, **kwargs) index = cls(repo, tmp_index) index.entries # force it to read the file as we will delete the temp-file del(index_handler) # release as soon as possible finally: if os.path.exists(tmp_index): os.remove(tmp_index) # END index merge handling return index # UTILITIES def _iter_expand_paths(self, paths): """Expand the directories in list of paths to the corresponding paths accordingly, Note: git will add items multiple times even if a glob overlapped with manually specified paths or if paths where specified multiple times - we respect that and do not prune""" def raise_exc(e): raise e r = self.repo.working_tree_dir rs = r + os.sep for path in paths: abs_path = path if not os.path.isabs(abs_path): abs_path = os.path.join(r, path) # END make absolute path # resolve globs if possible if '?' in path or '*' in path or '[' in path: for f in self._iter_expand_paths(glob.glob(abs_path)): yield f.replace(rs, '') continue # END glob handling try: for root, dirs, files in os.walk(abs_path, onerror=raise_exc): for rela_file in files: # add relative paths only yield os.path.join(root.replace(rs, ''), rela_file) # END for each file in subdir # END for each subdirectory except OSError: # was a file or something that could not be iterated yield path.replace(rs, '') # END path exception handling # END for each path def _write_path_to_stdin(self, proc, filepath, item, fmakeexc, fprogress, read_from_stdout=True): """Write path to proc.stdin and make sure it processes the item, including progress. :return: stdout string :param read_from_stdout: if True, proc.stdout will be read after the item was sent to stdin. In that case, it will return None :note: There is a bug in git-update-index that prevents it from sending reports just in time. This is why we have a version that tries to read stdout and one which doesn't. In fact, the stdout is not important as the piped-in files are processed anyway and just in time :note: Newlines are essential here, gits behaviour is somewhat inconsistent on this depending on the version, hence we try our best to deal with newlines carefully. 
Usually the last newline will not be sent, instead we will close stdin to break the pipe.""" fprogress(filepath, False, item) rval = None try: proc.stdin.write("%s\n" % filepath) except IOError: # pipe broke, usually because some error happend raise fmakeexc() # END write exception handling proc.stdin.flush() if read_from_stdout: rval = proc.stdout.readline().strip() fprogress(filepath, True, item) return rval def iter_blobs(self, predicate = lambda t: True): """ :return: Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob) :param predicate: Function(t) returning True if tuple(stage, Blob) should be yielded by the iterator. A default filter, the BlobFilter, allows you to yield blobs only if they match a given list of paths. """ for entry in self.entries.itervalues(): # TODO: is it necessary to convert the mode ? We did that when adding # it to the index, right ? mode = stat_mode_to_index_mode(entry.mode) blob = entry.to_blob(self.repo) blob.size = entry.size output = (entry.stage, blob) if predicate(output): yield output # END for each entry def unmerged_blobs(self): """ :return: Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being a dictionary associating a path in the index with a list containing sorted stage/blob pairs :note: Blobs that have been removed in one side simply do not exist in the given stage. I.e. a file removed on the 'other' branch whose entries are at stage 3 will not have a stage 3 entry. """ is_unmerged_blob = lambda t: t[0] != 0 path_map = dict() for stage, blob in self.iter_blobs(is_unmerged_blob): path_map.setdefault(blob.path, list()).append((stage, blob)) # END for each unmerged blob for l in path_map.itervalues(): l.sort() return path_map @classmethod def entry_key(cls, *entry): return entry_key(*entry) def resolve_blobs(self, iter_blobs): """Resolve the blobs given in blob iterator. This will effectively remove the index entries of the respective path at all non-null stages and add the given blob as new stage null blob. For each path there may only be one blob, otherwise a ValueError will be raised claiming the path is already at stage 0. :raise ValueError: if one of the blobs already existed at stage 0 :return: self :note: You will have to write the index manually once you are done, i.e. index.resolve_blobs(blobs).write() """ for blob in iter_blobs: stage_null_key = (blob.path, 0) if stage_null_key in self.entries: raise ValueError( "Path %r already exists at stage 0" % blob.path ) # END assert blob is not stage 0 already # delete all possible stages for stage in (1, 2, 3): try: del( self.entries[(blob.path, stage)]) except KeyError: pass # END ignore key errors # END for each possible stage self.entries[stage_null_key] = IndexEntry.from_blob(blob) # END for each blob return self def update(self): """Reread the contents of our index file, discarding all cached information we might have. :note: This is a possibly dangerious operations as it will discard your changes to index.entries :return: self""" self._delete_entries_cache() # allows to lazily reread on demand return self def write_tree(self): """Writes this index to a corresponding Tree object into the repository's object database and return it. :return: Tree object representing this index :note: The tree will be written even if one or more objects the tree refers to does not yet exist in the object database. This could happen if you added Entries to the index directly. 
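A minimal sketch (assuming ``index`` holds entries)::

    tree = index.write_tree()
    assert tree.type == 'tree'
    new_commit = index.commit("message")  # commit() uses write_tree() internally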
:raise ValueError: if there are no entries in the cache :raise UnmergedEntriesError: """ # we obtain no lock as we just flush our contents to disk as tree # If we are a new index, the entries access will load our data accordingly mdb = MemoryDB() entries = self._entries_sorted() binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries))) # copy changed trees only mdb.stream_copy(mdb.sha_iter(), self.repo.odb) # note: additional deserialization could be saved if write_tree_from_cache # would return sorted tree entries root_tree = Tree(self.repo, binsha, path='') root_tree._cache = tree_items return root_tree def _process_diff_args(self, args): try: args.pop(args.index(self)) except IndexError: pass # END remove self return args def _to_relative_path(self, path): """:return: Version of path relative to our git directory or raise ValueError if it is not within our git direcotory""" if not os.path.isabs(path): return path relative_path = path.replace(self.repo.working_tree_dir+os.sep, "") if relative_path == path: raise ValueError("Absolute path %r is not in git repository at %r" % (path,self.repo.working_tree_dir)) return relative_path def _preprocess_add_items(self, items): """ Split the items into two lists of path strings and BaseEntries. """ paths = list() entries = list() for item in items: if isinstance(item, basestring): paths.append(self._to_relative_path(item)) elif isinstance(item, (Blob, Submodule)): entries.append(BaseIndexEntry.from_blob(item)) elif isinstance(item, BaseIndexEntry): entries.append(item) else: raise TypeError("Invalid Type: %r" % item) # END for each item return (paths, entries) @git_working_dir def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None, write=True): """Add files from the working tree, specific blobs or BaseIndexEntries to the index. :param items: Multiple types of items are supported, types can be mixed within one call. Different types imply a different handling. File paths may generally be relative or absolute. - path string strings denote a relative or absolute path into the repository pointing to an existing file, i.e. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'. Paths provided like this must exist. When added, they will be written into the object database. PathStrings may contain globs, such as 'lib/__init__*' or can be directories like 'lib', the latter ones will add all the files within the dirctory and subdirectories. This equals a straight git-add. They are added at stage 0 - Blob or Submodule object Blobs are added as they are assuming a valid mode is set. The file they refer to may or may not exist in the file system, but must be a path relative to our repository. If their sha is null ( 40*0 ), their path must exist in the file system relative to the git repository as an object will be created from the data at the path. The handling now very much equals the way string paths are processed, except that the mode you have set will be kept. This allows you to create symlinks by settings the mode respectively and writing the target of the symlink directly into the file. This equals a default Linux-Symlink which is not dereferenced automatically, except that it can be created on filesystems not supporting it as well. Please note that globs or directories are not allowed in Blob objects. They are added at stage 0 - BaseIndexEntry or type Handling equals the one of Blob objects, but the stage may be explicitly set. Please note that Index Entries require binary sha's. 
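A hedged sketch mixing item types (paths are illustrative and must exist in the working tree)::

    index.add(['CHANGES', 'lib/*.py'])  # plain paths and globs, added at stage 0
    index.add([Blob(repo, Blob.NULL_BIN_SHA, 0100644, 'lib/myfile.ext')])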
:param force: **CURRENTLY INEFFECTIVE** If True, otherwise ignored or excluded files will be added anyway. As opposed to the git-add command, we enable this flag by default as the API user usually wants the item to be added even though they might be excluded. :param fprogress: Function with signature f(path, done=False, item=item) called for each path to be added, one time once it is about to be added where done==False and once after it was added where done=True. item is set to the actual item we handle, either a Path or a BaseIndexEntry Please note that the processed path is not guaranteed to be present in the index already as the index is currently being processed. :param path_rewriter: Function with signature (string) func(BaseIndexEntry) function returning a path for each passed entry which is the path to be actually recorded for the object created from entry.path. This allows you to write an index which is not identical to the layout of the actual files on your hard-disk. If not None and ``items`` contain plain paths, these paths will be converted to Entries beforehand and passed to the path_rewriter. Please note that entry.path is relative to the git repository. :param write: If True, the index will be written once it was altered. Otherwise the changes only exist in memory and are not available to git commands. :return: List(BaseIndexEntries) representing the entries just actually added. :raise OSError: if a supplied Path did not exist. Please note that BaseIndexEntry Objects that do not have a null sha will be added even if their paths do not exist. """ # sort the entries into strings and Entries, Blobs are converted to entries # automatically # paths can be git-added, for everything else we use git-update-index entries_added = list() paths, entries = self._preprocess_add_items(items) if paths and path_rewriter: for path in paths: abspath = os.path.abspath(path) gitrelative_path = abspath[len(self.repo.working_tree_dir)+1:] blob = Blob(self.repo, Blob.NULL_BIN_SHA, stat_mode_to_index_mode(os.stat(abspath).st_mode), to_native_path_linux(gitrelative_path)) entries.append(BaseIndexEntry.from_blob(blob)) # END for each path del(paths[:]) # END rewrite paths def store_path(filepath): """Store file at filepath in the database and return the base index entry""" st = os.lstat(filepath) # handles non-symlinks as well stream = None if S_ISLNK(st.st_mode): stream = StringIO(os.readlink(filepath)) else: stream = open(filepath, 'rb') # END handle stream fprogress(filepath, False, filepath) istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream)) fprogress(filepath, True, filepath) return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode), istream.binsha, 0, to_native_path_linux(filepath))) # END utility method # HANDLE PATHS if paths: assert len(entries_added) == 0 added_files = list() for filepath in self._iter_expand_paths(paths): entries_added.append(store_path(filepath)) # END for each filepath # END path handling # HANDLE ENTRIES if entries: null_mode_entries = [ e for e in entries if e.mode == 0 ] if null_mode_entries: raise ValueError("At least one Entry has a null-mode - please use index.remove to remove files for clarity") # END null mode should be remove # HANLDE ENTRY OBJECT CREATION # create objects if required, otherwise go with the existing shas null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ] if null_entries_indices: for ei in null_entries_indices: null_entry = entries[ei] new_entry = store_path(null_entry.path) # update 
null entry entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path)) # END for each entry index # END null_entry handling # REWRITE PATHS # If we have to rewrite the entries, do so now, after we have generated # all object sha's if path_rewriter: for i,e in enumerate(entries): entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e))) # END for each entry # END handle path rewriting # just go through the remaining entries and provide progress info for i, entry in enumerate(entries): progress_sent = i in null_entries_indices if not progress_sent: fprogress(entry.path, False, entry) fprogress(entry.path, True, entry) # END handle progress # END for each entry entries_added.extend(entries) # END if there are base entries # FINALIZE # add the new entries to this instance for entry in entries_added: self.entries[(entry.path, 0)] = IndexEntry.from_base(entry) if write: self.write() # END handle write return entries_added def _items_to_rela_paths(self, items): """Returns a list of repo-relative paths from the given items which may be absolute or relative paths, entries or blobs""" paths = list() for item in items: if isinstance(item, (BaseIndexEntry, Blob, Submodule)): paths.append(self._to_relative_path(item.path)) elif isinstance(item, basestring): paths.append(self._to_relative_path(item)) else: raise TypeError("Invalid item type: %r" % item) # END for each item return paths @post_clear_cache @default_index def remove(self, items, working_tree=False, **kwargs): """Remove the given items from the index and optionally from the working tree as well. :param items: Multiple types of items are supported which may be freely mixed. - path string Remove the given path at all stages. If it is a directory, you must specify the r=True keyword argument to remove all file entries below it. If absolute paths are given, they will be converted to a path relative to the git repository directory containing the working tree. The path string may include globs, such as *.c. - Blob Object Only the path portion is used in this case. - BaseIndexEntry or compatible type The only relevant information here is the path. The stage is ignored. :param working_tree: If True, the entry will also be removed from the working tree, physically removing the respective file. This may fail if there are uncommitted changes in it. :param kwargs: Additional keyword arguments to be passed to git-rm, such as 'r' to allow recursive removal :return: List(path_string, ...) list of repository relative paths that have been removed effectively. This is interesting to know in case you have provided a directory or globs. Paths are relative to the repository. """ args = list() if not working_tree: args.append("--cached") args.append("--") # preprocess paths paths = self._items_to_rela_paths(items) removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines() # process output to gain proper paths # rm 'path' return [ p[4:-1] for p in removed_paths ] @post_clear_cache @default_index def move(self, items, skip_errors=False, **kwargs): """Rename/move the items, whereas the last item is considered the destination of the move operation. If the destination is a file, the first item ( of two ) must be a file as well. If the destination is a directory, it may be preceded by one or more directories or files. The working tree will be affected in non-bare repositories. :param items: Multiple types of items are supported, please see the 'remove' method for reference. 
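For instance (a sketch; paths are illustrative)::

    index.move(['lib/old.py', 'lib/new.py'])  # rename a single file
    index.move(['f1.py', 'f2.py', 'lib'])     # move both files into lib/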
:param skip_errors: If True, errors such as ones resulting from missing source files will be skipped. :param kwargs: Additional arguments you would like to pass to git-mv, such as dry_run or force. :return: List(tuple(source_path_string, destination_path_string), ...) A list of pairs, containing the source file moved as well as its actual destination. Relative to the repository root. :raise ValueError: If only one item was given :raise GitCommandError: If git could not handle your request""" args = list() if skip_errors: args.append('-k') paths = self._items_to_rela_paths(items) if len(paths) < 2: raise ValueError("Please provide at least one source and one destination of the move operation") was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None)) kwargs['dry_run'] = True # first execute rename in dryrun so the command tells us what it actually does # ( for later output ) out = list() mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines() # parse result - first 0:n/2 lines are 'checking ', the remaining ones # are the 'renaming' ones which we parse for ln in xrange(len(mvlines)/2, len(mvlines)): tokens = mvlines[ln].split(' to ') assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln] # [0] = Renaming x # [1] = y out.append((tokens[0][9:], tokens[1])) # END for each line to parse # either prepare for the real run, or output the dry-run result if was_dry_run: return out # END handle dryrun # now apply the actual operation kwargs.pop('dry_run') self.repo.git.mv(args, paths, **kwargs) return out def commit(self, message, parent_commits=None, head=True): """Commit the current default index file, creating a commit object. For more information on the arguments, see tree.commit. :note: If you have manually altered the .entries member of this instance, don't forget to write() your changes to disk beforehand. :return: Commit object representing the new commit""" tree = self.write_tree() return Commit.create_from_tree(self.repo, tree, message, parent_commits, head) @classmethod def _flush_stdin_and_wait(cls, proc, ignore_stdout = False): proc.stdin.flush() proc.stdin.close() stdout = '' if not ignore_stdout: stdout = proc.stdout.read() proc.stdout.close() proc.wait() return stdout @default_index def checkout(self, paths=None, force=False, fprogress=lambda *args: None, **kwargs): """Checkout the given paths or all files from the version known to the index into the working tree. :note: Be sure you have written pending changes using the ``write`` method in case you have altered the entries dictionary directly :param paths: If None, all paths in the index will be checked out. Otherwise an iterable of relative or absolute paths or a single path pointing to files or directories in the index is expected. :param force: If True, existing files will be overwritten even if they contain local modifications. If False, these will trigger a CheckoutError. :param fprogress: see Index.add_ for signature and explanation. The provided progress information will contain None as path and item if no explicit paths are given. Otherwise progress information will be sent before and after a file has been checked out :param kwargs: Additional arguments to be passed to git-checkout-index :return: iterable yielding paths to files which have been checked out and are guaranteed to match the version stored in the index :raise CheckoutError: If at least one file failed to be checked out. This is a summary, hence it will checkout as many files as it can anyway. 
If one of files or directories do not exist in the index ( as opposed to the original git command who ignores them ). Raise GitCommandError if error lines could not be parsed - this truly is an exceptional state .. note:: The checkout is limited to checking out the files in the index. Files which are not in the index anymore and exist in the working tree will not be deleted. This behaviour is fundamentally different to *head.checkout*, i.e. if you want git-checkout like behaviour, use head.checkout instead of index.checkout. """ args = ["--index"] if force: args.append("--force") def handle_stderr(proc, iter_checked_out_files): stderr = proc.stderr.read() if not stderr: return # line contents: # git-checkout-index: this already exists failed_files = list() failed_reasons = list() unknown_lines = list() endings = (' already exists', ' is not in the cache', ' does not exist at stage', ' is unmerged') for line in stderr.splitlines(): if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "): is_a_dir = " is a directory" unlink_issue = "unable to unlink old '" already_exists_issue = ' already exists, no checkout' # created by entry.c:checkout_entry(...) if line.endswith(is_a_dir): failed_files.append(line[:-len(is_a_dir)]) failed_reasons.append(is_a_dir) elif line.startswith(unlink_issue): failed_files.append(line[len(unlink_issue):line.rfind("'")]) failed_reasons.append(unlink_issue) elif line.endswith(already_exists_issue): failed_files.append(line[:-len(already_exists_issue)]) failed_reasons.append(already_exists_issue) else: unknown_lines.append(line) continue # END special lines parsing for e in endings: if line.endswith(e): failed_files.append(line[20:-len(e)]) failed_reasons.append(e) break # END if ending matches # END for each possible ending # END for each line if unknown_lines: raise GitCommandError(("git-checkout-index", ), 128, stderr) if failed_files: valid_files = list(set(iter_checked_out_files) - set(failed_files)) raise CheckoutError("Some files could not be checked out from the index due to local modifications", failed_files, valid_files, failed_reasons) # END stderr handler if paths is None: args.append("--all") kwargs['as_process'] = 1 fprogress(None, False, None) proc = self.repo.git.checkout_index(*args, **kwargs) proc.wait() fprogress(None, True, None) rval_iter = ( e.path for e in self.entries.itervalues() ) handle_stderr(proc, rval_iter) return rval_iter else: if isinstance(paths, basestring): paths = [paths] # make sure we have our entries loaded before we start checkout_index # which will hold a lock on it. 
We try to get the lock as well during # our entries initialization self.entries args.append("--stdin") kwargs['as_process'] = True kwargs['istream'] = subprocess.PIPE proc = self.repo.git.checkout_index(args, **kwargs) make_exc = lambda : GitCommandError(("git-checkout-index",)+tuple(args), 128, proc.stderr.read()) checked_out_files = list() for path in paths: co_path = to_native_path_linux(self._to_relative_path(path)) # if the item is not in the index, it could be a directory path_is_directory = False try: self.entries[(co_path, 0)] except KeyError: dir = co_path if not dir.endswith('/'): dir += '/' for entry in self.entries.itervalues(): if entry.path.startswith(dir): p = entry.path self._write_path_to_stdin(proc, p, p, make_exc, fprogress, read_from_stdout=False) checked_out_files.append(p) path_is_directory = True # END if entry is in directory # END for each entry # END path exception handlnig if not path_is_directory: self._write_path_to_stdin(proc, co_path, path, make_exc, fprogress, read_from_stdout=False) checked_out_files.append(co_path) # END path is a file # END for each path self._flush_stdin_and_wait(proc, ignore_stdout=True) handle_stderr(proc, checked_out_files) return checked_out_files # END paths handling assert "Should not reach this point" @default_index def reset(self, commit='HEAD', working_tree=False, paths=None, head=False, **kwargs): """Reset the index to reflect the tree at the given commit. This will not adjust our HEAD reference as opposed to HEAD.reset by default. :param commit: Revision, Reference or Commit specifying the commit we should represent. If you want to specify a tree only, use IndexFile.from_tree and overwrite the default index. :param working_tree: If True, the files in the working tree will reflect the changed index. If False, the working tree will not be touched Please note that changes to the working copy will be discarded without warning ! :param head: If True, the head will be set to the given commit. This is False by default, but if True, this method behaves like HEAD.reset. :param paths: if given as an iterable of absolute or repository-relative paths, only these will be reset to their state at the given commit'ish. The paths need to exist at the commit, otherwise an exception will be raised. :param kwargs: Additional keyword arguments passed to git-reset .. note:: IndexFile.reset, as opposed to HEAD.reset, will not delete anyfiles in order to maintain a consistent working tree. Instead, it will just checkout the files according to their state in the index. If you want git-reset like behaviour, use *HEAD.reset* instead. 
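A hedged sketch::

    index.reset('HEAD~1')                     # index matches the parent commit
    index.reset('HEAD~1', working_tree=True)  # additionally overwrite working tree files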
:return: self """ # what we actually want to do is to merge the tree into our existing # index, which is what git-read-tree does new_inst = type(self).from_tree(self.repo, commit) if not paths: self.entries = new_inst.entries else: nie = new_inst.entries for path in paths: path = self._to_relative_path(path) try: key = entry_key(path, 0) self.entries[key] = nie[key] except KeyError: # if key is not in theirs, it musn't be in ours try: del(self.entries[key]) except KeyError: pass # END handle deletion keyerror # END handle keyerror # END for each path # END handle paths self.write() if working_tree: self.checkout(paths=paths, force=True) # END handle working tree if head: self.repo.head.set_commit(self.repo.commit(commit), logmsg="%s: Updating HEAD" % commit) # END handle head change return self @default_index def diff(self, other=diff.Diffable.Index, paths=None, create_patch=False, **kwargs): """Diff this index against the working copy or a Tree or Commit object For a documentation of the parameters and return values, see Diffable.diff :note: Will only work with indices that represent the default git index as they have not been initialized with a stream. """ # index against index is always empty if other is self.Index: return diff.DiffIndex() # index against anything but None is a reverse diff with the respective # item. Handle existing -R flags properly. Transform strings to the object # so that we can call diff on it if isinstance(other, basestring): other = self.repo.rev_parse(other) # END object conversion if isinstance(other, Object): # invert the existing R flag cur_val = kwargs.get('R', False) kwargs['R'] = not cur_val return other.diff(self.Index, paths, create_patch, **kwargs) # END diff against other item handlin # if other is not None here, something is wrong if other is not None: raise ValueError( "other must be None, Diffable.Index, a Tree or Commit, was %r" % other ) # diff against working copy - can be handled by superclass natively return super(IndexFile, self).diff(other, paths, create_patch, **kwargs) GitPython-0.3.2.RC1/git/index/fun.py0000644000175100017510000002460611573623573016153 0ustar byronbyron# Contains standalone functions to accompany the index implementation and make it # more versatile # NOTE: Autodoc hates it if this is a docstring from stat import ( S_IFDIR, S_IFLNK, S_ISLNK, S_IFDIR, S_ISDIR, S_IFMT, S_IFREG, ) S_IFGITLINK = S_IFLNK | S_IFDIR # a submodule from cStringIO import StringIO from git.util import IndexFileSHA1Writer from git.exc import UnmergedEntriesError from git.objects.fun import ( tree_to_stream, traverse_tree_recursive, traverse_trees_recursive ) from typ import ( BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT ) CE_NAMEMASK_INV = ~CE_NAMEMASK from util import ( pack, unpack ) from gitdb.base import IStream from gitdb.typ import str_tree_type __all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key', 'stat_mode_to_index_mode', 'S_IFGITLINK') def stat_mode_to_index_mode(mode): """Convert the given mode from a stat call to the corresponding index mode and return it""" if S_ISLNK(mode): # symlinks return S_IFLNK if S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK: # submodules return S_IFGITLINK return S_IFREG | 0644 | (mode & 0100) # blobs with or without executable bit def write_cache(entries, stream, extension_data=None, ShaStreamCls=IndexFileSHA1Writer): """Write the cache represented by entries to a stream :param entries: **sorted** list of entries :param stream: stream to wrap into the AdapterStreamCls - it is 
used for final output. :param ShaStreamCls: Type to use when writing to the stream. It produces a sha while writing to it, before the data is passed on to the wrapped stream :param extension_data: any kind of data to write as a trailer, it must begin a 4 byte identifier, followed by its size ( 4 bytes )""" # wrap the stream into a compatible writer stream = ShaStreamCls(stream) tell = stream.tell write = stream.write # header version = 2 write("DIRC") write(pack(">LL", version, len(entries))) # body for entry in entries: beginoffset = tell() write(entry[4]) # ctime write(entry[5]) # mtime path = entry[3] plen = len(path) & CE_NAMEMASK # path length assert plen == len(path), "Path %s too long to fit into index" % entry[3] flags = plen | (entry[2] & CE_NAMEMASK_INV) # clear possible previous values write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0], entry[8], entry[9], entry[10], entry[1], flags)) write(path) real_size = ((tell() - beginoffset + 8) & ~7) write("\0" * ((beginoffset + real_size) - tell())) # END for each entry # write previously cached extensions data if extension_data is not None: stream.write(extension_data) # write the sha over the content stream.write_sha() def read_header(stream): """Return tuple(version_long, num_entries) from the given stream""" type_id = stream.read(4) if type_id != "DIRC": raise AssertionError("Invalid index file header: %r" % type_id) version, num_entries = unpack(">LL", stream.read(4 * 2)) # TODO: handle version 3: extended data, see read-cache.c assert version in (1, 2) return version, num_entries def entry_key(*entry): """:return: Key suitable to be used for the index.entries dictionary :param entry: One instance of type BaseIndexEntry or the path and the stage""" if len(entry) == 1: return (entry[0].path, entry[0].stage) else: return tuple(entry) # END handle entry def read_cache(stream): """Read a cache file from the given stream :return: tuple(version, entries_dict, extension_data, content_sha) * version is the integer version number * entries dict is a dictionary which maps IndexEntry instances to a path at a stage * extension_data is '' or 4 bytes of type + 4 bytes of size + size bytes * content_sha is a 20 byte sha on all cache file contents""" version, num_entries = read_header(stream) count = 0 entries = dict() read = stream.read tell = stream.tell while count < num_entries: beginoffset = tell() ctime = unpack(">8s", read(8))[0] mtime = unpack(">8s", read(8))[0] (dev, ino, mode, uid, gid, size, sha, flags) = \ unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2)) path_size = flags & CE_NAMEMASK path = read(path_size) real_size = ((tell() - beginoffset + 8) & ~7) data = read((beginoffset + real_size) - tell()) entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size)) # entry_key would be the method to use, but we safe the effort entries[(path, entry.stage)] = entry count += 1 # END for each entry # the footer contains extension data and a sha on the content so far # Keep the extension footer,and verify we have a sha in the end # Extension data format is: # 4 bytes ID # 4 bytes length of chunk # repeated 0 - N times extension_data = stream.read(~0) assert len(extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data) content_sha = extension_data[-20:] # truncate the sha in the end as we will dynamically create it anyway extension_data = extension_data[:-20] return (version, entries, extension_data, content_sha) def write_tree_from_cache(entries, odb, sl, 
si=0): """Create a tree from the given sorted list of entries and put the respective trees into the given object database :param entries: **sorted** list of IndexEntries :param odb: object database to store the trees in :param si: start index at which we should start creating subtrees :param sl: slice indicating the range we should process on the entries list :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of tree entries being a tuple of hexsha, mode, name""" tree_items = list() tree_items_append = tree_items.append ci = sl.start end = sl.stop while ci < end: entry = entries[ci] if entry.stage != 0: raise UnmergedEntriesError(entry) # END abort on unmerged ci += 1 rbound = entry.path.find('/', si) if rbound == -1: # its not a tree tree_items_append((entry.binsha, entry.mode, entry.path[si:])) else: # find common base range base = entry.path[si:rbound] xi = ci while xi < end: oentry = entries[xi] orbound = oentry.path.find('/', si) if orbound == -1 or oentry.path[si:orbound] != base: break # END abort on base mismatch xi += 1 # END find common base # enter recursion # ci - 1 as we want to count our current item as well sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1) tree_items_append((sha, S_IFDIR, base)) # skip ahead ci = xi # END handle bounds # END for each entry # finally create the tree sio = StringIO() tree_to_stream(tree_items, sio.write) sio.seek(0) istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio)) return (istream.binsha, tree_items) def _tree_entry_to_baseindexentry(tree_entry, stage): return BaseIndexEntry((tree_entry[1], tree_entry[0], stage < 3: raise ValueError("Cannot handle %i trees at once" % len(tree_shas)) # three trees for base, ours, theirs in traverse_trees_recursive(odb, tree_shas, ''): if base is not None: # base version exists if ours is not None: # ours exists if theirs is not None: # it exists in all branches, if it was changed in both # its a conflict, otherwise we take the changed version # This should be the most common branch, so it comes first if( base[0] != ours[0] and base[0] != theirs[0] and ours[0] != theirs[0] ) or \ ( base[1] != ours[1] and base[1] != theirs[1] and ours[1] != theirs[1] ): # changed by both out_append(_tree_entry_to_baseindexentry(base, 1)) out_append(_tree_entry_to_baseindexentry(ours, 2)) out_append(_tree_entry_to_baseindexentry(theirs, 3)) elif base[0] != ours[0] or base[1] != ours[1]: # only we changed it out_append(_tree_entry_to_baseindexentry(ours, 0)) else: # either nobody changed it, or they did. 
In either # case, use theirs out_append(_tree_entry_to_baseindexentry(theirs, 0)) # END handle modification else: if ours[0] != base[0] or ours[1] != base[1]: # they deleted it, we changed it, conflict out_append(_tree_entry_to_baseindexentry(base, 1)) out_append(_tree_entry_to_baseindexentry(ours, 2)) # else: # we didn't change it, ignore # pass # END handle our change # END handle theirs else: if theirs is None: # deleted in both, its fine - its out pass else: if theirs[0] != base[0] or theirs[1] != base[1]: # deleted in ours, changed theirs, conflict out_append(_tree_entry_to_baseindexentry(base, 1)) out_append(_tree_entry_to_baseindexentry(theirs, 3)) # END theirs changed #else: # theirs didnt change # pass # END handle theirs # END handle ours else: # all three can't be None if ours is None: # added in their branch out_append(_tree_entry_to_baseindexentry(theirs, 0)) elif theirs is None: # added in our branch out_append(_tree_entry_to_baseindexentry(ours, 0)) else: # both have it, except for the base, see whether it changed if ours[0] != theirs[0] or ours[1] != theirs[1]: out_append(_tree_entry_to_baseindexentry(ours, 2)) out_append(_tree_entry_to_baseindexentry(theirs, 3)) else: # it was added the same in both out_append(_tree_entry_to_baseindexentry(ours, 0)) # END handle two items # END handle heads # END handle base exists # END for each entries tuple return out GitPython-0.3.2.RC1/git/index/typ.py0000644000175100017510000001045211575507662016173 0ustar byronbyron"""Module with additional types used by the index""" from util import ( pack, unpack ) from binascii import ( b2a_hex, ) from git.objects import Blob __all__ = ('BlobFilter', 'BaseIndexEntry', 'IndexEntry') #{ Invariants CE_NAMEMASK = 0x0fff CE_STAGEMASK = 0x3000 CE_EXTENDED = 0x4000 CE_VALID = 0x8000 CE_STAGESHIFT = 12 #} END invariants class BlobFilter(object): """ Predicate to be used by iter_blobs allowing to filter only return blobs which match the given list of directories or files. The given paths are given relative to the repository. """ __slots__ = 'paths' def __init__(self, paths): """:param paths: tuple or list of paths which are either pointing to directories or to files relative to the current repository """ self.paths = paths def __call__(self, stage_blob): path = stage_blob[1].path for p in self.paths: if path.startswith(p): return True # END for each path in filter paths return False class BaseIndexEntry(tuple): """Small Brother of an index entry which can be created to describe changes done to the index in which case plenty of additional information is not requried. As the first 4 data members match exactly to the IndexEntry type, methods expecting a BaseIndexEntry can also handle full IndexEntries even if they use numeric indices for performance reasons. 
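For example (a sketch assuming ``blob`` is an existing Blob)::

    entry = BaseIndexEntry.from_blob(blob, stage=0)
    assert entry.hexsha == blob.hexsha and entry.path == blob.path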
""" def __str__(self): return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path) def __repr__(self): return "(%o, %s, %i, %s)" % (self.mode, self.hexsha, self.stage, self.path) @property def mode(self): """ File Mode, compatible to stat module constants """ return self[0] @property def binsha(self): """binary sha of the blob """ return self[1] @property def hexsha(self): """hex version of our sha""" return b2a_hex(self[1]) @property def stage(self): """Stage of the entry, either: * 0 = default stage * 1 = stage before a merge or common ancestor entry in case of a 3 way merge * 2 = stage of entries from the 'left' side of the merge * 3 = stage of entries from the right side of the merge :note: For more information, see http://www.kernel.org/pub/software/scm/git/docs/git-read-tree.html """ return (self[2] & CE_STAGEMASK) >> CE_STAGESHIFT @property def path(self): """:return: our path relative to the repository working tree root""" return self[3] @property def flags(self): """:return: flags stored with this entry""" return self[2] @classmethod def from_blob(cls, blob, stage = 0): """:return: Fully equipped BaseIndexEntry at the given stage""" return cls((blob.mode, blob.binsha, stage << CE_STAGESHIFT, blob.path)) def to_blob(self, repo): """:return: Blob using the information of this index entry""" return Blob(repo, self.binsha, self.mode, self.path) class IndexEntry(BaseIndexEntry): """Allows convenient access to IndexEntry data without completely unpacking it. Attributes usully accessed often are cached in the tuple whereas others are unpacked on demand. See the properties for a mapping between names and tuple indices. """ @property def ctime(self): """ :return: Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the file's creation time""" return unpack(">LL", self[4]) @property def mtime(self): """See ctime property, but returns modification time """ return unpack(">LL", self[5]) @property def dev(self): """ Device ID """ return self[6] @property def inode(self): """ Inode ID """ return self[7] @property def uid(self): """ User ID """ return self[8] @property def gid(self): """ Group ID """ return self[9] @property def size(self): """:return: Uncompressed size of the blob """ return self[10] @classmethod def from_base(cls, base): """ :return: Minimal entry as created from the given BaseIndexEntry instance. 
Missing values will be set to null-like values
		
		:param base: Instance of type BaseIndexEntry"""
		time = pack(">LL", 0, 0)
		return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
	
	@classmethod
	def from_blob(cls, blob, stage = 0):
		""":return: Minimal entry resembling the given blob object"""
		time = pack(">LL", 0, 0)
		return IndexEntry((blob.mode, blob.binsha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))
GitPython-0.3.2.RC1/git/index/__init__.py0000644000175100017510000000011011573623553017100 0ustar byronbyron"""Initialize the index package"""

from base import *
from typ import *
GitPython-0.3.2.RC1/git/index/util.py0000644000175100017510000000514011575507662016332 0ustar byronbyron"""Module containing index utilities"""
import struct
import tempfile
import os

__all__ = ( 'TemporaryFileSwap', 'post_clear_cache', 'default_index', 'git_working_dir' )

#{ Aliases
pack = struct.pack
unpack = struct.unpack
#} END aliases

class TemporaryFileSwap(object):
	"""Utility class moving a file to a temporary location within the same directory
	and moving it back to its original location on object deletion."""
	__slots__ = ("file_path", "tmp_file_path")
	
	def __init__(self, file_path):
		self.file_path = file_path
		self.tmp_file_path = self.file_path + tempfile.mktemp('','','')
		# it may be that the source does not exist
		try:
			os.rename(self.file_path, self.tmp_file_path)
		except OSError:
			pass
	
	def __del__(self):
		if os.path.isfile(self.tmp_file_path):
			if os.name == 'nt' and os.path.exists(self.file_path):
				os.remove(self.file_path)
			os.rename(self.tmp_file_path, self.file_path)
		# END temp file exists

#{ Decorators

def post_clear_cache(func):
	"""Decorator for functions that alter the index using the git command. This would
	invalidate our possibly existing entries dictionary, which is why it must be
	deleted to allow it to be lazily reread later.
	
	:note:
		This decorator will not be required once all functions are implemented
		natively, which in fact is possible, but probably not feasible performance-wise.
	"""
	def post_clear_cache_if_not_raised(self, *args, **kwargs):
		rval = func(self, *args, **kwargs)
		self._delete_entries_cache()
		return rval
	# END wrapper method
	post_clear_cache_if_not_raised.__name__ = func.__name__
	return post_clear_cache_if_not_raised

def default_index(func):
	"""Decorator assuring the wrapped method may only run if we are the default
	repository index. This is because we rely on git commands that operate
	on that index only.
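	
	A minimal sketch of the intended use (the decorated method shown is
	illustrative)::
	
		@default_index
		def merge_tree(self, rhs, base=None):
			# may shell out to git, which only ever operates on .git/index
			pass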
""" def check_default_index(self, *args, **kwargs): if self._file_path != self._index_path(): raise AssertionError( "Cannot call %r on indices that do not represent the default git index" % func.__name__ ) return func(self, *args, **kwargs) # END wrpaper method check_default_index.__name__ = func.__name__ return check_default_index def git_working_dir(func): """Decorator which changes the current working dir to the one of the git repository in order to assure relative paths are handled correctly""" def set_git_working_dir(self, *args, **kwargs): cur_wd = os.getcwd() os.chdir(self.repo.working_tree_dir) try: return func(self, *args, **kwargs) finally: os.chdir(cur_wd) # END handle working dir # END wrapper set_git_working_dir.__name__ = func.__name__ return set_git_working_dir #} END decorators GitPython-0.3.2.RC1/git/cmd.py0000644000175100017510000004102711604665410015002 0ustar byronbyron# cmd.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os, sys from util import ( LazyMixin, stream_copy ) from exc import GitCommandError from subprocess import ( call, Popen, PIPE ) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', 'with_exceptions', 'as_process', 'output_stream' ) __all__ = ('Git', ) def dashify(string): return string.replace('_', '-') class Git(LazyMixin): """ The Git class manages communication with the Git binary. It provides a convenient interface to calling the Git binary, such as in:: g = Git( git_dir ) g.init() # calls 'git init' program rval = g.ls_files() # calls 'git ls-files' program ``Debugging`` Set the GIT_PYTHON_TRACE environment variable print each invocation of the command to stdout. Set its value to 'full' to see details about the returned values. """ __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info") # CONFIGURATION # The size in bytes read from stdout when copying git's output to another stream max_chunk_size = 1024*64 # Enables debugging of GitPython's git commands GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) # Provide the full path to the git executable. Otherwise it assumes git is in the path GIT_PYTHON_GIT_EXECUTABLE = os.environ.get("GIT_PYTHON_GIT_EXECUTABLE", 'git') class AutoInterrupt(object): """Kill/Interrupt the stored process instance once this instance goes out of scope. It is used to prevent processes piling up in case iterators stop reading. Besides all attributes are wired through to the contained process object. The wait method was overridden to perform automatic status code checking and possibly raise.""" __slots__= ("proc", "args") def __init__(self, proc, args ): self.proc = proc self.args = args def __del__(self): # did the process finish already so we have a return code ? if self.proc.poll() is not None: return # can be that nothing really exists anymore ... if os is None: return # try to kill it try: os.kill(self.proc.pid, 2) # interrupt signal except AttributeError: # try windows # for some reason, providing None for stdout/stderr still prints something. This is why # we simply use the shell and redirect to nul. Its slower than CreateProcess, question # is whether we really want to see all these messages. Its annoying no matter what. 
call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) # END exception handling def __getattr__(self, attr): return getattr(self.proc, attr) def wait(self): """Wait for the process and return its status code. :raise GitCommandError: if the return status is not 0""" status = self.proc.wait() if status != 0: raise GitCommandError(self.args, status, self.proc.stderr.read()) # END status handling return status # END auto interrupt class CatFileContentStream(object): """Object representing a sized read-only stream returning the contents of an object. It behaves like a stream, but counts the data read and simulates an empty stream once our sized content region is empty. If not all data is read to the end of the objects's lifetime, we read the rest to assure the underlying stream continues to work""" __slots__ = ('_stream', '_nbr', '_size') def __init__(self, size, stream): self._stream = stream self._size = size self._nbr = 0 # num bytes read # special case: if the object is empty, has null bytes, get the # final newline right away. if size == 0: stream.read(1) # END handle empty streams def read(self, size=-1): bytes_left = self._size - self._nbr if bytes_left == 0: return '' if size > -1: # assure we don't try to read past our limit size = min(bytes_left, size) else: # they try to read all, make sure its not more than what remains size = bytes_left # END check early depletion data = self._stream.read(size) self._nbr += len(data) # check for depletion, read our final byte to make the stream usable by others if self._size - self._nbr == 0: self._stream.read(1) # final newline # END finish reading return data def readline(self, size=-1): if self._nbr == self._size: return '' # clamp size to lowest allowed value bytes_left = self._size - self._nbr if size > -1: size = min(bytes_left, size) else: size = bytes_left # END handle size data = self._stream.readline(size) self._nbr += len(data) # handle final byte if self._size - self._nbr == 0: self._stream.read(1) # END finish reading return data def readlines(self, size=-1): if self._nbr == self._size: return list() # leave all additional logic to our readline method, we just check the size out = list() nbr = 0 while True: line = self.readline() if not line: break out.append(line) if size > -1: nbr += len(line) if nbr > size: break # END handle size constraint # END readline loop return out def __iter__(self): return self def next(self): line = self.readline() if not line: raise StopIteration return line def __del__(self): bytes_left = self._size - self._nbr if bytes_left: # read and discard - seeking is impossible within a stream # includes terminating newline self._stream.read(bytes_left + 1) # END handle incomplete read def __init__(self, working_dir=None): """Initialize this instance with: :param working_dir: Git directory we should work in. If None, we always work in the current directory as returned by os.getcwd(). It is meant to be the working tree directory if available, or the .git directory in case of bare repositories.""" super(Git, self).__init__() self._working_dir = working_dir # cached command slots self.cat_file_header = None self.cat_file_all = None def __getattr__(self, name): """A convenience method as it allows to call the command as if it was an object. 
:return: Callable object that will execute call _call_process with your arguments.""" if name[0] == '_': return LazyMixin.__getattr__(self, name) return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) def _set_cache_(self, attr): if attr == '_version_info': # We only use the first 4 numbers, as everthing else could be strings in fact (on windows) version_numbers = self._call_process('version').split(' ')[2] self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4]) else: super(Git, self)._set_cache_(attr) #END handle version info @property def working_dir(self): """:return: Git directory we are working on""" return self._working_dir @property def version_info(self): """ :return: tuple(int, int, int, int) tuple with integers representing the major, minor and additional version numbers as parsed from git version. This value is generated on demand and is cached""" return self._version_info def execute(self, command, istream=None, with_keep_cwd=False, with_extended_output=False, with_exceptions=True, as_process=False, output_stream=None, **subprocess_kwargs ): """Handles executing the command on the shell and consumes and returns the returned information (stdout) :param command: The command argument list to execute. It should be a string, or a sequence of program arguments. The program to execute is the first item in the args sequence or string. :param istream: Standard input filehandle passed to subprocess.Popen. :param with_keep_cwd: Whether to use the current working directory from os.getcwd(). The cmd otherwise uses its own working_dir that it has been initialized with if possible. :param with_extended_output: Whether to return a (status, stdout, stderr) tuple. :param with_exceptions: Whether to raise an exception when git returns a non-zero status. :param as_process: Whether to return the created process instance directly from which streams can be read on demand. This will render with_extended_output and with_exceptions ineffective - the caller will have to deal with the details himself. It is important to note that the process will be placed into an AutoInterrupt wrapper that will interrupt the process once it goes out of scope. If you use the command in iterators, you should pass the whole process instance instead of a single stream. :param output_stream: If set to a file-like object, data produced by the git command will be output to the given stream directly. This feature only has any effect if as_process is False. Processes will always be created with a pipe due to issues with subprocess. This merely is a workaround as data will be copied from the output pipe to the given output stream directly. :param subprocess_kwargs: Keyword arguments to be passed to subprocess.Popen. Please note that some of the valid kwargs are already set by this method, the ones you specify may not be the same ones. :return: * str(output) if extended_output = False (Default) * tuple(int(status), str(stdout), str(stderr)) if extended_output = True if ouput_stream is True, the stdout value will be your output stream: * output_stream if extended_output = False * tuple(int(status), output_stream, str(stderr)) if extended_output = True :raise GitCommandError: :note: If you add additional keyword arguments to the signature of this method, you must update the execute_kwargs tuple housed in this module.""" if self.GIT_PYTHON_TRACE and not self.GIT_PYTHON_TRACE == 'full': print ' '.join(command) # Allow the user to have the command executed in their working dir. 
if with_keep_cwd or self._working_dir is None:
			cwd = os.getcwd()
		else:
			cwd = self._working_dir
		
		# Start the process
		proc = Popen(command,
						cwd=cwd,
						stdin=istream,
						stderr=PIPE,
						stdout=PIPE,
						close_fds=(os.name=='posix'),	# unsupported on windows
						**subprocess_kwargs
						)
		if as_process:
			return self.AutoInterrupt(proc, command)
		
		# Wait for the process to return
		status = 0
		stdout_value = ''
		stderr_value = ''
		try:
			if output_stream is None:
				stdout_value, stderr_value = proc.communicate()
				# strip trailing "\n"
				if stdout_value.endswith("\n"):
					stdout_value = stdout_value[:-1]
				if stderr_value.endswith("\n"):
					stderr_value = stderr_value[:-1]
				status = proc.returncode
			else:
				stream_copy(proc.stdout, output_stream, self.max_chunk_size)
				stdout_value = output_stream
				stderr_value = proc.stderr.read()
				# strip trailing "\n"
				if stderr_value.endswith("\n"):
					stderr_value = stderr_value[:-1]
				status = proc.wait()
			# END stdout handling
		finally:
			proc.stdout.close()
			proc.stderr.close()
		
		if self.GIT_PYTHON_TRACE == 'full':
			cmdstr = " ".join(command)
			if stderr_value:
				print "%s -> %d; stdout: '%s'; stderr: '%s'" % (cmdstr, status, stdout_value, stderr_value)
			elif stdout_value:
				print "%s -> %d; stdout: '%s'" % (cmdstr, status, stdout_value)
			else:
				print "%s -> %d" % (cmdstr, status)
		# END handle debug printing
		
		if with_exceptions and status != 0:
			raise GitCommandError(command, status, stderr_value)
		
		# Allow access to the command's status code
		if with_extended_output:
			return (status, stdout_value, stderr_value)
		else:
			return stdout_value
	
	def transform_kwargs(self, **kwargs):
		"""Transforms Python-style kwargs into git command line options."""
		args = list()
		for k, v in kwargs.items():
			if len(k) == 1:
				if v is True:
					args.append("-%s" % k)
				elif type(v) is not bool:
					args.append("-%s%s" % (k, v))
			else:
				if v is True:
					args.append("--%s" % dashify(k))
				elif type(v) is not bool:
					args.append("--%s=%s" % (dashify(k), v))
		return args
	
	@classmethod
	def __unpack_args(cls, arg_list):
		if not isinstance(arg_list, (list, tuple)):
			return [ str(arg_list) ]
		
		outlist = list()
		for arg in arg_list:
			if isinstance(arg, (list, tuple)):
				outlist.extend(cls.__unpack_args( arg ))
			# END recursion
			else:
				outlist.append(str(arg))
		# END for each arg
		return outlist
	
	def _call_process(self, method, *args, **kwargs):
		"""Run the given git command with the specified arguments and return
		the result as a string
		
		:param method:
			is the command. Contained "_" characters will be converted to dashes,
			such as in 'ls_files' to call 'ls-files'.
		
		:param args:
			is the list of arguments. If None is included, it will be pruned.
			This allows your commands to call git more conveniently, as None
			is realized as non-existent.
		
		:param kwargs:
			is a dict of keyword arguments.
			This function accepts the same optional keyword arguments
			as execute().
		
		``Examples``::
			git.rev_list('master', max_count=10, header=True)
		
		:return: Same as ``execute``"""
		# Handle optional arguments prior to calling transform_kwargs
		# otherwise these'll end up in args, which is bad.
_kwargs = dict() for kwarg in execute_kwargs: try: _kwargs[kwarg] = kwargs.pop(kwarg) except KeyError: pass # Prepare the argument list opt_args = self.transform_kwargs(**kwargs) ext_args = self.__unpack_args([a for a in args if a is not None]) args = opt_args + ext_args call = [self.GIT_PYTHON_GIT_EXECUTABLE, dashify(method)] call.extend(args) return self.execute(call, **_kwargs) def _parse_object_header(self, header_line): """ :param header_line: type_string size_as_int :return: (hex_sha, type_string, size_as_int) :raise ValueError: if the header contains indication for an error due to incorrect input sha""" tokens = header_line.split() if len(tokens) != 3: if not tokens: raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip())) else: raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip())) # END handle actual return value # END error handling if len(tokens[0]) != 40: raise ValueError("Failed to parse header: %r" % header_line) return (tokens[0], tokens[1], int(tokens[2])) def __prepare_ref(self, ref): # required for command to separate refs on stdin refstr = str(ref) # could be ref-object if refstr.endswith("\n"): return refstr return refstr + "\n" def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): cur_val = getattr(self, attr_name) if cur_val is not None: return cur_val options = { "istream" : PIPE, "as_process" : True } options.update( kwargs ) cmd = self._call_process( cmd_name, *args, **options ) setattr(self, attr_name, cmd ) return cmd def __get_object_header(self, cmd, ref): cmd.stdin.write(self.__prepare_ref(ref)) cmd.stdin.flush() return self._parse_object_header(cmd.stdout.readline()) def get_object_header(self, ref): """ Use this method to quickly examine the type and size of the object behind the given ref. :note: The method will only suffer from the costs of command invocation once and reuses the command in subsequent calls. :return: (hexsha, type_string, size_as_int)""" cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) return self.__get_object_header(cmd, ref) def get_object_data(self, ref): """ As get_object_header, but returns object data as well :return: (hexsha, type_string, size_as_int,data_string) :note: not threadsafe""" hexsha, typename, size, stream = self.stream_object_data(ref) data = stream.read(size) del(stream) return (hexsha, typename, size, data) def stream_object_data(self, ref): """As get_object_header, but returns the data as a stream :return: (hexsha, type_string, size_as_int, stream) :note: This method is not threadsafe, you need one independent Command instance per thread to be safe !""" cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) hexsha, typename, size = self.__get_object_header(cmd, ref) return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) def clear_cache(self): """Clear all kinds of internal caches to release resources. Currently persistent commands will be interrupted. 
:return: self""" self.cat_file_all = None self.cat_file_header = None return self GitPython-0.3.2.RC1/git/exc.py0000644000175100017510000000372411573623573015031 0ustar byronbyron# exc.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php """ Module containing all exceptions thrown througout the git package, """ from gitdb.exc import * class InvalidGitRepositoryError(Exception): """ Thrown if the given repository appears to have an invalid format. """ class NoSuchPathError(OSError): """ Thrown if a path could not be access by the system. """ class GitCommandError(Exception): """ Thrown if execution of the git command fails with non-zero status code. """ def __init__(self, command, status, stderr=None): self.stderr = stderr self.status = status self.command = command def __str__(self): return ("'%s' returned exit status %i: %s" % (' '.join(str(i) for i in self.command), self.status, self.stderr)) class CheckoutError( Exception ): """Thrown if a file could not be checked out from the index as it contained changes. The .failed_files attribute contains a list of relative paths that failed to be checked out as they contained changes that did not exist in the index. The .failed_reasons attribute contains a string informing about the actual cause of the issue. The .valid_files attribute contains a list of relative paths to files that were checked out successfully and hence match the version stored in the index""" def __init__(self, message, failed_files, valid_files, failed_reasons): Exception.__init__(self, message) self.failed_files = failed_files self.failed_reasons = failed_reasons self.valid_files = valid_files def __str__(self): return Exception.__str__(self) + ":%s" % self.failed_files class CacheError(Exception): """Base for all errors related to the git index, which is called cache internally""" class UnmergedEntriesError(CacheError): """Thrown if an operation cannot proceed as there are still unmerged entries in the cache""" GitPython-0.3.2.RC1/git/objects/0000755000175100017510000000000011605055665015320 5ustar byronbyronGitPython-0.3.2.RC1/git/objects/base.py0000644000175100017510000001336611575507662016622 0ustar byronbyron# base.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import LazyMixin, join_path_native, stream_copy from util import get_object_type_by_name from gitdb.util import ( hex_to_bin, bin_to_hex, basename ) import gitdb.typ as dbtyp _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" __all__ = ("Object", "IndexObject") class Object(LazyMixin): """Implements an Object which may be Blobs, Trees, Commits and Tags""" NULL_HEX_SHA = '0'*40 NULL_BIN_SHA = '\0'*20 TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type) __slots__ = ("repo", "binsha", "size" ) type = None # to be set by subclass def __init__(self, repo, binsha): """Initialize an object by identifying it by its binary sha. All keyword arguments will be set on demand if None. 
:param repo: repository this object is located in
		:param binsha: 20 byte SHA1"""
		super(Object,self).__init__()
		self.repo = repo
		self.binsha = binsha
		assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
	
	@classmethod
	def new(cls, repo, id):
		"""
		:return: New Object instance of a type appropriate to the object type behind
			id. The id of the newly created object will be a binsha even though
			the input id may have been a Reference or Rev-Spec
		:param id: reference, rev-spec, or hexsha
		:note: This cannot be a __new__ method as it would always call __init__
			with the input id which is not necessarily a binsha."""
		return repo.rev_parse(str(id))
	
	@classmethod
	def new_from_sha(cls, repo, sha1):
		"""
		:return: new object instance of a type appropriate to represent the given
			binary sha1
		:param sha1: 20 byte binary sha1"""
		if sha1 == cls.NULL_BIN_SHA:
			# the NULL binsha is always the root commit
			return get_object_type_by_name('commit')(repo, sha1)
		#END handle special case
		oinfo = repo.odb.info(sha1)
		inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha)
		inst.size = oinfo.size
		return inst
	
	def _set_cache_(self, attr):
		"""Retrieve object information"""
		if attr == "size":
			oinfo = self.repo.odb.info(self.binsha)
			self.size = oinfo.size
			# assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
		else:
			super(Object,self)._set_cache_(attr)
	
	def __eq__(self, other):
		""":return: True if the objects have the same SHA1"""
		if not hasattr(other, 'binsha'):
			return False
		return self.binsha == other.binsha
	
	def __ne__(self, other):
		""":return: True if the objects do not have the same SHA1"""
		if not hasattr(other, 'binsha'):
			return True
		return self.binsha != other.binsha
	
	def __hash__(self):
		""":return: Hash of our id allowing objects to be used in dicts and sets"""
		return hash(self.binsha)
	
	def __str__(self):
		""":return: string of our SHA1 as understood by all git commands"""
		return bin_to_hex(self.binsha)
	
	def __repr__(self):
		""":return: string with pythonic representation of our object"""
		return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
	
	@property
	def hexsha(self):
		""":return: 40 byte hex version of our 20 byte binary sha"""
		return bin_to_hex(self.binsha)
	
	@property
	def data_stream(self):
		"""
		:return: File-object-compatible stream to the uncompressed raw data of the object
		:note: returned streams must be read in order"""
		return self.repo.odb.stream(self.binsha)
	
	def stream_data(self, ostream):
		"""Writes our data directly to the given output stream
		
		:param ostream: File-object-compatible stream object.
		:return: self"""
		istream = self.repo.odb.stream(self.binsha)
		stream_copy(istream, ostream)
		return self

class IndexObject(Object):
	"""Base for all objects that can be part of the index file, namely Tree, Blob and
	SubModule objects"""
	__slots__ = ("path", "mode")
	
	# for compatibility with iterable lists
	_id_attribute_ = 'path'
	
	def __init__(self, repo, binsha, mode=None, path=None):
		"""Initialize a newly instanced IndexObject
		
		:param repo: is the Repo we are located in
		:param binsha: 20 byte sha1
		:param mode:
			is the stat-compatible file mode as int, use the stat module
			to evaluate the information
		:param path:
			is the path to the file in the file system, relative to the git repository root, i.e.
file.ext or folder/other.ext :note: Path may not be set of the index object has been created directly as it cannot be retrieved without knowing the parent tree.""" super(IndexObject, self).__init__(repo, binsha) if mode is not None: self.mode = mode if path is not None: self.path = path def __hash__(self): """:return: Hash of our path as index items are uniquely identifyable by path, not by their data !""" return hash(self.path) def _set_cache_(self, attr): if attr in IndexObject.__slots__: # they cannot be retrieved lateron ( not without searching for them ) raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) else: super(IndexObject, self)._set_cache_(attr) # END hanlde slot attribute @property def name(self): """:return: Name portion of the path, effectively being the basename""" return basename(self.path) @property def abspath(self): """ :return: Absolute path to this index object in the file system ( as opposed to the .path field which is a path relative to the git repository ). The returned path will be native to the system and contains '\' on windows. """ return join_path_native(self.repo.working_tree_dir, self.path) GitPython-0.3.2.RC1/git/objects/blob.py0000644000175100017510000000151711575507662016621 0ustar byronbyron# blob.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php from mimetypes import guess_type import base __all__ = ('Blob', ) class Blob(base.IndexObject): """A Blob encapsulates a git blob object""" DEFAULT_MIME_TYPE = "text/plain" type = "blob" # valid blob modes executable_mode = 0100755 file_mode = 0100644 link_mode = 0120000 __slots__ = tuple() @property def mime_type(self): """ :return: String describing the mime type of this file (based on the filename) :note: Defaults to 'text/plain' in case the actual file type is unknown. """ guesses = None if self.path: guesses = guess_type(self.path) return guesses and guesses[0] or self.DEFAULT_MIME_TYPE GitPython-0.3.2.RC1/git/objects/fun.py0000644000175100017510000001432111575507662016470 0ustar byronbyron"""Module with functions which are supposed to be as fast as possible""" from stat import S_ISDIR __all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', 'traverse_tree_recursive') def tree_to_stream(entries, write): """Write the give list of entries into a stream using its write method :param entries: **sorted** list of tuples with (binsha, mode, name) :param write: write method which takes a data string""" ord_zero = ord('0') bit_mask = 7 # 3 bits set for binsha, mode, name in entries: mode_str = '' for i in xrange(6): mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str # END for each 8 octal value # git slices away the first octal if its zero if mode_str[0] == '0': mode_str = mode_str[1:] # END save a byte # here it comes: if the name is actually unicode, the replacement below # will not work as the binsha is not part of the ascii unicode encoding - # hence we must convert to an utf8 string for it to work properly. # According to my tests, this is exactly what git does, that is it just # takes the input literally, which appears to be utf8 on linux. 
if isinstance(name, unicode):
			name = name.encode("utf8")
		write("%s %s\0%s" % (mode_str, name, binsha))
	# END for each item

def tree_entries_from_data(data):
	"""Reads the binary representation of a tree and returns tuples of Tree items
	
	:param data: data block with tree data
	:return: list(tuple(binsha, mode, tree_relative_path), ...)"""
	ord_zero = ord('0')
	len_data = len(data)
	i = 0
	out = list()
	while i < len_data:
		mode = 0
		
		# read mode
		# Some git versions truncate the leading 0, some don't
		# The type will be extracted from the mode later
		while data[i] != ' ':
			# move existing mode integer up one level being 3 bits
			# and add the actual ordinal value of the character
			mode = (mode << 3) + (ord(data[i]) - ord_zero)
			i += 1
		# END while reading mode
		
		# byte is space now, skip it
		i += 1
		
		# parse name, it is NULL separated
		ns = i
		while data[i] != '\0':
			i += 1
		# END while not reached NULL
		
		# default encoding for strings in git is utf8
		# Only use the respective unicode object if the byte stream was encoded
		name = data[ns:i]
		name_enc = name.decode("utf-8")
		if len(name) > len(name_enc):
			name = name_enc
		# END handle encoding
		
		# byte is NULL, get next 20
		i += 1
		sha = data[i:i+20]
		i = i + 20
		out.append((sha, mode, name))
	# END for each byte in data stream
	return out

def _find_by_name(tree_data, name, is_dir, start_at):
	"""return data entry matching the given name and tree mode
	or None.
	Before the item is returned, the respective data item is set
	None in the tree_data list to mark it done"""
	try:
		item = tree_data[start_at]
		if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
			tree_data[start_at] = None
			return item
	except IndexError:
		pass
	# END exception handling
	for index, item in enumerate(tree_data):
		if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
			tree_data[index] = None
			return item
		# END if item matches
	# END for each item
	return None

def _to_full_path(item, path_prefix):
	"""Rebuild entry with given path prefix"""
	if not item:
		return item
	return (item[0], item[1], path_prefix+item[2])

def traverse_trees_recursive(odb, tree_shas, path_prefix):
	"""
	:return: list with entries according to the given binary tree-shas.
		The result is encoded in a list
		of n tuple|None per blob/commit, (n == len(tree_shas)), where
		* [0] == 20 byte sha
		* [1] == mode as int
		* [2] == path relative to working tree root
		The entry tuple is None if the respective blob/commit did not
		exist in the given tree.
	:param tree_shas: iterable of shas pointing to trees. All trees must
		be on the same level. A tree-sha may be None, in which case None
		entries will be returned in its place for that tree.
	:param path_prefix: a prefix to be added to the returned paths on this level,
		set it '' for the first iteration
	:note: The ordering of the returned items will be partially lost"""
	trees_data = list()
	nt = len(tree_shas)
	for tree_sha in tree_shas:
		if tree_sha is None:
			data = list()
		else:
			data = tree_entries_from_data(odb.stream(tree_sha).read())
		# END handle muted trees
		trees_data.append(data)
	# END for each sha to get data for
	
	out = list()
	out_append = out.append
	
	# find all matching entries and recursively process them together if the match
	# is a tree. If the match is a non-tree item, put it into the result.
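	# As an illustration (sha_* are stand-ins for 20 byte binary shas), merging
	# three trees which all contain 'a.txt', where only the second tree adds
	# 'b.txt', yields result rows shaped like:
	#	((sha_1, mode, 'a.txt'), (sha_2, mode, 'a.txt'), (sha_3, mode, 'a.txt'))
	#	(None, (sha_4, mode, 'b.txt'), None)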
# Processed items will be set None for ti, tree_data in enumerate(trees_data): for ii, item in enumerate(tree_data): if not item: continue # END skip already done items entries = [ None for n in range(nt) ] entries[ti] = item sha, mode, name = item # its faster to unpack is_dir = S_ISDIR(mode) # type mode bits # find this item in all other tree data items # wrap around, but stop one before our current index, hence # ti+nt, not ti+1+nt for tio in range(ti+1, ti+nt): tio = tio % nt entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) # END for each other item data # if we are a directory, enter recursion if is_dir: out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) else: out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) # END handle recursion # finally mark it done tree_data[ii] = None # END for each item # we are done with one tree, set all its data empty del(tree_data[:]) # END for each tree_data chunk return out def traverse_tree_recursive(odb, tree_sha, path_prefix): """ :return: list of entries of the tree pointed to by the binary tree_sha. An entry has the following format: * [0] 20 byte sha * [1] mode as int * [2] path relative to the repository :param path_prefix: prefix to prepend to the front of all returned paths""" entries = list() data = tree_entries_from_data(odb.stream(tree_sha).read()) # unpacking/packing is faster than accessing individual items for sha, mode, name in data: if S_ISDIR(mode): entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) else: entries.append((sha, mode, path_prefix+name)) # END for each item return entries GitPython-0.3.2.RC1/git/objects/commit.py0000644000175100017510000003566311575507662017204 0ustar byronbyron# commit.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import ( Actor, Iterable, Stats, ) from git.diff import Diffable from tree import Tree from gitdb import IStream from cStringIO import StringIO import base from gitdb.util import ( hex_to_bin ) from util import ( Traversable, Serializable, parse_date, altz_to_utctz_str, parse_actor_and_date ) from time import ( time, altzone ) import os import sys __all__ = ('Commit', ) class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): """Wraps a git Commit object. This class will act lazily on some of its attributes and will query the value on demand only if it involves calling the git binary.""" # ENVIRONMENT VARIABLES # read when creating new commits env_author_date = "GIT_AUTHOR_DATE" env_committer_date = "GIT_COMMITTER_DATE" # CONFIGURATION KEYS conf_encoding = 'i18n.commitencoding' # INVARIANTS default_encoding = "UTF-8" # object configuration type = "commit" __slots__ = ("tree", "author", "authored_date", "author_tz_offset", "committer", "committed_date", "committer_tz_offset", "message", "parents", "encoding") _id_attribute_ = "binsha" def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, committer=None, committed_date=None, committer_tz_offset=None, message=None, parents=None, encoding=None): """Instantiate a new Commit. All keyword arguments taking None as default will be implicitly set on first query. :param binsha: 20 byte sha1 :param parents: tuple( Commit, ... 
) is a tuple of commit ids or actual Commits :param tree: Tree Tree object :param author: Actor is the author string ( will be implicitly converted into an Actor object ) :param authored_date: int_seconds_since_epoch is the authored DateTime - use time.gmtime() to convert it into a different format :param author_tz_offset: int_seconds_west_of_utc is the timezone that the authored_date is in :param committer: Actor is the committer string :param committed_date: int_seconds_since_epoch is the committed DateTime - use time.gmtime() to convert it into a different format :param committer_tz_offset: int_seconds_west_of_utc is the timezone that the authored_date is in :param message: string is the commit message :param encoding: string encoding of the message, defaults to UTF-8 :param parents: List or tuple of Commit objects which are our parent(s) in the commit dependency graph :return: git.Commit :note: Timezone information is in the same format and in the same sign as what time.altzone returns. The sign is inverted compared to git's UTC timezone.""" super(Commit,self).__init__(repo, binsha) if tree is not None: assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) if tree is not None: self.tree = tree if author is not None: self.author = author if authored_date is not None: self.authored_date = authored_date if author_tz_offset is not None: self.author_tz_offset = author_tz_offset if committer is not None: self.committer = committer if committed_date is not None: self.committed_date = committed_date if committer_tz_offset is not None: self.committer_tz_offset = committer_tz_offset if message is not None: self.message = message if parents is not None: self.parents = parents if encoding is not None: self.encoding = encoding @classmethod def _get_intermediate_items(cls, commit): return commit.parents def _set_cache_(self, attr): if attr in Commit.__slots__: # read the data in a chunk, its faster - then provide a file wrapper binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) self._deserialize(StringIO(stream.read())) else: super(Commit, self)._set_cache_(attr) # END handle attrs @property def summary(self): """:return: First line of the commit message""" return self.message.split('\n', 1)[0] def count(self, paths='', **kwargs): """Count the number of commits reachable from this commit :param paths: is an optinal path or a list of paths restricting the return value to commits actually containing the paths :param kwargs: Additional options to be passed to git-rev-list. They must not alter the ouput style of the command, or parsing will yield incorrect results :return: int defining the number of reachable commits""" # yes, it makes a difference whether empty paths are given or not in our case # as the empty paths version will ignore merge commits for some reason. if paths: return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines()) else: return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines()) @property def name_rev(self): """ :return: String describing the commits hex sha based on the closest Reference. Mostly useful for UI purposes""" return self.repo.git.name_rev(self) @classmethod def iter_items(cls, repo, rev, paths='', **kwargs): """Find all commits matching the given criteria. 
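		For example (``repo`` being an existing Repo instance; the values used
		are illustrative)::
		
			# the last 10 commits on master which touch the 'lib' directory
			for commit in Commit.iter_items(repo, 'master', paths='lib', max_count=10):
				print commit.hexsha, commit.summary
		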
:param repo: is the Repo :param rev: revision specifier, see git-rev-parse for viable options :param paths: is an optinal path or list of paths, if set only Commits that include the path or paths will be considered :param kwargs: optional keyword arguments to git rev-list where ``max_count`` is the maximum number of commits to fetch ``skip`` is the number of commits to skip ``since`` all commits since i.e. '1970-01-01' :return: iterator yielding Commit items""" if 'pretty' in kwargs: raise ValueError("--pretty cannot be used as parsing expects single sha's only") # END handle pretty args = list() if paths: args.extend(('--', paths)) # END if paths proc = repo.git.rev_list(rev, args, as_process=True, **kwargs) return cls._iter_from_process_or_stream(repo, proc) def iter_parents(self, paths='', **kwargs): """Iterate _all_ parents of this commit. :param paths: Optional path or list of paths limiting the Commits to those that contain at least one of the paths :param kwargs: All arguments allowed by git-rev-list :return: Iterator yielding Commit objects which are parents of self """ # skip ourselves skip = kwargs.get("skip", 1) if skip == 0: # skip ourselves skip = 1 kwargs['skip'] = skip return self.iter_items(self.repo, self, paths, **kwargs) @property def stats(self): """Create a git stat from changes between this commit and its first parent or from all changes done if this is the very first commit. :return: git.Stats""" if not self.parents: text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True) text2 = "" for line in text.splitlines()[1:]: (insertions, deletions, filename) = line.split("\t") text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) text = text2 else: text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) return Stats._list_from_string(self.repo, text) @classmethod def _iter_from_process_or_stream(cls, repo, proc_or_stream): """Parse out commit information into a list of Commit objects We expect one-line per commit, and parse the actual commit information directly from our lighting fast object database :param proc: git-rev-list process instance - one sha per line :return: iterator returning Commit objects""" stream = proc_or_stream if not hasattr(stream,'readline'): stream = proc_or_stream.stdout readline = stream.readline while True: line = readline() if not line: break hexsha = line.strip() if len(hexsha) > 40: # split additional information, as returned by bisect for instance hexsha, rest = line.split(None, 1) # END handle extra info assert len(hexsha) == 40, "Invalid line: %s" % hexsha yield Commit(repo, hex_to_bin(hexsha)) # END for each line in stream @classmethod def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): """Commit the given tree, creating a commit object. :param repo: Repo object the commit should be part of :param tree: Tree object or hex or bin sha the tree of the new commit :param message: Commit message. It may be an empty string if no message is provided. It will be converted to a string in any case. :param parent_commits: Optional Commit objects to use as parents for the new commit. If empty list, the commit will have no parents at all and become a root commit. If None , the current head commit will be the parent of the new commit object :param head: If True, the HEAD will be advanced to the new commit automatically. Else the HEAD will remain pointing on the previous commit. This could lead to undesired results when diffing files. 
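			A minimal sketch (assuming ``repo`` is a Repo with at least one
			commit; the message is illustrative)::
			
				tree = repo.head.commit.tree
				new_commit = Commit.create_from_tree(repo, tree, "example message", head=False)
		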
:return: Commit object representing the new commit :note: Additional information about the committer and Author are taken from the environment or from the git configuration, see git-commit-tree for more information""" parents = parent_commits if parent_commits is None: try: parent_commits = [ repo.head.commit ] except ValueError: # empty repositories have no head commit parent_commits = list() # END handle parent commits # END if parent commits are unset # retrieve all additional information, create a commit object, and # serialize it # Generally: # * Environment variables override configuration values # * Sensible defaults are set according to the git documentation # COMMITER AND AUTHOR INFO cr = repo.config_reader() env = os.environ committer = Actor.committer(cr) author = Actor.author(cr) # PARSE THE DATES unix_time = int(time()) offset = altzone author_date_str = env.get(cls.env_author_date, '') if author_date_str: author_time, author_offset = parse_date(author_date_str) else: author_time, author_offset = unix_time, offset # END set author time committer_date_str = env.get(cls.env_committer_date, '') if committer_date_str: committer_time, committer_offset = parse_date(committer_date_str) else: committer_time, committer_offset = unix_time, offset # END set committer time # assume utf8 encoding enc_section, enc_option = cls.conf_encoding.split('.') conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) # if the tree is no object, make sure we create one - otherwise # the created commit object is invalid if isinstance(tree, str): tree = repo.tree(tree) # END tree conversion # CREATE NEW COMMIT new_commit = cls(repo, cls.NULL_BIN_SHA, tree, author, author_time, author_offset, committer, committer_time, committer_offset, message, parent_commits, conf_encoding) stream = StringIO() new_commit._serialize(stream) streamlen = stream.tell() stream.seek(0) istream = repo.odb.store(IStream(cls.type, streamlen, stream)) new_commit.binsha = istream.binsha if head: # need late import here, importing git at the very beginning throws # as well ... 
import git.refs try: repo.head.set_commit(new_commit, logmsg="commit: %s" % message) except ValueError: # head is not yet set to the ref our HEAD points to # Happens on first commit import git.refs master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message) repo.head.set_reference(master, logmsg='commit: Switching to %s' % master) # END handle empty repositories # END advance head handling return new_commit #{ Serializable Implementation def _serialize(self, stream): write = stream.write write("tree %s\n" % self.tree) for p in self.parents: write("parent %s\n" % p) a = self.author aname = a.name if isinstance(aname, unicode): aname = aname.encode(self.encoding) # END handle unicode in name c = self.committer fmt = "%s %s <%s> %s %s\n" write(fmt % ("author", aname, a.email, self.authored_date, altz_to_utctz_str(self.author_tz_offset))) # encode committer aname = c.name if isinstance(aname, unicode): aname = aname.encode(self.encoding) # END handle unicode in name write(fmt % ("committer", aname, c.email, self.committed_date, altz_to_utctz_str(self.committer_tz_offset))) if self.encoding != self.default_encoding: write("encoding %s\n" % self.encoding) write("\n") # write plain bytes, be sure its encoded according to our encoding if isinstance(self.message, unicode): write(self.message.encode(self.encoding)) else: write(self.message) # END handle encoding return self def _deserialize(self, stream): """:param from_rev_list: if true, the stream format is coming from the rev-list command Otherwise it is assumed to be a plain data stream from our object""" readline = stream.readline self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') self.parents = list() next_line = None while True: parent_line = readline() if not parent_line.startswith('parent'): next_line = parent_line break # END abort reading parents self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) # END for each parent line self.parents = tuple(self.parents) self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) # now we can have the encoding line, or an empty line followed by the optional # message. 
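		# For reference, the remainder of a serialized commit - as written by
		# _serialize above - looks like this (the encoding line is optional):
		#
		#	encoding <name-of-encoding>
		#	<empty line>
		#	<message bytes>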
self.encoding = self.default_encoding # read encoding or empty line to separate message enc = readline() enc = enc.strip() if enc: self.encoding = enc[enc.find(' ')+1:] # now comes the message separator readline() # END handle encoding # decode the authors name try: self.author.name = self.author.name.decode(self.encoding) except UnicodeDecodeError: print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) # END handle author's encoding # decode committer name try: self.committer.name = self.committer.name.decode(self.encoding) except UnicodeDecodeError: print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) # END handle author's encoding # a stream from our data simply gives us the plain message # The end of our message stream is marked with a newline that we strip self.message = stream.read() try: self.message = self.message.decode(self.encoding) except UnicodeDecodeError: print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) # END exception handling return self #} END serializable implementation GitPython-0.3.2.RC1/git/objects/submodule/0000755000175100017510000000000011605055665017317 5ustar byronbyronGitPython-0.3.2.RC1/git/objects/submodule/base.py0000644000175100017510000010252111575507662020611 0ustar byronbyronimport util from util import ( mkhead, sm_name, sm_section, unbare_repo, SubmoduleConfigParser, find_first_remote_branch ) from git.objects.util import Traversable from StringIO import StringIO # need a dict to set bloody .name field from git.util import ( Iterable, join_path_native, to_native_path_linux, RemoteProgress, rmtree ) from git.config import SectionConstraint from git.exc import ( InvalidGitRepositoryError, NoSuchPathError ) import stat import git import os import sys import time __all__ = ["Submodule", "UpdateProgress"] class UpdateProgress(RemoteProgress): """Class providing detailed progress information to the caller who should derive from it and implement the ``update(...)`` message""" CLONE, FETCH, UPDWKTREE = [1 << x for x in range(RemoteProgress._num_op_codes, RemoteProgress._num_op_codes+3)] _num_op_codes = RemoteProgress._num_op_codes + 3 __slots__ = tuple() BEGIN = UpdateProgress.BEGIN END = UpdateProgress.END CLONE = UpdateProgress.CLONE FETCH = UpdateProgress.FETCH UPDWKTREE = UpdateProgress.UPDWKTREE # IndexObject comes via util module, its a 'hacky' fix thanks to pythons import # mechanism which cause plenty of trouble of the only reason for packages and # modules is refactoring - subpackages shoudn't depend on parent packages class Submodule(util.IndexObject, Iterable, Traversable): """Implements access to a git submodule. They are special in that their sha represents a commit in the submodule's repository which is to be checked out at the path of this instance. The submodule type does not have a string type associated with it, as it exists solely as a marker in the tree and index. 
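	
	A short usage sketch (assuming ``repo`` is a Repo whose tree already
	contains at least one submodule; attribute values depend on .gitmodules)::
	
		sm = repo.submodules[0]
		print sm.name, sm.path, sm.url
		smrepo = sm.module()	# Repo instance of the submodule's own repository
	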
All methods work in bare and non-bare repositories.""" _id_attribute_ = "name" k_modules_file = '.gitmodules' k_head_option = 'branch' k_head_default = 'master' k_default_mode = stat.S_IFDIR | stat.S_IFLNK # submodules are directories with link-status # this is a bogus type for base class compatability type = 'submodule' __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__') _cache_attrs = ('path', '_url', '_branch_path') def __init__(self, repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, branch_path=None): """Initialize this instance with its attributes. We only document the ones that differ from ``IndexObject`` :param repo: Our parent repository :param binsha: binary sha referring to a commit in the remote repository, see url parameter :param parent_commit: see set_parent_commit() :param url: The url to the remote repository which is the submodule :param branch_path: full (relative) path to ref to checkout when cloning the remote repository""" super(Submodule, self).__init__(repo, binsha, mode, path) self.size = 0 if parent_commit is not None: self._parent_commit = parent_commit if url is not None: self._url = url if branch_path is not None: assert isinstance(branch_path, basestring) self._branch_path = branch_path if name is not None: self._name = name def _set_cache_(self, attr): if attr == '_parent_commit': # set a default value, which is the root tree of the current head self._parent_commit = self.repo.commit() elif attr in ('path', '_url', '_branch_path'): reader = self.config_reader() # default submodule values self.path = reader.get_value('path') self._url = reader.get_value('url') # git-python extension values - optional self._branch_path = reader.get_value(self.k_head_option, git.Head.to_full_path(self.k_head_default)) elif attr == '_name': raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially") else: super(Submodule, self)._set_cache_(attr) # END handle attribute name def _get_intermediate_items(self, item): """:return: all the submodules of our module repository""" try: return type(self).list_items(item.module()) except InvalidGitRepositoryError: return list() # END handle intermeditate items def __eq__(self, other): """Compare with another submodule""" # we may only compare by name as this should be the ID they are hashed with # Otherwise this type wouldn't be hashable # return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other) return self._name == other._name def __ne__(self, other): """Compare with another submodule for inequality""" return not (self == other) def __hash__(self): """Hash this instance using its logical id, not the sha""" return hash(self._name) def __str__(self): return self._name def __repr__(self): return "git.%s(name=%s, path=%s, url=%s, branch_path=%s)" % (type(self).__name__, self._name, self.path, self.url, self.branch_path) @classmethod def _config_parser(cls, repo, parent_commit, read_only): """:return: Config Parser constrained to our submodule in read or write mode :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository at the given parent commit. 
Otherwise the exception would be delayed until the first access of the config parser""" parent_matches_head = repo.head.commit == parent_commit if not repo.bare and parent_matches_head: fp_module = cls.k_modules_file fp_module_path = os.path.join(repo.working_tree_dir, fp_module) if not os.path.isfile(fp_module_path): raise IOError("%s file was not accessible" % fp_module_path) # END handle existance fp_module = fp_module_path else: try: fp_module = cls._sio_modules(parent_commit) except KeyError: raise IOError("Could not find %s file in the tree of parent commit %s" % (cls.k_modules_file, parent_commit)) # END handle exceptions # END handle non-bare working tree if not read_only and (repo.bare or not parent_matches_head): raise ValueError("Cannot write blobs of 'historical' submodule configurations") # END handle writes of historical submodules return SubmoduleConfigParser(fp_module, read_only = read_only) def _clear_cache(self): # clear the possibly changed values for name in self._cache_attrs: try: delattr(self, name) except AttributeError: pass # END try attr deletion # END for each name to delete @classmethod def _sio_modules(cls, parent_commit): """:return: Configuration file as StringIO - we only access it through the respective blob's data""" sio = StringIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) sio.name = cls.k_modules_file return sio def _config_parser_constrained(self, read_only): """:return: Config Parser constrained to our submodule in read or write mode""" parser = self._config_parser(self.repo, self._parent_commit, read_only) parser.set_submodule(self) return SectionConstraint(parser, sm_section(self.name)) #{ Edit Interface @classmethod def add(cls, repo, name, path, url=None, branch=None, no_checkout=False): """Add a new submodule to the given repository. This will alter the index as well as the .gitmodules file, but will not create a new commit. If the submodule already exists, no matter if the configuration differs from the one provided, the existing submodule will be returned. :param repo: Repository instance which should receive the submodule :param name: The name/identifier for the submodule :param path: repository-relative or absolute path at which the submodule should be located It will be created as required during the repository initialization. :param url: git-clone compatible URL, see git-clone reference for more information If None, the repository is assumed to exist, and the url of the first remote is taken instead. This is useful if you want to make an existing repository a submodule of anotherone. :param branch: name of branch at which the submodule should (later) be checked out. The given branch must exist in the remote repository, and will be checked out locally as a tracking branch. It will only be written into the configuration if it not None, which is when the checked out branch will be the one the remote HEAD pointed to. The result you get in these situation is somewhat fuzzy, and it is recommended to specify at least 'master' here. 
    #{ Edit Interface

    @classmethod
    def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
        """Add a new submodule to the given repository. This will alter the index
        as well as the .gitmodules file, but will not create a new commit.
        If the submodule already exists, no matter if the configuration differs
        from the one provided, the existing submodule will be returned.

        :param repo: Repository instance which should receive the submodule
        :param name: The name/identifier for the submodule
        :param path: repository-relative or absolute path at which the submodule
            should be located. It will be created as required during the repository
            initialization.
        :param url: git-clone compatible URL, see git-clone reference for more information.
            If None, the repository is assumed to exist, and the url of the first
            remote is taken instead. This is useful if you want to make an existing
            repository a submodule of another one.
        :param branch: name of branch at which the submodule should (later) be checked out.
            The given branch must exist in the remote repository, and will be checked
            out locally as a tracking branch.
            It will only be written into the configuration if it is not None; otherwise
            the checked out branch will be the one the remote HEAD pointed to.
            The result you get in that situation is somewhat fuzzy, and it is recommended
            to specify at least 'master' here. Examples are 'master' or 'feature/new'
        :param no_checkout: if True, and if the repository has to be cloned manually,
            no checkout will be performed
        :return: The newly created submodule instance
        :note: works atomically, such that no change will be done if the repository
            update fails for instance"""
        if repo.bare:
            raise InvalidGitRepositoryError("Cannot add submodules to bare repositories")
        # END handle bare repos

        path = to_native_path_linux(path)
        if path.endswith('/'):
            path = path[:-1]
        # END handle trailing slash

        # assure we never put backslashes into the url, as some operating systems
        # like it ...
        if url is not None:
            url = to_native_path_linux(url)
        # END assure url correctness

        # INSTANTIATE INTERMEDIATE SM
        sm = cls(repo, cls.NULL_BIN_SHA, cls.k_default_mode, path, name)
        if sm.exists():
            # reretrieve submodule from tree
            try:
                return repo.head.commit.tree[path]
            except KeyError:
                # could only be in index
                index = repo.index
                entry = index.entries[index.entry_key(path, 0)]
                sm.binsha = entry.binsha
                return sm
            # END handle exceptions
        # END handle existing

        # fake-repo - we only need the functionality on the branch instance
        # NOTE: the 'or' must apply before the str() conversion, otherwise a
        # branch of None would yield the bogus branch name 'None'
        br = git.Head(repo, git.Head.to_full_path(str(branch or cls.k_head_default)))
        has_module = sm.module_exists()
        branch_is_default = branch is None
        if has_module and url is not None:
            if url not in [r.url for r in sm.module().remotes]:
                raise ValueError("Specified URL '%s' does not match any remote url of the repository at '%s'" % (url, sm.abspath))
            # END check url
        # END verify urls match

        mrepo = None
        if url is None:
            if not has_module:
                raise ValueError("A URL was not given and no existing repository was found at %s" % path)
            # END check url
            mrepo = sm.module()
            urls = [r.url for r in mrepo.remotes]
            if not urls:
                raise ValueError("Didn't find any remote url in repository at %s" % sm.abspath)
            # END verify we have url
            url = urls[0]
        else:
            # clone new repo
            kwargs = {'n': no_checkout}
            if not branch_is_default:
                kwargs['b'] = br.name
            # END setup checkout-branch
            mrepo = git.Repo.clone_from(url, path, **kwargs)
        # END verify url

        # update configuration and index
        index = sm.repo.index
        writer = sm.config_writer(index=index, write=False)
        writer.set_value('url', url)
        writer.set_value('path', path)

        sm._url = url
        if not branch_is_default:
            # store full path
            writer.set_value(cls.k_head_option, br.path)
            sm._branch_path = br.path
        # END handle path
        del(writer)

        # we deliberately assume that our head matches our index !
        pcommit = repo.head.commit
        sm._parent_commit = pcommit
        sm.binsha = mrepo.head.commit.binsha
        index.add([sm], write=True)

        return sm
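    # Usage sketch (illustrative, not part of the original source; the path and
    # url below are hypothetical):
    #
    #   >>> import git
    #   >>> repo = git.Repo('/path/to/parent_repo')
    #   >>> sm = git.Submodule.add(repo, 'mylib', 'ext/mylib',
    #   ...                        url='git://example.com/mylib.git', branch='master')
    #   >>> repo.index.commit("add submodule 'mylib'")      # add() stages, but does not commit
    #
    # The same operation is exposed as repo.create_submodule(...), see git/repo/base.py
    # further below.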
    def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, dry_run=False):
        """Update the repository of this submodule to point to the checkout
        we point at with the binsha of this instance.

        :param recursive: if True, we will operate recursively and update child-
            modules as well.
        :param init: if True, the module repository will be cloned into place if necessary
        :param to_latest_revision: if True, the submodule's sha will be ignored during checkout.
            Instead, the remote will be fetched, and the local tracking branch updated.
            This only works if we have a local tracking branch, which is the case
            if the remote repository had a master branch, or if the 'branch' option was
            specified for this submodule and the branch existed remotely
        :param progress: UpdateProgress instance or None if no progress should be shown
        :param dry_run: if True, the operation will only be simulated, but not performed.
            All performed operations are read-only
        :note: does nothing in bare repositories
        :note: method is definitely not atomic if recursive is True
        :return: self"""
        if self.repo.bare:
            return self
        # END pass in bare mode

        if progress is None:
            progress = UpdateProgress()
        # END handle progress
        prefix = ''
        if dry_run:
            prefix = "DRY-RUN: "
        # END handle prefix

        # to keep things plausible in dry-run mode
        if dry_run:
            mrepo = None
        # END init mrepo

        # ASSURE REPO IS PRESENT AND UP-TO-DATE
        #######################################
        try:
            mrepo = self.module()
            rmts = mrepo.remotes
            len_rmts = len(rmts)
            for i, remote in enumerate(rmts):
                op = FETCH
                if i == 0:
                    op |= BEGIN
                # END handle start

                progress.update(op, i, len_rmts, prefix + "Fetching remote %s of submodule %r" % (remote, self.name))
                #===============================
                if not dry_run:
                    remote.fetch(progress=progress)
                # END handle dry-run
                #===============================
                if i == len_rmts - 1:
                    op |= END
                # END handle end
                progress.update(op, i, len_rmts, prefix + "Done fetching remote of submodule %r" % self.name)
            # END fetch new data
        except InvalidGitRepositoryError:
            if not init:
                return self
            # END early abort if init is not allowed
            import git

            # there is no git-repository yet - but delete empty paths
            module_path = join_path_native(self.repo.working_tree_dir, self.path)
            if not dry_run and os.path.isdir(module_path):
                try:
                    os.rmdir(module_path)
                except OSError:
                    raise OSError("Module directory at %r already exists and is non-empty" % module_path)
                # END handle OSError
            # END handle directory removal

            # don't check it out at first - nonetheless it will create a local
            # branch according to the remote-HEAD if possible
            progress.update(BEGIN | CLONE, 0, 1, prefix + "Cloning %s to %s in submodule %r" % (self.url, module_path, self.name))
            if not dry_run:
                mrepo = git.Repo.clone_from(self.url, module_path, n=True)
            # END handle dry-run
            progress.update(END | CLONE, 0, 1, prefix + "Done cloning to %s" % module_path)

            if not dry_run:
                # see whether we have a valid branch to checkout
                try:
                    # find a remote which has our branch - we try to be flexible
                    remote_branch = find_first_remote_branch(mrepo.remotes, self.branch_name)
                    local_branch = mkhead(mrepo, self.branch_path)

                    # have a valid branch, but no checkout - make sure we can figure
                    # that out by marking the commit with a null_sha
                    local_branch.set_object(util.Object(mrepo, self.NULL_BIN_SHA))
                    # END initial checkout + branch creation

                    # make sure HEAD is not detached
                    mrepo.head.set_reference(local_branch, logmsg="submodule: attaching head to %s" % local_branch)
                    mrepo.head.ref.set_tracking_branch(remote_branch)
                except IndexError:
                    print >> sys.stderr, "Warning: Failed to checkout tracking branch %s" % self.branch_path
                # END handle tracking branch

                # NOTE: Have to write the repo config file as well, otherwise
                # the default implementation will be offended and not update the repository.
                # Maybe this is a good way to assure it doesn't get into our way, but
                # we want to stay backwards compatible too ... . It's so redundant !
                self.repo.config_writer().set_value(sm_section(self.name), 'url', self.url)
            # END handle dry_run
        # END handle initialization

        # DETERMINE SHAS TO CHECKOUT
        ############################
        binsha = self.binsha
        hexsha = self.hexsha
        if mrepo is not None:
            # mrepo is only set if we are not in dry-run mode or if the module existed
            is_detached = mrepo.head.is_detached
        # END handle dry_run

        if mrepo is not None and to_latest_revision:
            msg_base = "Cannot update to latest revision in repository at %r as " % mrepo.working_dir
            if not is_detached:
                rref = mrepo.head.ref.tracking_branch()
                if rref is not None:
                    rcommit = rref.commit
                    binsha = rcommit.binsha
                    hexsha = rcommit.hexsha
                else:
                    print >> sys.stderr, "%s a tracking branch was not set for local branch '%s'" % (msg_base, mrepo.head.ref)
                # END handle remote ref
            else:
                print >> sys.stderr, "%s there was no local tracking branch" % msg_base
            # END handle detached head
        # END handle to_latest_revision option

        # update the working tree
        # handles dry_run
        if mrepo is not None and mrepo.head.commit.binsha != binsha:
            progress.update(BEGIN | UPDWKTREE, 0, 1, prefix + "Updating working tree at %s for submodule %r to revision %s" % (self.path, self.name, hexsha))
            if not dry_run:
                if is_detached:
                    # NOTE: for now we force, the user is not supposed to change detached
                    # submodules anyway. Maybe at some point this becomes an option, to
                    # properly handle user modifications - see below for future options
                    # regarding rebase and merge.
                    mrepo.git.checkout(hexsha, force=True)
                else:
                    # TODO: allow to specify a rebase, merge, or reset
                    # TODO: Warn if the hexsha forces the tracking branch off the remote
                    # branch - this should be prevented when setting the branch option
                    mrepo.head.reset(hexsha, index=True, working_tree=True)
                # END handle checkout
            # END handle dry_run
            progress.update(END | UPDWKTREE, 0, 1, prefix + "Done updating working tree for submodule %r" % self.name)
        # END update to new commit only if needed

        # HANDLE RECURSION
        ##################
        if recursive:
            # in dry_run mode, the module might not exist
            if mrepo is not None:
                for submodule in self.iter_items(self.module()):
                    submodule.update(recursive, init, to_latest_revision, progress=progress, dry_run=dry_run)
                # END handle recursive update
            # END handle dry run
        # END for each submodule

        return self
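    # Usage sketch (illustrative, not part of the original source; the repository
    # path is hypothetical):
    #
    #   >>> import git
    #   >>> repo = git.Repo('/path/to/parent_repo')
    #   >>> for sm in repo.submodules:
    #   ...     sm.update(init=True, recursive=True, dry_run=True)  # simulate first
    #   ...     sm.update(init=True, recursive=True)                # clone/checkout for real
    #
    # Passing to_latest_revision=True fetches the remote and checks out the tracking
    # branch instead of the sha recorded in the parent repository.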
    @unbare_repo
    def move(self, module_path, configuration=True, module=True):
        """Move the submodule to another module path. This involves physically moving
        the repository at our current path, changing the configuration, as well as
        adjusting our index entry accordingly.

        :param module_path: the path to which to move our module, given as
            repository-relative path. Intermediate directories will be created
            accordingly. If the path already exists, it must be empty.
            Trailing (back)slashes are removed automatically
        :param configuration: if True, the configuration will be adjusted to let
            the submodule point to the given path.
        :param module: if True, the repository managed by this submodule will be moved
            as well. Moving the module without adjusting the configuration will
            effectively leave your repository in an inconsistent state unless the
            configuration and index already point to the target location.
        :return: self
        :raise ValueError: if the module path existed and was not empty, or was a file
        :note: Currently the method is not atomic, and it could leave the repository
            in an inconsistent state if a sub-step fails for some reason"""
        if module + configuration < 1:
            raise ValueError("You must specify to move at least the module or the configuration of the submodule")
        # END handle input

        module_path = to_native_path_linux(module_path)
        if module_path.endswith('/'):
            module_path = module_path[:-1]
        # END handle trailing slash

        # VERIFY DESTINATION
        if module_path == self.path:
            return self
        # END handle no change

        dest_path = join_path_native(self.repo.working_tree_dir, module_path)
        if os.path.isfile(dest_path):
            raise ValueError("Cannot move repository onto a file: %s" % dest_path)
        # END handle target files

        index = self.repo.index
        tekey = index.entry_key(module_path, 0)

        # if the target item already exists, fail
        if configuration and tekey in index.entries:
            raise ValueError("Index entry for target path already existed")
        # END handle index key already there

        # remove existing destination
        if module:
            if os.path.exists(dest_path):
                if len(os.listdir(dest_path)):
                    raise ValueError("Destination module directory was not empty")
                # END handle non-emptiness

                if os.path.islink(dest_path):
                    os.remove(dest_path)
                else:
                    os.rmdir(dest_path)
                # END handle link
            else:
                # recreate parent directories
                # NOTE: renames() does that now
                pass
            # END handle existence
        # END handle module

        # move the module into place if possible
        cur_path = self.abspath
        renamed_module = False
        if module and os.path.exists(cur_path):
            os.renames(cur_path, dest_path)
            renamed_module = True
        # END move physical module

        # rename the index entry - have to manipulate the index directly as
        # git-mv cannot be used on submodules ... yeah
        try:
            if configuration:
                try:
                    ekey = index.entry_key(self.path, 0)
                    entry = index.entries[ekey]
                    del(index.entries[ekey])
                    nentry = git.IndexEntry(entry[:3] + (module_path,) + entry[4:])
                    index.entries[tekey] = nentry
                except KeyError:
                    raise InvalidGitRepositoryError("Submodule's entry at %r did not exist" % (self.path))
                # END handle submodule doesn't exist

                # update configuration
                writer = self.config_writer(index=index)        # auto-write
                writer.set_value('path', module_path)
                self.path = module_path
                del(writer)
            # END handle configuration flag
        except Exception:
            if renamed_module:
                os.renames(dest_path, cur_path)
            # END undo module renaming
            raise
        # END handle undo rename

        return self
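    # Usage sketch (illustrative, not part of the original source; paths are
    # hypothetical):
    #
    #   >>> import git
    #   >>> repo = git.Repo('/path/to/parent_repo')
    #   >>> sm = repo.submodule('mylib')
    #   >>> sm.move('vendor/mylib')    # relocates working tree, index entry and configuration
    #   >>> repo.index.commit("move submodule 'mylib' below vendor/")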
    @unbare_repo
    def remove(self, module=True, force=False, configuration=True, dry_run=False):
        """Remove this submodule from the repository. This will remove our entry
        from the .gitmodules file and the entry in the .git/config file.

        :param module: If True, the module we point to will be deleted as well.
            The deletion will fail if the module is currently on a commit which is
            not part of any branch in the remote, if the currently checked out branch
            is ahead of its tracking branch, or if you have modifications in the
            working tree or untracked files.
            In case the removal of the repository fails for these reasons, the
            submodule status will not have been altered.
            If this submodule has child-modules of its own, these will be deleted
            prior to touching our own module.
        :param force: Enforces the deletion of the module even though it contains
            modifications. This basically enforces a brute-force file system based
            deletion.
        :param configuration: if True, the submodule is deleted from the configuration,
            otherwise it isn't. Although this should be enabled most of the time,
            this flag enables you to safely delete the repository of your submodule.
        :param dry_run: if True, we will not actually do anything, but throw the errors
            we would usually throw
        :return: self
        :note: doesn't work in bare repositories
        :raise InvalidGitRepositoryError: thrown if the repository cannot be deleted
        :raise OSError: if directories or files could not be removed"""
        if not (module + configuration):
            raise ValueError("Need to specify to delete at least the module, or the configuration")
        # END handle params

        # DELETE MODULE REPOSITORY
        ##########################
        if module and self.module_exists():
            if force:
                # take the fast lane and just delete everything in our module path
                # TODO: If we run into permission problems, we have a highly inconsistent
                # state. Delete the .git folders last, start with the submodules first
                mp = self.abspath
                method = None
                if os.path.islink(mp):
                    method = os.remove
                elif os.path.isdir(mp):
                    method = rmtree
                elif os.path.exists(mp):
                    raise AssertionError("Cannot forcibly delete repository as it was neither a link, nor a directory")
                # END handle brutal deletion
                if not dry_run:
                    assert method
                    method(mp)
                # END apply deletion method
            else:
                # verify we may delete our module
                mod = self.module()
                if mod.is_dirty(untracked_files=True):
                    raise InvalidGitRepositoryError("Cannot delete module at %s with any modifications, unless force is specified" % mod.working_tree_dir)
                # END check for dirt

                # figure out whether we have new commits compared to the remotes
                # NOTE: If the user pulled all the time, the remote heads might
                # not have been updated, so commits coming from the remote look
                # as if they come from us. But we stay strictly read-only and
                # don't fetch beforehand.
                for remote in mod.remotes:
                    num_branches_with_new_commits = 0
                    rrefs = remote.refs
                    for rref in rrefs:
                        # count the branches relative to which we have new commits -
                        # a plain assignment would only keep the result of the last ref
                        num_branches_with_new_commits += len(mod.git.cherry(rref)) != 0
                    # END for each remote ref
                    # not a single remote branch contained all our commits
                    if num_branches_with_new_commits == len(rrefs):
                        raise InvalidGitRepositoryError("Cannot delete module at %s as there are new commits" % mod.working_tree_dir)
                    # END handle new commits
                    # have to manually delete references as python's scoping is
                    # not existing, they could keep handles open ( on windows this is a problem )
                    if len(rrefs):
                        del(rref)
                    # END handle remotes
                    del(rrefs)
                    del(remote)
                # END for each remote

                # gently remove all submodule repositories
                for sm in self.children():
                    sm.remove(module=True, force=False, configuration=False, dry_run=dry_run)
                    del(sm)
                # END for each child-submodule

                # finally delete our own submodule
                if not dry_run:
                    wtd = mod.working_tree_dir
                    del(mod)        # release file-handles (windows)
                    rmtree(wtd)
                # END delete tree if possible
            # END handle force
        # END handle module deletion

        # DELETE CONFIGURATION
        ######################
        if configuration and not dry_run:
            # first the index-entry
            index = self.repo.index
            try:
                del(index.entries[index.entry_key(self.path, 0)])
            except KeyError:
                pass
            # END delete entry
            index.write()

            # now git config - need the config intact, otherwise we can't query
            # information anymore
            self.repo.config_writer().remove_section(sm_section(self.name))
            self.config_writer().remove_section()
        # END delete configuration

        # void our data not to delay invalid access
        self._clear_cache()

        return self
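    # Usage sketch (illustrative, not part of the original source; the path is
    # hypothetical):
    #
    #   >>> import git
    #   >>> repo = git.Repo('/path/to/parent_repo')
    #   >>> sm = repo.submodule('mylib')
    #   >>> sm.remove(dry_run=True)    # raises if the module could not be deleted safely
    #   >>> sm.remove()                # deletes module, index entry and configuration
    #   >>> repo.index.commit("remove submodule 'mylib'")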
    def set_parent_commit(self, commit, check=True):
        """Set this instance to use the given commit whose tree is supposed to
        contain the .gitmodules blob.

        :param commit: Commit'ish reference pointing at the root_tree
        :param check: if True, relatively expensive checks will be performed to verify
            validity of the submodule.
        :raise ValueError: if the commit's tree didn't contain the .gitmodules blob.
        :raise ValueError: if the parent commit didn't store this submodule under the
            current path
        :return: self"""
        pcommit = self.repo.commit(commit)
        pctree = pcommit.tree
        if self.k_modules_file not in pctree:
            raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.k_modules_file))
        # END handle exceptions

        prev_pc = self._parent_commit
        self._parent_commit = pcommit

        if check:
            parser = self._config_parser(self.repo, self._parent_commit, read_only=True)
            if not parser.has_section(sm_section(self.name)):
                self._parent_commit = prev_pc
                raise ValueError("Submodule at path %r did not exist in parent commit %s" % (self.path, commit))
            # END handle submodule did not exist
        # END handle checking mode

        # update our sha, it could have changed
        self.binsha = pctree[self.path].binsha

        self._clear_cache()

        return self

    @unbare_repo
    def config_writer(self, index=None, write=True):
        """:return: a config writer instance allowing you to read and write the data
            belonging to this submodule into the .gitmodules file.

        :param index: if not None, an IndexFile instance which should be written.
            defaults to the index of the Submodule's parent repository.
        :param write: if True, the index will be written each time a configuration
            value changes.
        :note: the parameters allow for a more efficient writing of the index,
            as you can pass in a modified index on your own, prevent automatic writing,
            and write yourself once the whole operation is complete
        :raise ValueError: if trying to get a writer on a parent_commit which does not
            match the current head commit
        :raise IOError: If the .gitmodules file/blob could not be read"""
        writer = self._config_parser_constrained(read_only=False)
        if index is not None:
            writer.config._index = index
        writer.config._auto_write = write
        return writer

    #} END edit interface

    #{ Query Interface

    @unbare_repo
    def module(self):
        """:return: Repo instance initialized from the repository at our submodule path
        :raise InvalidGitRepositoryError: if a repository was not available.
            This could also mean that it was not yet initialized"""
        # late import to workaround circular dependencies
        module_path = self.abspath
        try:
            repo = git.Repo(module_path)
            if repo != self.repo:
                return repo
            # END handle repo uninitialized
        except (InvalidGitRepositoryError, NoSuchPathError):
            raise InvalidGitRepositoryError("No valid repository at %s" % self.path)
        else:
            raise InvalidGitRepositoryError("Repository at %r was not yet checked out" % module_path)
        # END handle exceptions

    def module_exists(self):
        """:return: True if our module exists and is a valid git repository.
            See the module() method"""
        try:
            self.module()
            return True
        except Exception:
            return False
        # END handle exception

    def exists(self):
        """
        :return: True if the submodule exists, False otherwise.
Please note that a submodule may exist (in the .gitmodules file) even though its module doesn't exist""" # keep attributes for later, and restore them if we have no valid data # this way we do not actually alter the state of the object loc = locals() for attr in self._cache_attrs: if hasattr(self, attr): loc[attr] = getattr(self, attr) # END if we have the attribute cache #END for each attr self._clear_cache() try: try: self.path return True except Exception: return False # END handle exceptions finally: for attr in self._cache_attrs: if attr in loc: setattr(self, attr, loc[attr]) # END if we have a cache # END reapply each attribute # END handle object state consistency @property def branch(self): """:return: The branch instance that we are to checkout :raise InvalidGitRepositoryError: if our module is not yet checked out""" return mkhead(self.module(), self._branch_path) @property def branch_path(self): """ :return: full (relative) path as string to the branch we would checkout from the remote and track""" return self._branch_path @property def branch_name(self): """:return: the name of the branch, which is the shortest possible branch name""" # use an instance method, for this we create a temporary Head instance # which uses a repository that is available at least ( it makes no difference ) return git.Head(self.repo, self._branch_path).name @property def url(self): """:return: The url to the repository which our module-repository refers to""" return self._url @property def parent_commit(self): """:return: Commit instance with the tree containing the .gitmodules file :note: will always point to the current head's commit if it was not set explicitly""" return self._parent_commit @property def name(self): """:return: The name of this submodule. It is used to identify it within the .gitmodules file. :note: by default, the name is the path at which to find the submodule, but in git-python it should be a unique identifier similar to the identifiers used for remotes, which allows to change the path of the submodule easily """ return self._name def config_reader(self): """ :return: ConfigReader instance which allows you to qurey the configuration values of this submodule, as provided by the .gitmodules file :note: The config reader will actually read the data directly from the repository and thus does not need nor care about your working tree. :note: Should be cached by the caller and only kept as long as needed :raise IOError: If the .gitmodules file/blob could not be read""" return self._config_parser_constrained(read_only=True) def children(self): """ :return: IterableList(Submodule, ...) 
an iterable list of submodules instances which are children of this submodule or 0 if the submodule is not checked out""" return self._get_intermediate_items(self) #} END query interface #{ Iterable Interface @classmethod def iter_items(cls, repo, parent_commit='HEAD'): """:return: iterator yielding Submodule instances available in the given repository""" pc = repo.commit(parent_commit) # parent commit instance try: parser = cls._config_parser(repo, pc, read_only=True) except IOError: raise StopIteration # END handle empty iterator rt = pc.tree # root tree for sms in parser.sections(): n = sm_name(sms) p = parser.get_value(sms, 'path') u = parser.get_value(sms, 'url') b = cls.k_head_default if parser.has_option(sms, cls.k_head_option): b = parser.get_value(sms, cls.k_head_option) # END handle optional information # get the binsha index = repo.index try: sm = rt[p] except KeyError: # try the index, maybe it was just added try: entry = index.entries[index.entry_key(p, 0)] sm = Submodule(repo, entry.binsha, entry.mode, entry.path) except KeyError: raise InvalidGitRepositoryError("Gitmodule path %r did not exist in revision of parent commit %s" % (p, parent_commit)) # END handle keyerror # END handle critical error # fill in remaining info - saves time as it doesn't have to be parsed again sm._name = n sm._parent_commit = pc sm._branch_path = git.Head.to_full_path(b) sm._url = u yield sm # END for each section #} END iterable interface GitPython-0.3.2.RC1/git/objects/submodule/root.py0000644000175100017510000002776411575422024020665 0ustar byronbyronfrom base import Submodule, UpdateProgress from util import ( find_first_remote_branch ) from git.exc import InvalidGitRepositoryError import git import sys __all__ = ["RootModule", "RootUpdateProgress"] class RootUpdateProgress(UpdateProgress): """Utility class which adds more opcodes to the UpdateProgress""" REMOVE, PATHCHANGE, BRANCHCHANGE, URLCHANGE = [1 << x for x in range(UpdateProgress._num_op_codes, UpdateProgress._num_op_codes+4)] _num_op_codes = UpdateProgress._num_op_codes+4 __slots__ = tuple() BEGIN = RootUpdateProgress.BEGIN END = RootUpdateProgress.END REMOVE = RootUpdateProgress.REMOVE BRANCHCHANGE = RootUpdateProgress.BRANCHCHANGE URLCHANGE = RootUpdateProgress.URLCHANGE PATHCHANGE = RootUpdateProgress.PATHCHANGE class RootModule(Submodule): """A (virtual) Root of all submodules in the given repository. It can be used to more easily traverse all submodules of the master repository""" __slots__ = tuple() k_root_name = '__ROOT__' def __init__(self, repo): # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None) super(RootModule, self).__init__( repo, binsha = self.NULL_BIN_SHA, mode = self.k_default_mode, path = '', name = self.k_root_name, parent_commit = repo.head.commit, url = '', branch_path = git.Head.to_full_path(self.k_head_default) ) def _clear_cache(self): """May not do anything""" pass #{ Interface def update(self, previous_commit=None, recursive=True, force_remove=False, init=True, to_latest_revision=False, progress=None, dry_run=False): """Update the submodules of this repository to the current HEAD commit. This method behaves smartly by determining changes of the path of a submodules repository, next to changes to the to-be-checked-out commit or the branch to be checked out. This works if the submodules ID does not change. Additionally it will detect addition and removal of submodules, which will be handled gracefully. 
:param previous_commit: If set to a commit'ish, the commit we should use as the previous commit the HEAD pointed to before it was set to the commit it points to now. If None, it defaults to HEAD@{1} otherwise :param recursive: if True, the children of submodules will be updated as well using the same technique :param force_remove: If submodules have been deleted, they will be forcibly removed. Otherwise the update may fail if a submodule's repository cannot be deleted as changes have been made to it (see Submodule.update() for more information) :param init: If we encounter a new module which would need to be initialized, then do it. :param to_latest_revision: If True, instead of checking out the revision pointed to by this submodule's sha, the checked out tracking branch will be merged with the newest remote branch fetched from the repository's origin :param progress: RootUpdateProgress instance or None if no progress should be sent :param dry_run: if True, operations will not actually be performed. Progress messages will change accordingly to indicate the WOULD DO state of the operation.""" if self.repo.bare: raise InvalidGitRepositoryError("Cannot update submodules in bare repositories") # END handle bare if progress is None: progress = RootUpdateProgress() #END assure progress is set prefix = '' if dry_run: prefix = 'DRY-RUN: ' repo = self.repo # SETUP BASE COMMIT ################### cur_commit = repo.head.commit if previous_commit is None: try: previous_commit = repo.commit(repo.head.log_entry(-1).oldhexsha) if previous_commit.binsha == previous_commit.NULL_BIN_SHA: raise IndexError #END handle initial commit except IndexError: # in new repositories, there is no previous commit previous_commit = cur_commit #END exception handling else: previous_commit = repo.commit(previous_commit) # obtain commit object # END handle previous commit psms = self.list_items(repo, parent_commit=previous_commit) sms = self.list_items(repo) spsms = set(psms) ssms = set(sms) # HANDLE REMOVALS ################### rrsm = (spsms - ssms) len_rrsm = len(rrsm) for i, rsm in enumerate(rrsm): op = REMOVE if i == 0: op |= BEGIN #END handle begin # fake it into thinking its at the current commit to allow deletion # of previous module. 
Trigger the cache to be updated before that progress.update(op, i, len_rrsm, prefix+"Removing submodule %r at %s" % (rsm.name, rsm.abspath)) rsm._parent_commit = repo.head.commit if not dry_run: rsm.remove(configuration=False, module=True, force=force_remove) #END handle dry-run if i == len_rrsm-1: op |= END #END handle end progress.update(op, i, len_rrsm, prefix+"Done removing submodule %r" % rsm.name) # END for each removed submodule # HANDLE PATH RENAMES ##################### # url changes + branch changes csms = (spsms & ssms) len_csms = len(csms) for i, csm in enumerate(csms): psm = psms[csm.name] sm = sms[csm.name] #PATH CHANGES ############## if sm.path != psm.path and psm.module_exists(): progress.update(BEGIN|PATHCHANGE, i, len_csms, prefix+"Moving repository of submodule %r from %s to %s" % (sm.name, psm.abspath, sm.abspath)) # move the module to the new path if not dry_run: psm.move(sm.path, module=True, configuration=False) #END handle dry_run progress.update(END|PATHCHANGE, i, len_csms, prefix+"Done moving repository of submodule %r" % sm.name) # END handle path changes if sm.module_exists(): # HANDLE URL CHANGE ################### if sm.url != psm.url: # Add the new remote, remove the old one # This way, if the url just changes, the commits will not # have to be re-retrieved nn = '__new_origin__' smm = sm.module() rmts = smm.remotes # don't do anything if we already have the url we search in place if len([r for r in rmts if r.url == sm.url]) == 0: progress.update(BEGIN|URLCHANGE, i, len_csms, prefix+"Changing url of submodule %r from %s to %s" % (sm.name, psm.url, sm.url)) if not dry_run: assert nn not in [r.name for r in rmts] smr = smm.create_remote(nn, sm.url) smr.fetch(progress=progress) # If we have a tracking branch, it should be available # in the new remote as well. if len([r for r in smr.refs if r.remote_head == sm.branch_name]) == 0: raise ValueError("Submodule branch named %r was not available in new submodule remote at %r" % (sm.branch_name, sm.url)) # END head is not detached # now delete the changed one rmt_for_deletion = None for remote in rmts: if remote.url == psm.url: rmt_for_deletion = remote break # END if urls match # END for each remote # if we didn't find a matching remote, but have exactly one, # we can safely use this one if rmt_for_deletion is None: if len(rmts) == 1: rmt_for_deletion = rmts[0] else: # if we have not found any remote with the original url # we may not have a name. This is a special case, # and its okay to fail here # Alternatively we could just generate a unique name and leave all # existing ones in place raise InvalidGitRepositoryError("Couldn't find original remote-repo at url %r" % psm.url) #END handle one single remote # END handle check we found a remote orig_name = rmt_for_deletion.name smm.delete_remote(rmt_for_deletion) # NOTE: Currently we leave tags from the deleted remotes # as well as separate tracking branches in the possibly totally # changed repository ( someone could have changed the url to # another project ). At some point, one might want to clean # it up, but the danger is high to remove stuff the user # has added explicitly # rename the new remote back to what it was smr.rename(orig_name) # early on, we verified that the our current tracking branch # exists in the remote. Now we have to assure that the # sha we point to is still contained in the new remote # tracking branch. 
smsha = sm.binsha found = False rref = smr.refs[self.branch_name] for c in rref.commit.traverse(): if c.binsha == smsha: found = True break # END traverse all commits in search for sha # END for each commit if not found: # adjust our internal binsha to use the one of the remote # this way, it will be checked out in the next step # This will change the submodule relative to us, so # the user will be able to commit the change easily print >> sys.stderr, "WARNING: Current sha %s was not contained in the tracking branch at the new remote, setting it the the remote's tracking branch" % sm.hexsha sm.binsha = rref.commit.binsha #END reset binsha #NOTE: All checkout is performed by the base implementation of update #END handle dry_run progress.update(END|URLCHANGE, i, len_csms, prefix+"Done adjusting url of submodule %r" % (sm.name)) # END skip remote handling if new url already exists in module # END handle url # HANDLE PATH CHANGES ##################### if sm.branch_path != psm.branch_path: # finally, create a new tracking branch which tracks the # new remote branch progress.update(BEGIN|BRANCHCHANGE, i, len_csms, prefix+"Changing branch of submodule %r from %s to %s" % (sm.name, psm.branch_path, sm.branch_path)) if not dry_run: smm = sm.module() smmr = smm.remotes try: tbr = git.Head.create(smm, sm.branch_name, logmsg='branch: Created from HEAD') except OSError: # ... or reuse the existing one tbr = git.Head(smm, sm.branch_path) #END assure tracking branch exists tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch_name)) # figure out whether the previous tracking branch contains # new commits compared to the other one, if not we can # delete it. try: tbr = find_first_remote_branch(smmr, psm.branch_name) if len(smm.git.cherry(tbr, psm.branch)) == 0: psm.branch.delete(smm, psm.branch) #END delete original tracking branch if there are no changes except InvalidGitRepositoryError: # ignore it if the previous branch couldn't be found in the # current remotes, this just means we can't handle it pass # END exception handling #NOTE: All checkout is done in the base implementation of update #END handle dry_run progress.update(END|BRANCHCHANGE, i, len_csms, prefix+"Done changing branch of submodule %r" % sm.name) #END handle branch #END handle # END for each common submodule # FINALLY UPDATE ALL ACTUAL SUBMODULES ###################################### for sm in sms: # update the submodule using the default method sm.update(recursive=False, init=init, to_latest_revision=to_latest_revision, progress=progress, dry_run=dry_run) # update recursively depth first - question is which inconsitent # state will be better in case it fails somewhere. Defective branch # or defective depth. 
The RootSubmodule type will never process itself, # which was done in the previous expression if recursive: # the module would exist by now if we are not in dry_run mode if sm.module_exists(): type(self)(sm.module()).update( recursive=True, force_remove=force_remove, init=init, to_latest_revision=to_latest_revision, progress=progress, dry_run=dry_run) #END handle dry_run #END handle recursive # END for each submodule to update def module(self): """:return: the actual repository containing the submodules""" return self.repo #} END interface #} END classes GitPython-0.3.2.RC1/git/objects/submodule/__init__.py0000644000175100017510000000013511573623573021432 0ustar byronbyron# NOTE: Cannot import anything here as the top-level _init_ has to handle # our dependencies GitPython-0.3.2.RC1/git/objects/submodule/util.py0000644000175100017510000000552611573623573020661 0ustar byronbyronimport git from git.exc import InvalidGitRepositoryError from git.config import GitConfigParser from StringIO import StringIO import weakref __all__ = ( 'sm_section', 'sm_name', 'mkhead', 'unbare_repo', 'find_first_remote_branch', 'SubmoduleConfigParser') #{ Utilities def sm_section(name): """:return: section title used in .gitmodules configuration file""" return 'submodule "%s"' % name def sm_name(section): """:return: name of the submodule as parsed from the section name""" section = section.strip() return section[11:-1] def mkhead(repo, path): """:return: New branch/head instance""" return git.Head(repo, git.Head.to_full_path(path)) def unbare_repo(func): """Methods with this decorator raise InvalidGitRepositoryError if they encounter a bare repository""" def wrapper(self, *args, **kwargs): if self.repo.bare: raise InvalidGitRepositoryError("Method '%s' cannot operate on bare repositories" % func.__name__) #END bare method return func(self, *args, **kwargs) # END wrapper wrapper.__name__ = func.__name__ return wrapper def find_first_remote_branch(remotes, branch_name): """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError""" for remote in remotes: try: return remote.refs[branch_name] except IndexError: continue # END exception handling #END for remote raise InvalidGitRepositoryError("Didn't find remote branch %r in any of the given remotes", branch_name) #} END utilities #{ Classes class SubmoduleConfigParser(GitConfigParser): """ Catches calls to _write, and updates the .gitmodules blob in the index with the new data, if we have written into a stream. Otherwise it will add the local file to the index to make it correspond with the working tree. Additionally, the cache must be cleared Please note that no mutating method will work in bare mode """ def __init__(self, *args, **kwargs): self._smref = None self._index = None self._auto_write = True super(SubmoduleConfigParser, self).__init__(*args, **kwargs) #{ Interface def set_submodule(self, submodule): """Set this instance's submodule. 
It must be called before the first write operation begins""" self._smref = weakref.ref(submodule) def flush_to_index(self): """Flush changes in our configuration file to the index""" assert self._smref is not None # should always have a file here assert not isinstance(self._file_or_files, StringIO) sm = self._smref() if sm is not None: index = self._index if index is None: index = sm.repo.index # END handle index index.add([sm.k_modules_file], write=self._auto_write) sm._clear_cache() # END handle weakref #} END interface #{ Overridden Methods def write(self): rval = super(SubmoduleConfigParser, self).write() self.flush_to_index() return rval # END overridden methods #} END classes GitPython-0.3.2.RC1/git/objects/__init__.py0000644000175100017510000000114311573623553017431 0ustar byronbyron""" Import all submodules main classes into the package space """ import inspect from base import * # Fix import dependency - add IndexObject to the util module, so that it can be # imported by the submodule.base import submodule.util submodule.util.IndexObject = IndexObject submodule.util.Object = Object from submodule.base import * from submodule.root import * # must come after submodule was made available from tag import * from blob import * from commit import * from tree import * __all__ = [ name for name, obj in locals().items() if not (name.startswith('_') or inspect.ismodule(obj)) ]GitPython-0.3.2.RC1/git/objects/util.py0000644000175100017510000002325111575507662016657 0ustar byronbyron# util.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php """Module for general utility functions""" from git.util import ( IterableList, Actor ) import re from collections import deque as Deque from string import digits import time import os __all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date', 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', 'verify_utctz', 'Actor') #{ Functions def mode_str_to_int(modestr): """ :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used :return: String identifying a mode compatible to the mode methods ids of the stat module regarding the rwx permissions for user, group and other, special flags and file system flags, i.e. whether it is a symlink for example.""" mode = 0 for iteration, char in enumerate(reversed(modestr[-6:])): mode += int(char) << iteration*3 # END for each char return mode def get_object_type_by_name(object_type_name): """ :return: type suitable to handle the given object type name. Use the type to create new instances. :param object_type_name: Member of TYPES :raise ValueError: In case object_type_name is unknown""" if object_type_name == "commit": import commit return commit.Commit elif object_type_name == "tag": import tag return tag.TagObject elif object_type_name == "blob": import blob return blob.Blob elif object_type_name == "tree": import tree return tree.Tree else: raise ValueError("Cannot handle unknown object type: %s" % object_type_name) def utctz_to_altz(utctz): """we convert utctz to the timezone in seconds, it is the format time.altzone returns. Git stores it as UTC timezone which has the opposite sign as well, which explains the -1 * ( that was made explicit here ) :param utctz: git utc timezone string, i.e. 
        +0200"""
    return -1 * int(float(utctz)/100*3600)

def altz_to_utctz_str(altz):
    """As above, but inverts the operation, returning a string that can be used
    in commit objects"""
    utci = -1 * int((altz / 3600)*100)
    utcs = str(abs(utci))
    utcs = "0"*(4-len(utcs)) + utcs
    prefix = (utci < 0 and '-') or '+'
    return prefix + utcs

def verify_utctz(offset):
    """:raise ValueError: if offset is incorrect
    :return: offset"""
    fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
    if len(offset) != 5:
        raise fmt_exc
    if offset[0] not in "+-":
        raise fmt_exc
    if offset[1] not in digits or \
       offset[2] not in digits or \
       offset[3] not in digits or \
       offset[4] not in digits:
        raise fmt_exc
    # END for each char
    return offset

def parse_date(string_date):
    """
    Parse the given date as one of the following

        * Git internal format: timestamp offset
        * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200.
        * ISO 8601: 2005-04-07T22:13:13 - the T can be a space as well

    :return: Tuple(int(timestamp), int(offset)), both in seconds since epoch
    :raise ValueError: If the format could not be understood
    :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY"""
    # git time
    try:
        if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
            timestamp, offset = string_date.split()
            timestamp = int(timestamp)
            return timestamp, utctz_to_altz(verify_utctz(offset))
        else:
            offset = "+0000"                    # local time by default
            if string_date[-5] in '-+':
                offset = verify_utctz(string_date[-5:])
                string_date = string_date[:-6]  # skip space as well
            # END split timezone info

            # now figure out the date and time portion - split time
            date_formats = list()
            splitter = -1
            if ',' in string_date:
                date_formats.append("%a, %d %b %Y")
                splitter = string_date.rfind(' ')
            else:
                # iso plus additional
                date_formats.append("%Y-%m-%d")
                date_formats.append("%Y.%m.%d")
                date_formats.append("%m/%d/%Y")
                date_formats.append("%d.%m.%Y")

                splitter = string_date.rfind('T')
                if splitter == -1:
                    splitter = string_date.rfind(' ')
                # END handle 'T' and ' '
            # END handle rfc or iso

            assert splitter > -1

            # split date and time
            time_part = string_date[splitter+1:]    # skip space
            date_part = string_date[:splitter]

            # parse time
            tstruct = time.strptime(time_part, "%H:%M:%S")

            for fmt in date_formats:
                try:
                    dtstruct = time.strptime(date_part, fmt)
                    fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
                                                tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
                                                dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
                    return int(time.mktime(fstruct)), utctz_to_altz(offset)
                except ValueError:
                    continue
                # END exception handling
            # END for each fmt

            # still here ? fail
            raise ValueError("no format matched")
        # END handle format
    except Exception:
        raise ValueError("Unsupported date format: %s" % string_date)
    # END handle exceptions

# precompiled regex
_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')

def parse_actor_and_date(line):
    """Parse out the actor (author or committer) info from a line like::

        author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700

    :return: [Actor, int_seconds_since_epoch, int_timezone_offset]"""
    m = _re_actor_epoch.search(line)
    actor, epoch, offset = m.groups()
    return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))

#} END functions
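# Worked example (illustrative, not part of the original source): the accepted
# styles all map to (seconds-since-epoch, seconds-west-of-UTC), e.g.
#
#   >>> parse_date("1191999972 -0700")          # git internal format
#   (1191999972, 25200)
#   >>> utctz_to_altz("+0200")                  # two hours east of UTC
#   -7200
#   >>> altz_to_utctz_str(-7200)
#   '+0200'
#
# Note the sign flip: git stores the UTC offset, while time.altzone counts
# seconds west of UTC.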
#{ Classes

class ProcessStreamAdapter(object):
    """Class wiring all calls to the contained Process instance.

    Use this type to hide the underlying process to provide access only to a
    specified stream. The process is usually wrapped into an AutoInterrupt class
    to kill it if the instance goes out of scope."""
    __slots__ = ("_proc", "_stream")

    def __init__(self, process, stream_name):
        self._proc = process
        self._stream = getattr(process, stream_name)

    def __getattr__(self, attr):
        return getattr(self._stream, attr)


class Traversable(object):
    """Simple interface to perform depth-first or breadth-first traversals
    into one direction.
    Subclasses only need to implement one function.
    Instances of the Subclass must be hashable"""
    __slots__ = tuple()

    @classmethod
    def _get_intermediate_items(cls, item):
        """
        Returns:
            List of items connected to the given item.
            Must be implemented in subclass
        """
        raise NotImplementedError("To be implemented in subclass")

    def list_traverse(self, *args, **kwargs):
        """
        :return: IterableList with the results of the traversal as produced by
            traverse()"""
        out = IterableList(self._id_attribute_)
        out.extend(self.traverse(*args, **kwargs))
        return out

    def traverse(self, predicate=lambda i, d: True,
                 prune=lambda i, d: False, depth=-1, branch_first=True,
                 visit_once=True, ignore_self=1, as_edge=False):
        """:return: iterator yielding the items found when traversing self

        :param predicate: f(i,d) returns False if item i at depth d should not be
            included in the result

        :param prune: f(i,d) return True if the search should stop at item i at depth d.
            Item i will not be returned.

        :param depth: defines at which level the iteration should not go deeper:
            if -1, there is no limit;
            if 0, you would effectively only get self, the root of the iteration;
            if 1, you would only get the first level of predecessors/successors

        :param branch_first: if True, items will be returned branch first, otherwise depth first

        :param visit_once: if True, items will only be returned once, although they might be
            encountered several times. Loops are prevented that way.

        :param ignore_self: if True, self will be ignored and automatically pruned from
            the result. Otherwise it will be the first item to be returned.
            If as_edge is True, the source of the first edge is None

        :param as_edge: if True, return a pair of items, first being the source, second the
            destination, i.e. tuple(src, dest) with the edge spanning from source to destination"""
        visited = set()
        stack = Deque()
        stack.append((0, self, None))       # self is always depth level 0

        def addToStack(stack, item, branch_first, depth):
            lst = self._get_intermediate_items(item)
            if not lst:
                return
            if branch_first:
                stack.extendleft((depth, i, item) for i in lst)
            else:
                reviter = ((depth, lst[i], item) for i in range(len(lst)-1, -1, -1))
                stack.extend(reviter)
        # END addToStack local method

        while stack:
            d, item, src = stack.pop()      # depth of item, item, item_source

            if visit_once and item in visited:
                continue

            if visit_once:
                visited.add(item)

            rval = (as_edge and (src, item)) or item
            if prune(rval, d):
                continue

            skipStartItem = ignore_self and (item is self)
            if not skipStartItem and predicate(rval, d):
                yield rval

            # only continue to next level if this is appropriate !
            nd = d + 1
            if depth > -1 and nd > depth:
                continue

            addToStack(stack, item, branch_first, nd)
        # END for each item on work stack
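# Usage sketch (illustrative, not part of the original source): Commit and Tree
# both provide _get_intermediate_items(), so ancestry walks and directory walks
# share this traversal. The repository path is hypothetical.
#
#   >>> import git
#   >>> repo = git.Repo('/path/to/repo')
#   >>> head = repo.head.commit
#   >>> ancestors = list(head.traverse(depth=2))    # two generations of parent commits
#   >>> big_blobs = list(head.tree.traverse(
#   ...     predicate=lambda i, d: i.type == 'blob' and i.size > 65536))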
class Serializable(object):
    """Defines methods to serialize and deserialize objects from and into a data stream"""
    __slots__ = tuple()

    def _serialize(self, stream):
        """Serialize the data of this object into the given data stream
        :note: a serialized object would ``_deserialize`` into the same object
        :param stream: a file-like object
        :return: self"""
        raise NotImplementedError("To be implemented in subclass")

    def _deserialize(self, stream):
        """Deserialize all information regarding this object from the stream
        :param stream: a file-like object
        :return: self"""
        raise NotImplementedError("To be implemented in subclass")
GitPython-0.3.2.RC1/git/objects/tag.py0000644000175100017510000000521011573623573016446 0ustar byronbyron# tag.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing all object based types."""
import base
from gitdb.util import hex_to_bin
from util import (
    get_object_type_by_name,
    parse_actor_and_date
)

__all__ = ("TagObject", )

class TagObject(base.Object):
    """Non-Lightweight tag carrying additional information about an object we are pointing to."""
    type = "tag"
    __slots__ = ("object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message")

    def __init__(self, repo, binsha, object=None, tag=None,
                 tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
        """Initialize a tag object with additional data

        :param repo: repository this object is located in
        :param binsha: 20 byte SHA1
        :param object: Object instance of object we are pointing to
        :param tag: name of this tag
        :param tagger: Actor identifying the tagger
        :param tagged_date: int_seconds_since_epoch is the DateTime of the tag creation -
            use time.gmtime to convert it into a different format
        :param tagger_tz_offset: int_seconds_west_of_utc is the timezone that the
            authored_date is in, in a format similar to time.altzone"""
        super(TagObject, self).__init__(repo, binsha)
        if object is not None:
            self.object = object
        if tag is not None:
            self.tag = tag
        if tagger is not None:
            self.tagger = tagger
        if tagged_date is not None:
            self.tagged_date = tagged_date
        if tagger_tz_offset is not None:
            self.tagger_tz_offset = tagger_tz_offset
        if message is not None:
            self.message = message

    def _set_cache_(self, attr):
        """Cache all our attributes at once"""
        if attr in TagObject.__slots__:
            ostream = self.repo.odb.stream(self.binsha)
            lines = ostream.read().splitlines()

            obj, hexsha = lines[0].split(" ")               # object <hexsha>
            type_token, type_name = lines[1].split(" ")     # type <type_name>
            self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))

            self.tag = lines[2][4:]     # tag <name>

            tagger_info = lines[3]      # tagger <actor> <date>
            self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)

            # line 4 empty - it could mark the beginning of the next header
            # in case there really is no message, it would not exist.
Otherwise # a newline separates header from message if len(lines) > 5: self.message = "\n".join(lines[5:]) else: self.message = '' # END check our attributes else: super(TagObject, self)._set_cache_(attr) GitPython-0.3.2.RC1/git/objects/tree.py0000644000175100017510000002061511573623573016640 0ustar byronbyron# tree.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import util from base import IndexObject from git.util import join_path from blob import Blob from submodule.base import Submodule import git.diff as diff from fun import ( tree_entries_from_data, tree_to_stream ) from gitdb.util import ( to_bin_sha, ) __all__ = ("TreeModifier", "Tree") class TreeModifier(object): """A utility class providing methods to alter the underlying cache in a list-like fashion. Once all adjustments are complete, the _cache, which really is a refernce to the cache of a tree, will be sorted. Assuring it will be in a serializable state""" __slots__ = '_cache' def __init__(self, cache): self._cache = cache def _index_by_name(self, name): """:return: index of an item with name, or -1 if not found""" for i, t in enumerate(self._cache): if t[2] == name: return i # END found item # END for each item in cache return -1 #{ Interface def set_done(self): """Call this method once you are done modifying the tree information. It may be called several times, but be aware that each call will cause a sort operation :return self:""" self._cache.sort(key=lambda t: t[2]) # sort by name return self #} END interface #{ Mutators def add(self, sha, mode, name, force=False): """Add the given item to the tree. If an item with the given name already exists, nothing will be done, but a ValueError will be raised if the sha and mode of the existing item do not match the one you add, unless force is True :param sha: The 20 or 40 byte sha of the item to add :param mode: int representing the stat compatible mode of the item :param force: If True, an item with your name and information will overwrite any existing item with the same name, no matter which information it has :return: self""" if '/' in name: raise ValueError("Name must not contain '/' characters") if (mode >> 12) not in Tree._map_id_to_type: raise ValueError("Invalid object type according to mode %o" % mode) sha = to_bin_sha(sha) index = self._index_by_name(name) item = (sha, mode, name) if index == -1: self._cache.append(item) else: if force: self._cache[index] = item else: ex_item = self._cache[index] if ex_item[0] != sha or ex_item[1] != mode: raise ValueError("Item %r existed with different properties" % name) # END handle mismatch # END handle force # END handle name exists return self def add_unchecked(self, binsha, mode, name): """Add the given item to the tree, its correctness is assumed, which puts the caller into responsibility to assure the input is correct. For more information on the parameters, see ``add`` :param binsha: 20 byte binary sha""" self._cache.append((binsha, mode, name)) def __delitem__(self, name): """Deletes an item with the given name if it exists""" index = self._index_by_name(name) if index > -1: del(self._cache[index]) #} END mutators class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): """Tree objects represent an ordered list of Blobs and other Trees. ``Tree as a list``:: Access a specific blob using the tree['filename'] notation. 
You may as well access by index blob = tree[0] """ type = "tree" __slots__ = "_cache" # actual integer ids for comparison commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link blob_id = 010 symlink_id = 012 tree_id = 004 _map_id_to_type = { commit_id : Submodule, blob_id : Blob, symlink_id : Blob # tree id added once Tree is defined } def __init__(self, repo, binsha, mode=tree_id<<12, path=None): super(Tree, self).__init__(repo, binsha, mode, path) @classmethod def _get_intermediate_items(cls, index_object): if index_object.type == "tree": return tuple(index_object._iter_convert_to_object(index_object._cache)) return tuple() def _set_cache_(self, attr): if attr == "_cache": # Set the data when we need it ostream = self.repo.odb.stream(self.binsha) self._cache = tree_entries_from_data(ostream.read()) else: super(Tree, self)._set_cache_(attr) # END handle attribute def _iter_convert_to_object(self, iterable): """Iterable yields tuples of (binsha, mode, name), which will be converted to the respective object representation""" for binsha, mode, name in iterable: path = join_path(self.path, name) try: yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) except KeyError: raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) # END for each item def __div__(self, file): """Find the named object in this tree's contents :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` :raise KeyError: if given file or tree does not exist in tree""" msg = "Blob or Tree named %r not found" if '/' in file: tree = self item = self tokens = file.split('/') for i,token in enumerate(tokens): item = tree[token] if item.type == 'tree': tree = item else: # safety assertion - blobs are at the end of the path if i != len(tokens)-1: raise KeyError(msg % file) return item # END handle item type # END for each token of split path if item == self: raise KeyError(msg % file) return item else: for info in self._cache: if info[2] == file: # [2] == name return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) # END for each obj raise KeyError( msg % file ) # END handle long paths @property def trees(self): """:return: list(Tree, ...) list of trees directly below this tree""" return [ i for i in self if i.type == "tree" ] @property def blobs(self): """:return: list(Blob, ...) list of blobs directly below this tree""" return [ i for i in self if i.type == "blob" ] @property def cache(self): """ :return: An object allowing to modify the internal cache. This can be used to change the tree's contents. When done, make sure you call ``set_done`` on the tree modifier, or serialization behaviour will be incorrect. 
See the ``TreeModifier`` for more information on how to alter the cache""" return TreeModifier(self._cache) def traverse( self, predicate = lambda i,d: True, prune = lambda i,d: False, depth = -1, branch_first=True, visit_once = False, ignore_self=1 ): """For documentation, see util.Traversable.traverse Trees are set to visit_once = False to gain more performance in the traversal""" return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) # List protocol def __getslice__(self, i, j): return list(self._iter_convert_to_object(self._cache[i:j])) def __iter__(self): return self._iter_convert_to_object(self._cache) def __len__(self): return len(self._cache) def __getitem__(self, item): if isinstance(item, int): info = self._cache[item] return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) if isinstance(item, basestring): # compatability return self.__div__(item) # END index is basestring raise TypeError( "Invalid index type: %r" % item ) def __contains__(self, item): if isinstance(item, IndexObject): for info in self._cache: if item.binsha == info[0]: return True # END compare sha # END for each entry # END handle item is index object # compatability # treat item as repo-relative path path = self.path for info in self._cache: if item == join_path(path, info[2]): return True # END for each item return False def __reversed__(self): return reversed(self._iter_convert_to_object(self._cache)) def _serialize(self, stream): """Serialize this tree into the stream. Please note that we will assume our tree data to be in a sorted state. If this is not the case, serialization will not generate a correct tree representation as these are assumed to be sorted by algorithms""" tree_to_stream(self._cache, stream.write) return self def _deserialize(self, stream): self._cache = tree_entries_from_data(stream.read()) return self # END tree # finalize map definition Tree._map_id_to_type[Tree.tree_id] = Tree GitPython-0.3.2.RC1/git/repo/0000755000175100017510000000000011605055665014634 5ustar byronbyronGitPython-0.3.2.RC1/git/repo/base.py0000644000175100017510000006016711604665352016131 0ustar byronbyron# repo.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.exc import InvalidGitRepositoryError, NoSuchPathError from git.cmd import Git from git.util import Actor from git.refs import * from git.index import IndexFile from git.objects import * from git.config import GitConfigParser from git.remote import ( Remote, digest_process_messages, finalize_process, add_progress ) from git.db import ( GitCmdObjectDB, GitDB ) from gitdb.util import ( join, isfile, hex_to_bin ) from fun import ( rev_parse, is_git_dir, touch ) import os import sys import re DefaultDBType = GitDB if sys.version_info[1] < 5: # python 2.4 compatiblity DefaultDBType = GitCmdObjectDB # END handle python 2.4 __all__ = ('Repo', ) class Repo(object): """Represents a git repository and allows you to query references, gather commit information, generate diffs, create and clone repositories query the log. 
class Repo(object):
	"""Represents a git repository and allows you to query references,
	gather commit information, generate diffs, create and clone repositories,
	and query the log.

	The following attributes are worth using:

	'working_dir' is the working directory of the git command, which is the working tree
	directory if available, or the .git directory in case of bare repositories

	'working_tree_dir' is the working tree directory, but will raise AssertionError
	if we are a bare repository.

	'git_dir' is the .git repository directory, which is always set."""

	DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
	__slots__ = ("working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb")

	# precompiled regex
	re_whitespace = re.compile(r'\s+')
	re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
	re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
	re_author_committer_start = re.compile(r'^(author|committer)')
	re_tab_full_line = re.compile(r'^\t(.*)$')

	# invariants
	# represents the configuration level of a configuration file
	config_level = ("system", "global", "repository")

	def __init__(self, path=None, odbt=DefaultDBType):
		"""Create a new Repo instance

		:param path: is the path to either the root git directory or the bare git repo::

			repo = Repo("/Users/mtrier/Development/git-python")
			repo = Repo("/Users/mtrier/Development/git-python.git")
			repo = Repo("~/Development/git-python.git")
			repo = Repo("$REPOSITORIES/Development/git-python.git")

		:param odbt: Object DataBase type - a type which is constructed by providing
			the directory containing the database objects, i.e. .git/objects. It will
			be used to access all object data
		:raise InvalidGitRepositoryError:
		:raise NoSuchPathError:
		:return: git.Repo"""
		epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd())))

		if not os.path.exists(epath):
			raise NoSuchPathError(epath)

		self.working_dir = None
		self._working_tree_dir = None
		self.git_dir = None
		curpath = epath

		# walk up the path to find the .git dir
		while curpath:
			if is_git_dir(curpath):
				self.git_dir = curpath
				self._working_tree_dir = os.path.dirname(curpath)
				break
			gitpath = join(curpath, '.git')
			if is_git_dir(gitpath):
				self.git_dir = gitpath
				self._working_tree_dir = curpath
				break
			curpath, dummy = os.path.split(curpath)
			if not dummy:
				break
		# END while curpath

		if self.git_dir is None:
			raise InvalidGitRepositoryError(epath)

		self._bare = False
		try:
			self._bare = self.config_reader("repository").getboolean('core', 'bare')
		except Exception:
			# lets not assume the option exists, although it should
			pass

		# adjust the wd in case we are actually bare - we didn't know that
		# in the first place
		if self._bare:
			self._working_tree_dir = None
		# END working dir handling

		self.working_dir = self._working_tree_dir or self.git_dir
		self.git = Git(self.working_dir)

		# special handling, in special times
		args = [join(self.git_dir, 'objects')]
		if issubclass(odbt, GitCmdObjectDB):
			args.append(self.git)
		self.odb = odbt(*args)

	def __eq__(self, rhs):
		if isinstance(rhs, Repo):
			return self.git_dir == rhs.git_dir
		return False

	def __ne__(self, rhs):
		return not self.__eq__(rhs)

	def __hash__(self):
		return hash(self.git_dir)

	def __repr__(self):
		return "%s(%r)" % (type(self).__name__, self.git_dir)

	# Description property
	def _get_description(self):
		filename = join(self.git_dir, 'description')
		return file(filename).read().rstrip()

	def _set_description(self, descr):
		filename = join(self.git_dir, 'description')
		file(filename, 'w').write(descr + '\n')

	description = property(_get_description, _set_description,
							doc="the project's description")
	del _get_description
	del _set_description

	@property
	def working_tree_dir(self):
		""":return: The working tree directory of our git repository
		:raise AssertionError: If we
are a bare repository""" if self._working_tree_dir is None: raise AssertionError( "Repository at %r is bare and does not have a working tree directory" % self.git_dir ) return self._working_tree_dir @property def bare(self): """:return: True if the repository is bare""" return self._bare @property def heads(self): """A list of ``Head`` objects representing the branch heads in this repo :return: ``git.IterableList(Head, ...)``""" return Head.list_items(self) @property def references(self): """A list of Reference objects representing tags, heads and remote references. :return: IterableList(Reference, ...)""" return Reference.list_items(self) # alias for references refs = references # alias for heads branches = heads @property def index(self): """:return: IndexFile representing this repository's index.""" return IndexFile(self) @property def head(self): """:return: HEAD Object pointing to the current head reference""" return HEAD(self,'HEAD') @property def remotes(self): """A list of Remote objects allowing to access and manipulate remotes :return: ``git.IterableList(Remote, ...)``""" return Remote.list_items(self) def remote(self, name='origin'): """:return: Remote with the specified name :raise ValueError: if no remote with such a name exists""" return Remote(self, name) #{ Submodules @property def submodules(self): """ :return: git.IterableList(Submodule, ...) of direct submodules available from the current head""" return Submodule.list_items(self) def submodule(self, name): """ :return: Submodule with the given name :raise ValueError: If no such submodule exists""" try: return self.submodules[name] except IndexError: raise ValueError("Didn't find submodule named %r" % name) # END exception handling def create_submodule(self, *args, **kwargs): """Create a new submodule :note: See the documentation of Submodule.add for a description of the applicable parameters :return: created submodules""" return Submodule.add(self, *args, **kwargs) def iter_submodules(self, *args, **kwargs): """An iterator yielding Submodule instances, see Traversable interface for a description of args and kwargs :return: Iterator""" return RootModule(self).traverse(*args, **kwargs) def submodule_update(self, *args, **kwargs): """Update the submodules, keeping the repository consistent as it will take the previous state into consideration. For more information, please see the documentation of RootModule.update""" return RootModule(self).update(*args, **kwargs) #}END submodules @property def tags(self): """A list of ``Tag`` objects that are available in this repo :return: ``git.IterableList(TagReference, ...)`` """ return TagReference.list_items(self) def tag(self,path): """:return: TagReference Object, reference pointing to a Commit or Tag :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5 """ return TagReference(self, path) def create_head(self, path, commit='HEAD', force=False, logmsg=None ): """Create a new head within the repository. For more documentation, please see the Head.create method. :return: newly created Head Reference""" return Head.create(self, path, commit, force, logmsg) def delete_head(self, *heads, **kwargs): """Delete the given heads :param kwargs: Additional keyword arguments to be passed to git-branch""" return Head.delete(self, *heads, **kwargs) def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): """Create a new tag reference. For more documentation, please see the TagReference.create method. 
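# A hedged sketch (not part of the original sources) of the reference and
# remote accessors documented above; the repository path and remote name are
# placeholder assumptions, and a 'master' branch is assumed to exist.
from git import Repo

repo = Repo("/path/to/repo")            # hypothetical path
master = repo.heads.master              # IterableList allows attribute access by name
all_tags = repo.tags                    # IterableList(TagReference, ...)
origin = repo.remote("origin")          # raises ValueError if no such remote exists

feature = repo.create_head("feature")   # branch off the current HEAD
repo.delete_head(feature)               # and remove it again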
:return: TagReference object """ return TagReference.create(self, path, ref, message, force, **kwargs) def delete_tag(self, *tags): """Delete the given tag references""" return TagReference.delete(self, *tags) def create_remote(self, name, url, **kwargs): """Create a new remote. For more information, please see the documentation of the Remote.create methods :return: Remote reference""" return Remote.create(self, name, url, **kwargs) def delete_remote(self, remote): """Delete the given remote.""" return Remote.remove(self, remote) def _get_config_path(self, config_level ): # we do not support an absolute path of the gitconfig on windows , # use the global config instead if sys.platform == "win32" and config_level == "system": config_level = "global" if config_level == "system": return "/etc/gitconfig" elif config_level == "global": return os.path.normpath(os.path.expanduser("~/.gitconfig")) elif config_level == "repository": return join(self.git_dir, "config") raise ValueError( "Invalid configuration level: %r" % config_level ) def config_reader(self, config_level=None): """ :return: GitConfigParser allowing to read the full git configuration, but not to write it The configuration will include values from the system, user and repository configuration files. :param config_level: For possible values, see config_writer method If None, all applicable levels will be used. Specify a level in case you know which exact file you whish to read to prevent reading multiple files for instance :note: On windows, system configuration cannot currently be read as the path is unknown, instead the global path will be used.""" files = None if config_level is None: files = [ self._get_config_path(f) for f in self.config_level ] else: files = [ self._get_config_path(config_level) ] return GitConfigParser(files, read_only=True) def config_writer(self, config_level="repository"): """ :return: GitConfigParser allowing to write values of the specified configuration file level. Config writers should be retrieved, used to change the configuration ,and written right away as they will lock the configuration file in question and prevent other's to write it. :param config_level: One of the following values system = sytem wide configuration file global = user level configuration file repository = configuration file for this repostory only""" return GitConfigParser(self._get_config_path(config_level), read_only = False) def commit(self, rev=None): """The Commit object for the specified revision :param rev: revision specifier, see git-rev-parse for viable options. :return: ``git.Commit``""" if rev is None: return self.head.commit else: return self.rev_parse(str(rev)+"^0") def iter_trees(self, *args, **kwargs): """:return: Iterator yielding Tree objects :note: Takes all arguments known to iter_commits method""" return ( c.tree for c in self.iter_commits(*args, **kwargs) ) def tree(self, rev=None): """The Tree object for the given treeish revision Examples:: repo.tree(repo.heads[0]) :param rev: is a revision pointing to a Treeish ( being a commit or tree ) :return: ``git.Tree`` :note: If you need a non-root level tree, find it by iterating the root tree. 
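# A hedged sketch (not part of the original sources) of config_reader and
# config_writer as documented above; the path is a placeholder and the option
# names are standard git settings.
from git import Repo

repo = Repo("/path/to/repo")                    # hypothetical path

reader = repo.config_reader()                   # merges all applicable levels
user_name = reader.get_value("user", "name")    # raises if the option is unset

writer = repo.config_writer("repository")       # .git/config only
writer.set_value("core", "logallrefupdates", True)
del writer                                      # release the file lock promptly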
Otherwise it cannot know about its path relative to the repository root and subsequent operations might have unexpected results.""" if rev is None: return self.head.commit.tree else: return self.rev_parse(str(rev)+"^{tree}") def iter_commits(self, rev=None, paths='', **kwargs): """A list of Commit objects representing the history of a given ref/commit :parm rev: revision specifier, see git-rev-parse for viable options. If None, the active branch will be used. :parm paths: is an optional path or a list of paths to limit the returned commits to Commits that do not contain that path or the paths will not be returned. :parm kwargs: Arguments to be passed to git-rev-list - common ones are max_count and skip :note: to receive only commits between two named revisions, use the "revA..revB" revision specifier :return ``git.Commit[]``""" if rev is None: rev = self.head.commit return Commit.iter_items(self, rev, paths, **kwargs) def _get_daemon_export(self): filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) return os.path.exists(filename) def _set_daemon_export(self, value): filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) fileexists = os.path.exists(filename) if value and not fileexists: touch(filename) elif not value and fileexists: os.unlink(filename) daemon_export = property(_get_daemon_export, _set_daemon_export, doc="If True, git-daemon may export this repository") del _get_daemon_export del _set_daemon_export def _get_alternates(self): """The list of alternates for this repo from which objects can be retrieved :return: list of strings being pathnames of alternates""" alternates_path = join(self.git_dir, 'objects', 'info', 'alternates') if os.path.exists(alternates_path): try: f = open(alternates_path) alts = f.read() finally: f.close() return alts.strip().splitlines() else: return list() def _set_alternates(self, alts): """Sets the alternates :parm alts: is the array of string paths representing the alternates at which git should look for objects, i.e. /home/user/repo/.git/objects :raise NoSuchPathError: :note: The method does not check for the existance of the paths in alts as the caller is responsible.""" alternates_path = join(self.git_dir, 'objects', 'info', 'alternates') if not alts: if isfile(alternates_path): os.remove(alternates_path) else: try: f = open(alternates_path, 'w') f.write("\n".join(alts)) finally: f.close() # END file handling # END alts handling alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") def is_dirty(self, index=True, working_tree=True, untracked_files=False): """ :return: ``True``, the repository is considered dirty. By default it will react like a git-status without untracked files, hence it is dirty if the index or the working copy have changes.""" if self._bare: # Bare repositories with no associated working directory are # always consired to be clean. return False # start from the one which is fastest to evaluate default_args = ('--abbrev=40', '--full-index', '--raw') if index: # diff index against HEAD if isfile(self.index.path) and self.head.is_valid() and \ len(self.git.diff('HEAD', '--cached', *default_args)): return True # END index handling if working_tree: # diff index against working tree if len(self.git.diff(*default_args)): return True # END working tree handling if untracked_files: if len(self.untracked_files): return True # END untracked files return False @property def untracked_files(self): """ :return: list(str,...) 
Files currently untracked as they have not been staged yet. Paths are relative to the current working directory of the git command. :note: ignored files will not appear here, i.e. files mentioned in .gitignore""" # make sure we get all files, no only untracked directores proc = self.git.status(untracked_files=True, as_process=True) stream = iter(proc.stdout) untracked_files = list() for line in stream: if not line.startswith("# Untracked files:"): continue # skip two lines stream.next() stream.next() for untracked_info in stream: if not untracked_info.startswith("#\t"): break untracked_files.append(untracked_info.replace("#\t", "").rstrip()) # END for each utracked info line # END for each line return untracked_files @property def active_branch(self): """The name of the currently active branch. :return: Head to the active branch""" return self.head.reference def blame(self, rev, file): """The blame information for the given file at the given revision. :parm rev: revision specifier, see git-rev-parse for viable options. :return: list: [git.Commit, list: []] A list of tuples associating a Commit object with a list of lines that changed within the given commit. The Commit objects will be given in order of appearance.""" data = self.git.blame(rev, '--', file, p=True) commits = dict() blames = list() info = None for line in data.splitlines(False): parts = self.re_whitespace.split(line, 1) firstpart = parts[0] if self.re_hexsha_only.search(firstpart): # handles # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 digits = parts[-1].split(" ") if len(digits) == 3: info = {'id': firstpart} blames.append([None, []]) # END blame data initialization else: m = self.re_author_committer_start.search(firstpart) if m: # handles: # author Tom Preston-Werner # author-mail # author-time 1192271832 # author-tz -0700 # committer Tom Preston-Werner # committer-mail # committer-time 1192271832 # committer-tz -0700 - IGNORED BY US role = m.group(0) if firstpart.endswith('-mail'): info["%s_email" % role] = parts[-1] elif firstpart.endswith('-time'): info["%s_date" % role] = int(parts[-1]) elif role == firstpart: info[role] = parts[-1] # END distinguish mail,time,name else: # handle # filename lib/grit.rb # summary add Blob # if firstpart.startswith('filename'): info['filename'] = parts[-1] elif firstpart.startswith('summary'): info['summary'] = parts[-1] elif firstpart == '': if info: sha = info['id'] c = commits.get(sha) if c is None: c = Commit( self, hex_to_bin(sha), author=Actor._from_string(info['author'] + ' ' + info['author_email']), authored_date=info['author_date'], committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), committed_date=info['committer_date'], message=info['summary']) commits[sha] = c # END if commit objects needs initial creation m = self.re_tab_full_line.search(line) text, = m.groups() blames[-1][0] = c blames[-1][1].append( text ) info = None # END if we collected commit info # END distinguish filename,summary,rest # END distinguish author|committer vs filename,summary,rest # END distinguish hexsha vs other information return blames @classmethod def init(cls, path=None, mkdir=True, **kwargs): """Initialize a git repository at the given path if specified :param path: is the full path to the repo (traditionally ends with /.git) or None in which case the repository will be created in the current working directory :parm mkdir: if specified will create the repository directory if it doesn't 
already exists. Creates the directory with a mode=0755. Only effective if a path is explicitly given :parm kwargs: keyword arguments serving as additional options to the git-init command :return: ``git.Repo`` (the newly created repo)""" if mkdir and path and not os.path.exists(path): os.makedirs(path, 0755) # git command automatically chdir into the directory git = Git(path) output = git.init(**kwargs) return Repo(path) @classmethod def _clone(cls, git, url, path, odb_default_type, progress, **kwargs): # special handling for windows for path at which the clone should be # created. # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence # we at least give a proper error instead of letting git fail prev_cwd = None prev_path = None odbt = kwargs.pop('odbt', odb_default_type) if os.name == 'nt': if '~' in path: raise OSError("Git cannot handle the ~ character in path %r correctly" % path) # on windows, git will think paths like c: are relative and prepend the # current working dir ( before it fails ). We temporarily adjust the working # dir to make this actually work match = re.match("(\w:[/\\\])(.*)", path) if match: prev_cwd = os.getcwd() prev_path = path drive, rest_of_path = match.groups() os.chdir(drive) path = rest_of_path kwargs['with_keep_cwd'] = True # END cwd preparation # END windows handling try: proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress)) if progress: digest_process_messages(proc.stderr, progress) #END handle progress finalize_process(proc) finally: if prev_cwd is not None: os.chdir(prev_cwd) path = prev_path # END reset previous working dir # END bad windows handling # our git command could have a different working dir than our actual # environment, hence we prepend its working dir if required if not os.path.isabs(path) and git.working_dir: path = join(git._working_dir, path) # adjust remotes - there may be operating systems which use backslashes, # These might be given as initial paths, but when handling the config file # that contains the remote from which we were clones, git stops liking it # as it will escape the backslashes. Hence we undo the escaping just to be # sure repo = cls(os.path.abspath(path), odbt = odbt) if repo.remotes: repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/")) # END handle remote repo return repo def clone(self, path, progress=None, **kwargs): """Create a clone from this repository. :param path: is the full path of the new repo (traditionally ends with ./.git). :param progress: See 'git.remote.Remote.push'. :param kwargs: odbt = ObjectDatabase Type, allowing to determine the object database implementation used by the returned Repo instance All remaining keyword arguments are given to the git-clone command :return: ``git.Repo`` (the newly cloned repo)""" return self._clone(self.git, self.git_dir, path, type(self.odb), progress, **kwargs) @classmethod def clone_from(cls, url, to_path, progress=None, **kwargs): """Create a clone from the given URL :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS :param to_path: Path to which the repository should be cloned to :param progress: See 'git.remote.Remote.push'. 
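# A hedged sketch (not part of the original sources) of repository creation,
# cloning and blame as documented above; all paths and URLs are placeholders,
# and a README file is assumed to exist in the cloned repository.
from git import Repo

new_repo = Repo.init("/tmp/new_repo", mkdir=True)           # hypothetical path
cloned = Repo.clone_from("git://example.com/project.git",   # hypothetical URL
							"/tmp/project")

# blame yields [commit, [line, ...]] pairs in order of appearance
for commit, lines in cloned.blame("HEAD", "README"):
	print commit.hexsha, len(lines)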
:param kwargs: see the ``clone`` method :return: Repo instance pointing to the cloned directory""" return cls._clone(Git(os.getcwd()), url, to_path, GitCmdObjectDB, progress, **kwargs) def archive(self, ostream, treeish=None, prefix=None, **kwargs): """Archive the tree at the given revision. :parm ostream: file compatible stream object to which the archive will be written :parm treeish: is the treeish name/id, defaults to active branch :parm prefix: is the optional prefix to prepend to each filename in the archive :parm kwargs: Additional arguments passed to git-archive NOTE: Use the 'format' argument to define the kind of format. Use specialized ostreams to write any format supported by python :raise GitCommandError: in case something went wrong :return: self""" if treeish is None: treeish = self.head.commit if prefix and 'prefix' not in kwargs: kwargs['prefix'] = prefix kwargs['output_stream'] = ostream self.git.archive(treeish, **kwargs) return self rev_parse = rev_parse def __repr__(self): return '' % self.git_dir GitPython-0.3.2.RC1/git/repo/fun.py0000644000175100017510000001704111575507662016006 0ustar byronbyron"""Package with general repository related functions""" import os from gitdb.exc import BadObject from git.refs import SymbolicReference from git.objects import Object from gitdb.util import ( join, isdir, isfile, hex_to_bin, bin_to_hex ) from string import digits __all__ = ('rev_parse', 'is_git_dir', 'touch') def touch(filename): fp = open(filename, "a") fp.close() def is_git_dir(d): """ This is taken from the git setup.c:is_git_directory function.""" if isdir(d) and \ isdir(join(d, 'objects')) and \ isdir(join(d, 'refs')): headref = join(d, 'HEAD') return isfile(headref) or \ (os.path.islink(headref) and os.readlink(headref).startswith('refs')) return False def short_to_long(odb, hexsha): """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha or None if no candidate could be found. :param hexsha: hexsha with less than 40 byte""" try: return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha)) except BadObject: return None # END exception handling def name_to_object(repo, name, return_ref=False): """ :return: object specified by the given name, hexshas ( short and long ) as well as references are supported :param return_ref: if name specifies a reference, we will return the reference instead of the object. Otherwise it will raise BadObject """ hexsha = None # is it a hexsha ? Try the most common ones, which is 7 to 40 if repo.re_hexsha_shortened.match(name): if len(name) != 40: # find long sha for short sha hexsha = short_to_long(repo.odb, name) else: hexsha = name # END handle short shas #END find sha if it matches # if we couldn't find an object for what seemed to be a short hexsha # try to find it as reference anyway, it could be named 'aaa' for instance if hexsha is None: for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'): try: hexsha = SymbolicReference.dereference_recursive(repo, base % name) if return_ref: return SymbolicReference(repo, base % name) #END handle symbolic ref break except ValueError: pass # END for each base # END handle hexsha # didn't find any ref, this is an error if return_ref: raise BadObject("Couldn't find reference named %r" % name) #END handle return ref # tried everything ? 
fail if hexsha is None: raise BadObject(name) # END assert hexsha was found return Object.new_from_sha(repo, hex_to_bin(hexsha)) def deref_tag(tag): """Recursively dereerence a tag and return the resulting object""" while True: try: tag = tag.object except AttributeError: break # END dereference tag return tag def to_commit(obj): """Convert the given object to a commit if possible and return it""" if obj.type == 'tag': obj = deref_tag(obj) if obj.type != "commit": raise ValueError("Cannot convert object %r to type commit" % obj) # END verify type return obj def rev_parse(repo, rev): """ :return: Object at the given revision, either Commit, Tag, Tree or Blob :param rev: git-rev-parse compatible revision specification, please see http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html for details :note: Currently there is no access to the rev-log, rev-specs may only contain topological tokens such ~ and ^. :raise BadObject: if the given revision could not be found :raise ValueError: If rev couldn't be parsed :raise IndexError: If invalid reflog index is specified""" # colon search mode ? if rev.startswith(':/'): # colon search mode raise NotImplementedError("commit by message search ( regex )") # END handle search obj = None ref = None output_type = "commit" start = 0 parsed_to = 0 lr = len(rev) while start < lr: if rev[start] not in "^~:@": start += 1 continue # END handle start token = rev[start] if obj is None: # token is a rev name if start == 0: ref = repo.head.ref else: if token == '@': ref = name_to_object(repo, rev[:start], return_ref=True) else: obj = name_to_object(repo, rev[:start]) #END handle token #END handle refname if ref is not None: obj = ref.commit #END handle ref # END initialize obj on first token start += 1 # try to parse {type} if start < lr and rev[start] == '{': end = rev.find('}', start) if end == -1: raise ValueError("Missing closing brace to define type in %s" % rev) output_type = rev[start+1:end] # exclude brace # handle type if output_type == 'commit': pass # default elif output_type == 'tree': try: obj = to_commit(obj).tree except (AttributeError, ValueError): pass # error raised later # END exception handling elif output_type in ('', 'blob'): if obj.type == 'tag': obj = deref_tag(obj) else: # cannot do anything for non-tags pass # END handle tag elif token == '@': # try single int assert ref is not None, "Requre Reference to access reflog" revlog_index = None try: # transform reversed index into the format of our revlog revlog_index = -(int(output_type)+1) except ValueError: # TODO: Try to parse the other date options, using parse_date # maybe raise NotImplementedError("Support for additional @{...} modes not implemented") #END handle revlog index try: entry = ref.log_entry(revlog_index) except IndexError: raise IndexError("Invalid revlog index: %i" % revlog_index) #END handle index out of bound obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) # make it pass the following checks output_type = None else: raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) # END handle output type # empty output types don't require any specific type, its just about dereferencing tags if output_type and obj.type != output_type: raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type)) # END verify ouput type start = end+1 # skip brace parsed_to = start continue # END parse type # try to parse a number num = 0 if token != ":": found_digit = False while start < lr: if rev[start] in digits: num = 
num * 10 + int(rev[start]) start += 1 found_digit = True else: break # END handle number # END number parse loop # no explicit number given, 1 is the default # It could be 0 though if not found_digit: num = 1 # END set default num # END number parsing only if non-blob mode parsed_to = start # handle hiererarchy walk try: if token == "~": obj = to_commit(obj) for item in xrange(num): obj = obj.parents[0] # END for each history item to walk elif token == "^": obj = to_commit(obj) # must be n'th parent if num: obj = obj.parents[num-1] elif token == ":": if obj.type != "tree": obj = obj.tree # END get tree type obj = obj[rev[start:]] parsed_to = lr else: raise ValueError("Invalid token: %r" % token) # END end handle tag except (IndexError, AttributeError): raise BadObject("Invalid Revision in %s" % rev) # END exception handling # END parse loop # still no obj ? Its probably a simple name if obj is None: obj = name_to_object(repo, rev) parsed_to = lr # END handle simple name if obj is None: raise ValueError("Revision specifier could not be parsed: %s" % rev) if parsed_to != lr: raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) return obj GitPython-0.3.2.RC1/git/repo/__init__.py0000644000175100017510000000006511573623573016751 0ustar byronbyron"""Initialize the Repo package""" from base import *GitPython-0.3.2.RC1/git/odict.py0000644000175100017510000013272011573623573015353 0ustar byronbyron# odict.py # An Ordered Dictionary object # Copyright (C) 2005 Nicola Larosa, Michael Foord # E-mail: nico AT tekNico DOT net, fuzzyman AT voidspace DOT org DOT uk # This software is licensed under the terms of the BSD license. # http://www.voidspace.org.uk/python/license.shtml # Basically you're free to copy, modify, distribute and relicense it, # So long as you keep a copy of the license with it. # Documentation at http://www.voidspace.org.uk/python/odict.html # For information about bugfixes, updates and support, please join the # Pythonutils mailing list: # http://groups.google.com/group/pythonutils/ # Comments, suggestions and bug reports welcome. """A dict that keeps keys in insertion order""" from __future__ import generators __author__ = ('Nicola Larosa ,' 'Michael Foord ') __docformat__ = "restructuredtext en" __revision__ = '$Id: odict.py 129 2005-09-12 18:15:28Z teknico $' __version__ = '0.2.2' __all__ = ['OrderedDict', 'SequenceOrderedDict'] import sys INTP_VER = sys.version_info[:2] if INTP_VER < (2, 2): raise RuntimeError("Python v.2.2 or later required") import types, warnings class OrderedDict(dict): """ A class of dictionary that keeps the insertion order of keys. All appropriate methods return keys, items, or values in an ordered way. All normal dictionary methods are available. Update and comparison is restricted to other OrderedDict objects. Various sequence methods are available, including the ability to explicitly mutate the key ordering. 
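# A hedged sketch (not part of the original module): the insertion-order
# behaviour described in the class docstring above.
d = OrderedDict()
d['first'] = 1
d['second'] = 2
d['third'] = 3
assert d.keys() == ['first', 'second', 'third']   # insertion order is kept
d['second'] = 22                                  # reassignment keeps the position
assert d.items()[1] == ('second', 22)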
__contains__ tests: >>> d = OrderedDict(((1, 3),)) >>> 1 in d 1 >>> 4 in d 0 __getitem__ tests: >>> OrderedDict(((1, 3), (3, 2), (2, 1)))[2] 1 >>> OrderedDict(((1, 3), (3, 2), (2, 1)))[4] Traceback (most recent call last): KeyError: 4 __len__ tests: >>> len(OrderedDict()) 0 >>> len(OrderedDict(((1, 3), (3, 2), (2, 1)))) 3 get tests: >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.get(1) 3 >>> d.get(4) is None 1 >>> d.get(4, 5) 5 >>> d OrderedDict([(1, 3), (3, 2), (2, 1)]) has_key tests: >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.has_key(1) 1 >>> d.has_key(4) 0 """ def __init__(self, init_val=(), strict=False): """ Create a new ordered dictionary. Cannot init from a normal dict, nor from kwargs, since items order is undefined in those cases. If the ``strict`` keyword argument is ``True`` (``False`` is the default) then when doing slice assignment - the ``OrderedDict`` you are assigning from *must not* contain any keys in the remaining dict. >>> OrderedDict() OrderedDict([]) >>> OrderedDict({1: 1}) Traceback (most recent call last): TypeError: undefined order, cannot get items from dict >>> OrderedDict({1: 1}.items()) OrderedDict([(1, 1)]) >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d OrderedDict([(1, 3), (3, 2), (2, 1)]) >>> OrderedDict(d) OrderedDict([(1, 3), (3, 2), (2, 1)]) """ self.strict = strict dict.__init__(self) if isinstance(init_val, OrderedDict): self._sequence = init_val.keys() dict.update(self, init_val) elif isinstance(init_val, dict): # we lose compatibility with other ordered dict types this way raise TypeError('undefined order, cannot get items from dict') else: self._sequence = [] self.update(init_val) ### Special methods ### def __delitem__(self, key): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> del d[3] >>> d OrderedDict([(1, 3), (2, 1)]) >>> del d[3] Traceback (most recent call last): KeyError: 3 >>> d[3] = 2 >>> d OrderedDict([(1, 3), (2, 1), (3, 2)]) >>> del d[0:1] >>> d OrderedDict([(2, 1), (3, 2)]) """ if isinstance(key, types.SliceType): # FIXME: efficiency? keys = self._sequence[key] for entry in keys: dict.__delitem__(self, entry) del self._sequence[key] else: # do the dict.__delitem__ *first* as it raises # the more appropriate error dict.__delitem__(self, key) self._sequence.remove(key) def __eq__(self, other): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d == OrderedDict(d) True >>> d == OrderedDict(((1, 3), (2, 1), (3, 2))) False >>> d == OrderedDict(((1, 0), (3, 2), (2, 1))) False >>> d == OrderedDict(((0, 3), (3, 2), (2, 1))) False >>> d == dict(d) False >>> d == False False """ if isinstance(other, OrderedDict): # FIXME: efficiency? # Generate both item lists for each compare return (self.items() == other.items()) else: return False def __lt__(self, other): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> c = OrderedDict(((0, 3), (3, 2), (2, 1))) >>> c < d True >>> d < c False >>> d < dict(c) Traceback (most recent call last): TypeError: Can only compare with other OrderedDicts """ if not isinstance(other, OrderedDict): raise TypeError('Can only compare with other OrderedDicts') # FIXME: efficiency? 
# Generate both item lists for each compare return (self.items() < other.items()) def __le__(self, other): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> c = OrderedDict(((0, 3), (3, 2), (2, 1))) >>> e = OrderedDict(d) >>> c <= d True >>> d <= c False >>> d <= dict(c) Traceback (most recent call last): TypeError: Can only compare with other OrderedDicts >>> d <= e True """ if not isinstance(other, OrderedDict): raise TypeError('Can only compare with other OrderedDicts') # FIXME: efficiency? # Generate both item lists for each compare return (self.items() <= other.items()) def __ne__(self, other): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d != OrderedDict(d) False >>> d != OrderedDict(((1, 3), (2, 1), (3, 2))) True >>> d != OrderedDict(((1, 0), (3, 2), (2, 1))) True >>> d == OrderedDict(((0, 3), (3, 2), (2, 1))) False >>> d != dict(d) True >>> d != False True """ if isinstance(other, OrderedDict): # FIXME: efficiency? # Generate both item lists for each compare return not (self.items() == other.items()) else: return True def __gt__(self, other): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> c = OrderedDict(((0, 3), (3, 2), (2, 1))) >>> d > c True >>> c > d False >>> d > dict(c) Traceback (most recent call last): TypeError: Can only compare with other OrderedDicts """ if not isinstance(other, OrderedDict): raise TypeError('Can only compare with other OrderedDicts') # FIXME: efficiency? # Generate both item lists for each compare return (self.items() > other.items()) def __ge__(self, other): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> c = OrderedDict(((0, 3), (3, 2), (2, 1))) >>> e = OrderedDict(d) >>> c >= d False >>> d >= c True >>> d >= dict(c) Traceback (most recent call last): TypeError: Can only compare with other OrderedDicts >>> e >= d True """ if not isinstance(other, OrderedDict): raise TypeError('Can only compare with other OrderedDicts') # FIXME: efficiency? 
# Generate both item lists for each compare return (self.items() >= other.items()) def __repr__(self): """ Used for __repr__ and __str__ >>> r1 = repr(OrderedDict((('a', 'b'), ('c', 'd'), ('e', 'f')))) >>> r1 "OrderedDict([('a', 'b'), ('c', 'd'), ('e', 'f')])" >>> r2 = repr(OrderedDict((('a', 'b'), ('e', 'f'), ('c', 'd')))) >>> r2 "OrderedDict([('a', 'b'), ('e', 'f'), ('c', 'd')])" >>> r1 == str(OrderedDict((('a', 'b'), ('c', 'd'), ('e', 'f')))) True >>> r2 == str(OrderedDict((('a', 'b'), ('e', 'f'), ('c', 'd')))) True """ return '%s([%s])' % (self.__class__.__name__, ', '.join( ['(%r, %r)' % (key, self[key]) for key in self._sequence])) def __setitem__(self, key, val): """ Allows slice assignment, so long as the slice is an OrderedDict >>> d = OrderedDict() >>> d['a'] = 'b' >>> d['b'] = 'a' >>> d[3] = 12 >>> d OrderedDict([('a', 'b'), ('b', 'a'), (3, 12)]) >>> d[:] = OrderedDict(((1, 2), (2, 3), (3, 4))) >>> d OrderedDict([(1, 2), (2, 3), (3, 4)]) >>> d[::2] = OrderedDict(((7, 8), (9, 10))) >>> d OrderedDict([(7, 8), (2, 3), (9, 10)]) >>> d = OrderedDict(((0, 1), (1, 2), (2, 3), (3, 4))) >>> d[1:3] = OrderedDict(((1, 2), (5, 6), (7, 8))) >>> d OrderedDict([(0, 1), (1, 2), (5, 6), (7, 8), (3, 4)]) >>> d = OrderedDict(((0, 1), (1, 2), (2, 3), (3, 4)), strict=True) >>> d[1:3] = OrderedDict(((1, 2), (5, 6), (7, 8))) >>> d OrderedDict([(0, 1), (1, 2), (5, 6), (7, 8), (3, 4)]) >>> a = OrderedDict(((0, 1), (1, 2), (2, 3)), strict=True) >>> a[3] = 4 >>> a OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a[::1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a[:2] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]) Traceback (most recent call last): ValueError: slice assignment must be from unique keys >>> a = OrderedDict(((0, 1), (1, 2), (2, 3))) >>> a[3] = 4 >>> a OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a[::1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a[:2] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a[::-1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> a OrderedDict([(3, 4), (2, 3), (1, 2), (0, 1)]) >>> d = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> d[:1] = 3 Traceback (most recent call last): TypeError: slice assignment requires an OrderedDict >>> d = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> d[:1] = OrderedDict([(9, 8)]) >>> d OrderedDict([(9, 8), (1, 2), (2, 3), (3, 4)]) """ if isinstance(key, types.SliceType): if not isinstance(val, OrderedDict): # FIXME: allow a list of tuples? raise TypeError('slice assignment requires an OrderedDict') keys = self._sequence[key] # NOTE: Could use ``range(*key.indices(len(self._sequence)))`` indexes = range(len(self._sequence))[key] if key.step is None: # NOTE: new slice may not be the same size as the one being # overwritten ! # NOTE: What is the algorithm for an impossible slice? # e.g. d[5:3] pos = key.start or 0 del self[key] newkeys = val.keys() for k in newkeys: if k in self: if self.strict: raise ValueError('slice assignment must be from ' 'unique keys') else: # NOTE: This removes duplicate keys *first* # so start position might have changed? 
del self[k] self._sequence = (self._sequence[:pos] + newkeys + self._sequence[pos:]) dict.update(self, val) else: # extended slice - length of new slice must be the same # as the one being replaced if len(keys) != len(val): raise ValueError('attempt to assign sequence of size %s ' 'to extended slice of size %s' % (len(val), len(keys))) # FIXME: efficiency? del self[key] item_list = zip(indexes, val.items()) # smallest indexes first - higher indexes not guaranteed to # exist item_list.sort() for pos, (newkey, newval) in item_list: if self.strict and newkey in self: raise ValueError('slice assignment must be from unique' ' keys') self.insert(pos, newkey, newval) else: if key not in self: self._sequence.append(key) dict.__setitem__(self, key, val) def __getitem__(self, key): """ Allows slicing. Returns an OrderedDict if you slice. >>> b = OrderedDict([(7, 0), (6, 1), (5, 2), (4, 3), (3, 4), (2, 5), (1, 6)]) >>> b[::-1] OrderedDict([(1, 6), (2, 5), (3, 4), (4, 3), (5, 2), (6, 1), (7, 0)]) >>> b[2:5] OrderedDict([(5, 2), (4, 3), (3, 4)]) >>> type(b[2:4]) """ if isinstance(key, types.SliceType): # FIXME: does this raise the error we want? keys = self._sequence[key] # FIXME: efficiency? return OrderedDict([(entry, self[entry]) for entry in keys]) else: return dict.__getitem__(self, key) __str__ = __repr__ def __setattr__(self, name, value): """ Implemented so that accesses to ``sequence`` raise a warning and are diverted to the new ``setkeys`` method. """ if name == 'sequence': warnings.warn('Use of the sequence attribute is deprecated.' ' Use the keys method instead.', DeprecationWarning) # NOTE: doesn't return anything self.setkeys(value) else: # FIXME: do we want to allow arbitrary setting of attributes? # Or do we want to manage it? object.__setattr__(self, name, value) def __getattr__(self, name): """ Implemented so that access to ``sequence`` raises a warning. >>> d = OrderedDict() >>> d.sequence [] """ if name == 'sequence': warnings.warn('Use of the sequence attribute is deprecated.' ' Use the keys method instead.', DeprecationWarning) # NOTE: Still (currently) returns a direct reference. Need to # because code that uses sequence will expect to be able to # mutate it in place. return self._sequence else: # raise the appropriate error raise AttributeError("OrderedDict has no '%s' attribute" % name) def __deepcopy__(self, memo): """ To allow deepcopy to work with OrderedDict. >>> from copy import deepcopy >>> a = OrderedDict([(1, 1), (2, 2), (3, 3)]) >>> a['test'] = {} >>> b = deepcopy(a) >>> b == a True >>> b is a False >>> a['test'] is b['test'] False """ from copy import deepcopy return self.__class__(deepcopy(self.items(), memo), self.strict) ### Read-only methods ### def copy(self): """ >>> OrderedDict(((1, 3), (3, 2), (2, 1))).copy() OrderedDict([(1, 3), (3, 2), (2, 1)]) """ return OrderedDict(self) def items(self): """ ``items`` returns a list of tuples representing all the ``(key, value)`` pairs in the dictionary. >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.items() [(1, 3), (3, 2), (2, 1)] >>> d.clear() >>> d.items() [] """ return zip(self._sequence, self.values()) def keys(self): """ Return a list of keys in the ``OrderedDict``. >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.keys() [1, 3, 2] """ return self._sequence[:] def values(self, values=None): """ Return a list of all the values in the OrderedDict. Optionally you can pass in a list of values, which will replace the current list. The value list must be the same len as the OrderedDict. 
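# A hedged sketch (not part of the original module): values() and the
# setvalues() replacement described above.
d = OrderedDict((('a', 1), ('b', 2)))
assert d.values() == [1, 2]       # values follow key order
d.setvalues([10, 20])             # must match the current length
assert d.items() == [('a', 10), ('b', 20)]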
>>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.values() [3, 2, 1] """ return [self[key] for key in self._sequence] def iteritems(self): """ >>> ii = OrderedDict(((1, 3), (3, 2), (2, 1))).iteritems() >>> ii.next() (1, 3) >>> ii.next() (3, 2) >>> ii.next() (2, 1) >>> ii.next() Traceback (most recent call last): StopIteration """ def make_iter(self=self): keys = self.iterkeys() while True: key = keys.next() yield (key, self[key]) return make_iter() def iterkeys(self): """ >>> ii = OrderedDict(((1, 3), (3, 2), (2, 1))).iterkeys() >>> ii.next() 1 >>> ii.next() 3 >>> ii.next() 2 >>> ii.next() Traceback (most recent call last): StopIteration """ return iter(self._sequence) __iter__ = iterkeys def itervalues(self): """ >>> iv = OrderedDict(((1, 3), (3, 2), (2, 1))).itervalues() >>> iv.next() 3 >>> iv.next() 2 >>> iv.next() 1 >>> iv.next() Traceback (most recent call last): StopIteration """ def make_iter(self=self): keys = self.iterkeys() while True: yield self[keys.next()] return make_iter() ### Read-write methods ### def clear(self): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.clear() >>> d OrderedDict([]) """ dict.clear(self) self._sequence = [] def pop(self, key, *args): """ No dict.pop in Python 2.2, gotta reimplement it >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.pop(3) 2 >>> d OrderedDict([(1, 3), (2, 1)]) >>> d.pop(4) Traceback (most recent call last): KeyError: 4 >>> d.pop(4, 0) 0 >>> d.pop(4, 0, 1) Traceback (most recent call last): TypeError: pop expected at most 2 arguments, got 3 """ if len(args) > 1: raise TypeError, ('pop expected at most 2 arguments, got %s' % (len(args) + 1)) if key in self: val = self[key] del self[key] else: try: val = args[0] except IndexError: raise KeyError(key) return val def popitem(self, i=-1): """ Delete and return an item specified by index, not a random one as in dict. The index is -1 by default (the last item). 
>>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.popitem() (2, 1) >>> d OrderedDict([(1, 3), (3, 2)]) >>> d.popitem(0) (1, 3) >>> OrderedDict().popitem() Traceback (most recent call last): KeyError: 'popitem(): dictionary is empty' >>> d.popitem(2) Traceback (most recent call last): IndexError: popitem(): index 2 not valid """ if not self._sequence: raise KeyError('popitem(): dictionary is empty') try: key = self._sequence[i] except IndexError: raise IndexError('popitem(): index %s not valid' % i) return (key, self.pop(key)) def setdefault(self, key, defval = None): """ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.setdefault(1) 3 >>> d.setdefault(4) is None True >>> d OrderedDict([(1, 3), (3, 2), (2, 1), (4, None)]) >>> d.setdefault(5, 0) 0 >>> d OrderedDict([(1, 3), (3, 2), (2, 1), (4, None), (5, 0)]) """ if key in self: return self[key] else: self[key] = defval return defval def update(self, from_od): """ Update from another OrderedDict or sequence of (key, value) pairs >>> d = OrderedDict(((1, 0), (0, 1))) >>> d.update(OrderedDict(((1, 3), (3, 2), (2, 1)))) >>> d OrderedDict([(1, 3), (0, 1), (3, 2), (2, 1)]) >>> d.update({4: 4}) Traceback (most recent call last): TypeError: undefined order, cannot get items from dict >>> d.update((4, 4)) Traceback (most recent call last): TypeError: cannot convert dictionary update sequence element "4" to a 2-item sequence """ if isinstance(from_od, OrderedDict): for key, val in from_od.items(): self[key] = val elif isinstance(from_od, dict): # we lose compatibility with other ordered dict types this way raise TypeError('undefined order, cannot get items from dict') else: # FIXME: efficiency? # sequence of 2-item sequences, or error for item in from_od: try: key, val = item except TypeError: raise TypeError('cannot convert dictionary update' ' sequence element "%s" to a 2-item sequence' % item) self[key] = val def rename(self, old_key, new_key): """ Rename the key for a given value, without modifying sequence order. For the case where new_key already exists this raise an exception, since if new_key exists, it is ambiguous as to what happens to the associated values, and the position of new_key in the sequence. >>> od = OrderedDict() >>> od['a'] = 1 >>> od['b'] = 2 >>> od.items() [('a', 1), ('b', 2)] >>> od.rename('b', 'c') >>> od.items() [('a', 1), ('c', 2)] >>> od.rename('c', 'a') Traceback (most recent call last): ValueError: New key already exists: 'a' >>> od.rename('d', 'b') Traceback (most recent call last): KeyError: 'd' """ if new_key == old_key: # no-op return if new_key in self: raise ValueError("New key already exists: %r" % new_key) # rename sequence entry value = self[old_key] old_idx = self._sequence.index(old_key) self._sequence[old_idx] = new_key # rename internal dict entry dict.__delitem__(self, old_key) dict.__setitem__(self, new_key, value) def setitems(self, items): """ This method allows you to set the items in the dict. It takes a list of tuples - of the same sort returned by the ``items`` method. >>> d = OrderedDict() >>> d.setitems(((3, 1), (2, 3), (1, 2))) >>> d OrderedDict([(3, 1), (2, 3), (1, 2)]) """ self.clear() # FIXME: this allows you to pass in an OrderedDict as well :-) self.update(items) def setkeys(self, keys): """ ``setkeys`` all ows you to pass in a new list of keys which will replace the current set. This must contain the same set of keys, but need not be in the same order. If you pass in new keys that don't match, a ``KeyError`` will be raised. 
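# A hedged sketch (not part of the original module): the mutating helpers
# documented above - update from ordered input, and rename in place.
d = OrderedDict(((1, 'one'), (2, 'two')))
d.update([(3, 'three')])          # sequences of pairs keep their order
d.rename(2, 'II')                 # key changes, position and value stay
assert d.keys() == [1, 'II', 3]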
>>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.keys() [1, 3, 2] >>> d.setkeys((1, 2, 3)) >>> d OrderedDict([(1, 3), (2, 1), (3, 2)]) >>> d.setkeys(['a', 'b', 'c']) Traceback (most recent call last): KeyError: 'Keylist is not the same as current keylist.' """ # FIXME: Efficiency? (use set for Python 2.4 :-) # NOTE: list(keys) rather than keys[:] because keys[:] returns # a tuple, if keys is a tuple. kcopy = list(keys) kcopy.sort() self._sequence.sort() if kcopy != self._sequence: raise KeyError('Keylist is not the same as current keylist.') # NOTE: This makes the _sequence attribute a new object, instead # of changing it in place. # FIXME: efficiency? self._sequence = list(keys) def setvalues(self, values): """ You can pass in a list of values, which will replace the current list. The value list must be the same len as the OrderedDict. (Or a ``ValueError`` is raised.) >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.setvalues((1, 2, 3)) >>> d OrderedDict([(1, 1), (3, 2), (2, 3)]) >>> d.setvalues([6]) Traceback (most recent call last): ValueError: Value list is not the same length as the OrderedDict. """ if len(values) != len(self): # FIXME: correct error to raise? raise ValueError('Value list is not the same length as the ' 'OrderedDict.') self.update(zip(self, values)) ### Sequence Methods ### def index(self, key): """ Return the position of the specified key in the OrderedDict. >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.index(3) 1 >>> d.index(4) Traceback (most recent call last): ValueError: list.index(x): x not in list """ return self._sequence.index(key) def insert(self, index, key, value): """ Takes ``index``, ``key``, and ``value`` as arguments. Sets ``key`` to ``value``, so that ``key`` is at position ``index`` in the OrderedDict. >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.insert(0, 4, 0) >>> d OrderedDict([(4, 0), (1, 3), (3, 2), (2, 1)]) >>> d.insert(0, 2, 1) >>> d OrderedDict([(2, 1), (4, 0), (1, 3), (3, 2)]) >>> d.insert(8, 8, 1) >>> d OrderedDict([(2, 1), (4, 0), (1, 3), (3, 2), (8, 1)]) """ if key in self: # FIXME: efficiency? del self[key] self._sequence.insert(index, key) dict.__setitem__(self, key, value) def reverse(self): """ Reverse the order of the OrderedDict. >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) >>> d.reverse() >>> d OrderedDict([(2, 1), (3, 2), (1, 3)]) """ self._sequence.reverse() def sort(self, *args, **kwargs): """ Sort the key order in the OrderedDict. This method takes the same arguments as the ``list.sort`` method on your version of Python. >>> d = OrderedDict(((4, 1), (2, 2), (3, 3), (1, 4))) >>> d.sort() >>> d OrderedDict([(1, 4), (2, 2), (3, 3), (4, 1)]) """ self._sequence.sort(*args, **kwargs) class Keys(object): # FIXME: should this object be a subclass of list? """ Custom object for accessing the keys of an OrderedDict. Can be called like the normal ``OrderedDict.keys`` method, but also supports indexing and sequence methods. """ def __init__(self, main): self._main = main def __call__(self): """Pretend to be the keys method.""" return self._main._keys() def __getitem__(self, index): """Fetch the key at position i.""" # NOTE: this automatically supports slicing :-) return self._main._sequence[index] def __setitem__(self, index, name): """ You cannot assign to keys, but you can do slice assignment to re-order them. You can only do slice assignment if the new set of keys is a reordering of the original set. """ if isinstance(index, types.SliceType): # FIXME: efficiency? 
# check length is the same indexes = range(len(self._main._sequence))[index] if len(indexes) != len(name): raise ValueError('attempt to assign sequence of size %s ' 'to slice of size %s' % (len(name), len(indexes))) # check they are the same keys # FIXME: Use set old_keys = self._main._sequence[index] new_keys = list(name) old_keys.sort() new_keys.sort() if old_keys != new_keys: raise KeyError('Keylist is not the same as current keylist.') orig_vals = [self._main[k] for k in name] del self._main[index] vals = zip(indexes, name, orig_vals) vals.sort() for i, k, v in vals: if self._main.strict and k in self._main: raise ValueError('slice assignment must be from ' 'unique keys') self._main.insert(i, k, v) else: raise ValueError('Cannot assign to keys') ### following methods pinched from UserList and adapted ### def __repr__(self): return repr(self._main._sequence) # FIXME: do we need to check if we are comparing with another ``Keys`` # object? (like the __cast method of UserList) def __lt__(self, other): return self._main._sequence < other def __le__(self, other): return self._main._sequence <= other def __eq__(self, other): return self._main._sequence == other def __ne__(self, other): return self._main._sequence != other def __gt__(self, other): return self._main._sequence > other def __ge__(self, other): return self._main._sequence >= other # FIXME: do we need __cmp__ as well as rich comparisons? def __cmp__(self, other): return cmp(self._main._sequence, other) def __contains__(self, item): return item in self._main._sequence def __len__(self): return len(self._main._sequence) def __iter__(self): return self._main.iterkeys() def count(self, item): return self._main._sequence.count(item) def index(self, item, *args): return self._main._sequence.index(item, *args) def reverse(self): self._main._sequence.reverse() def sort(self, *args, **kwds): self._main._sequence.sort(*args, **kwds) def __mul__(self, n): return self._main._sequence*n __rmul__ = __mul__ def __add__(self, other): return self._main._sequence + other def __radd__(self, other): return other + self._main._sequence ## following methods not implemented for keys ## def __delitem__(self, i): raise TypeError('Can\'t delete items from keys') def __iadd__(self, other): raise TypeError('Can\'t add in place to keys') def __imul__(self, n): raise TypeError('Can\'t multiply keys in place') def append(self, item): raise TypeError('Can\'t append items to keys') def insert(self, i, item): raise TypeError('Can\'t insert items into keys') def pop(self, i=-1): raise TypeError('Can\'t pop items from keys') def remove(self, item): raise TypeError('Can\'t remove items from keys') def extend(self, other): raise TypeError('Can\'t extend keys') class Items(object): """ Custom object for accessing the items of an OrderedDict. Can be called like the normal ``OrderedDict.items`` method, but also supports indexing and sequence methods. """ def __init__(self, main): self._main = main def __call__(self): """Pretend to be the items method.""" return self._main._items() def __getitem__(self, index): """Fetch the item at position i.""" if isinstance(index, types.SliceType): # fetching a slice returns an OrderedDict return self._main[index].items() key = self._main._sequence[index] return (key, self._main[key]) def __setitem__(self, index, item): """Set item at position i to item.""" if isinstance(index, types.SliceType): # NOTE: item must be an iterable (list of tuples) self._main[index] = OrderedDict(item) else: # FIXME: Does this raise a sensible error? 
orig = self._main.keys[index] key, value = item if self._main.strict and key in self and (key != orig): raise ValueError('slice assignment must be from ' 'unique keys') # delete the current one del self._main[self._main._sequence[index]] self._main.insert(index, key, value) def __delitem__(self, i): """Delete the item at position i.""" key = self._main._sequence[i] if isinstance(i, types.SliceType): for k in key: # FIXME: efficiency? del self._main[k] else: del self._main[key] ### following methods pinched from UserList and adapted ### def __repr__(self): return repr(self._main.items()) # FIXME: do we need to check if we are comparing with another ``Items`` # object? (like the __cast method of UserList) def __lt__(self, other): return self._main.items() < other def __le__(self, other): return self._main.items() <= other def __eq__(self, other): return self._main.items() == other def __ne__(self, other): return self._main.items() != other def __gt__(self, other): return self._main.items() > other def __ge__(self, other): return self._main.items() >= other def __cmp__(self, other): return cmp(self._main.items(), other) def __contains__(self, item): return item in self._main.items() def __len__(self): return len(self._main._sequence) # easier :-) def __iter__(self): return self._main.iteritems() def count(self, item): return self._main.items().count(item) def index(self, item, *args): return self._main.items().index(item, *args) def reverse(self): self._main.reverse() def sort(self, *args, **kwds): self._main.sort(*args, **kwds) def __mul__(self, n): return self._main.items()*n __rmul__ = __mul__ def __add__(self, other): return self._main.items() + other def __radd__(self, other): return other + self._main.items() def append(self, item): """Add an item to the end.""" # FIXME: this is only append if the key isn't already present key, value = item self._main[key] = value def insert(self, i, item): key, value = item self._main.insert(i, key, value) def pop(self, i=-1): key = self._main._sequence[i] return (key, self._main.pop(key)) def remove(self, item): key, value = item try: assert value == self._main[key] except (KeyError, AssertionError): raise ValueError('ValueError: list.remove(x): x not in list') else: del self._main[key] def extend(self, other): # FIXME: is only a true extend if none of the keys already present for item in other: key, value = item self._main[key] = value def __iadd__(self, other): self.extend(other) ## following methods not implemented for items ## def __imul__(self, n): raise TypeError('Can\'t multiply items in place') class Values(object): """ Custom object for accessing the values of an OrderedDict. Can be called like the normal ``OrderedDict.values`` method, but also supports indexing and sequence methods. """ def __init__(self, main): self._main = main def __call__(self): """Pretend to be the values method.""" return self._main._values() def __getitem__(self, index): """Fetch the value at position i.""" if isinstance(index, types.SliceType): return [self._main[key] for key in self._main._sequence[index]] else: return self._main[self._main._sequence[index]] def __setitem__(self, index, value): """ Set the value at position i to value. You can only do slice assignment to values if you supply a sequence of equal length to the slice you are replacing. 
""" if isinstance(index, types.SliceType): keys = self._main._sequence[index] if len(keys) != len(value): raise ValueError('attempt to assign sequence of size %s ' 'to slice of size %s' % (len(name), len(keys))) # FIXME: efficiency? Would be better to calculate the indexes # directly from the slice object # NOTE: the new keys can collide with existing keys (or even # contain duplicates) - these will overwrite for key, val in zip(keys, value): self._main[key] = val else: self._main[self._main._sequence[index]] = value ### following methods pinched from UserList and adapted ### def __repr__(self): return repr(self._main.values()) # FIXME: do we need to check if we are comparing with another ``Values`` # object? (like the __cast method of UserList) def __lt__(self, other): return self._main.values() < other def __le__(self, other): return self._main.values() <= other def __eq__(self, other): return self._main.values() == other def __ne__(self, other): return self._main.values() != other def __gt__(self, other): return self._main.values() > other def __ge__(self, other): return self._main.values() >= other def __cmp__(self, other): return cmp(self._main.values(), other) def __contains__(self, item): return item in self._main.values() def __len__(self): return len(self._main._sequence) # easier :-) def __iter__(self): return self._main.itervalues() def count(self, item): return self._main.values().count(item) def index(self, item, *args): return self._main.values().index(item, *args) def reverse(self): """Reverse the values""" vals = self._main.values() vals.reverse() # FIXME: efficiency self[:] = vals def sort(self, *args, **kwds): """Sort the values.""" vals = self._main.values() vals.sort(*args, **kwds) self[:] = vals def __mul__(self, n): return self._main.values()*n __rmul__ = __mul__ def __add__(self, other): return self._main.values() + other def __radd__(self, other): return other + self._main.values() ## following methods not implemented for values ## def __delitem__(self, i): raise TypeError('Can\'t delete items from values') def __iadd__(self, other): raise TypeError('Can\'t add in place to values') def __imul__(self, n): raise TypeError('Can\'t multiply values in place') def append(self, item): raise TypeError('Can\'t append items to values') def insert(self, i, item): raise TypeError('Can\'t insert items into values') def pop(self, i=-1): raise TypeError('Can\'t pop items from values') def remove(self, item): raise TypeError('Can\'t remove items from values') def extend(self, other): raise TypeError('Can\'t extend values') class SequenceOrderedDict(OrderedDict): """ Experimental version of OrderedDict that has a custom object for ``keys``, ``values``, and ``items``. These are callable sequence objects that work as methods, or can be manipulated directly as sequences. Test for ``keys``, ``items`` and ``values``. 
    >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
    >>> d
    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
    >>> d.keys
    [1, 2, 3]
    >>> d.keys()
    [1, 2, 3]
    >>> d.setkeys((3, 2, 1))
    >>> d
    SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
    >>> d.setkeys((1, 2, 3))
    >>> d.keys[0]
    1
    >>> d.keys[:]
    [1, 2, 3]
    >>> d.keys[-1]
    3
    >>> d.keys[-2]
    2
    >>> d.keys[0:2] = [2, 1]
    >>> d
    SequenceOrderedDict([(2, 3), (1, 2), (3, 4)])
    >>> d.keys.reverse()
    >>> d.keys
    [3, 1, 2]
    >>> d.keys = [1, 2, 3]
    >>> d
    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
    >>> d.keys = [3, 1, 2]
    >>> d
    SequenceOrderedDict([(3, 4), (1, 2), (2, 3)])

    >>> a = SequenceOrderedDict()
    >>> b = SequenceOrderedDict()
    >>> a.keys == b.keys
    True
    >>> a['a'] = 3
    >>> a.keys == b.keys
    False
    >>> b['a'] = 3
    >>> a.keys == b.keys
    True
    >>> b['b'] = 3
    >>> a.keys == b.keys
    False
    >>> a.keys > b.keys
    False
    >>> a.keys < b.keys
    True
    >>> 'a' in a.keys
    True
    >>> len(b.keys)
    2
    >>> 'c' in d.keys
    False
    >>> 1 in d.keys
    True
    >>> [v for v in d.keys]
    [3, 1, 2]
    >>> d.keys.sort()
    >>> d.keys
    [1, 2, 3]
    >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)), strict=True)
    >>> d.keys[::-1] = [1, 2, 3]
    >>> d
    SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
    >>> d.keys[:2]
    [3, 2]
    >>> d.keys[:2] = [1, 3]
    Traceback (most recent call last):
    KeyError: 'Keylist is not the same as current keylist.'

    >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
    >>> d
    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
    >>> d.values
    [2, 3, 4]
    >>> d.values()
    [2, 3, 4]
    >>> d.setvalues((4, 3, 2))
    >>> d
    SequenceOrderedDict([(1, 4), (2, 3), (3, 2)])
    >>> d.values[::-1]
    [2, 3, 4]
    >>> d.values[0]
    4
    >>> d.values[-2]
    3
    >>> del d.values[0]
    Traceback (most recent call last):
    TypeError: Can't delete items from values
    >>> d.values[::2] = [2, 4]
    >>> d
    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
    >>> 7 in d.values
    False
    >>> len(d.values)
    3
    >>> [val for val in d.values]
    [2, 3, 4]
    >>> d.values[-1] = 2
    >>> d.values.count(2)
    2
    >>> d.values.index(2)
    0
    >>> d.values[-1] = 7
    >>> d.values
    [2, 3, 7]
    >>> d.values.reverse()
    >>> d.values
    [7, 3, 2]
    >>> d.values.sort()
    >>> d.values
    [2, 3, 7]
    >>> d.values.append('anything')
    Traceback (most recent call last):
    TypeError: Can't append items to values
    >>> d.values = (1, 2, 3)
    >>> d
    SequenceOrderedDict([(1, 1), (2, 2), (3, 3)])

    >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
    >>> d
    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
    >>> d.items()
    [(1, 2), (2, 3), (3, 4)]
    >>> d.setitems([(3, 4), (2, 3), (1, 2)])
    >>> d
    SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
    >>> d.items[0]
    (3, 4)
    >>> d.items[:-1]
    [(3, 4), (2, 3)]
    >>> d.items[1] = (6, 3)
    >>> d.items
    [(3, 4), (6, 3), (1, 2)]
    >>> d.items[1:2] = [(9, 9)]
    >>> d
    SequenceOrderedDict([(3, 4), (9, 9), (1, 2)])
    >>> del d.items[1:2]
    >>> d
    SequenceOrderedDict([(3, 4), (1, 2)])
    >>> (3, 4) in d.items
    True
    >>> (4, 3) in d.items
    False
    >>> len(d.items)
    2
    >>> [v for v in d.items]
    [(3, 4), (1, 2)]
    >>> d.items.count((3, 4))
    1
    >>> d.items.index((1, 2))
    1
    >>> d.items.index((2, 1))
    Traceback (most recent call last):
    ValueError: list.index(x): x not in list
    >>> d.items.reverse()
    >>> d.items
    [(1, 2), (3, 4)]
    >>> d.items.reverse()
    >>> d.items.sort()
    >>> d.items
    [(1, 2), (3, 4)]
    >>> d.items.append((5, 6))
    >>> d.items
    [(1, 2), (3, 4), (5, 6)]
    >>> d.items.insert(0, (0, 0))
    >>> d.items
    [(0, 0), (1, 2), (3, 4), (5, 6)]
    >>> d.items.insert(-1, (7, 8))
    >>> d.items
    [(0, 0), (1, 2), (3, 4), (7, 8), (5, 6)]
    >>> d.items.pop()
    (5, 6)
    >>> d.items
    [(0, 0), (1, 2), (3, 4), (7, 8)]
    >>> d.items.remove((1, 2))
    >>> d.items
    [(0, 0), (3, 4), (7, 8)]
    >>> d.items.extend([(1, 2), (5, 6)])
    >>> d.items
    [(0, 0), (3, 4), (7, 8), (1, 2), (5, 6)]
    """
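
    # NOTE: ``keys``, ``values`` and ``items`` are rebound in __init__ as
    # instance attributes (Keys/Values/Items views) that shadow the inherited
    # methods; the original bound methods are kept as ``_keys``, ``_values``
    # and ``_items`` so the views can still delegate to them.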
    def __init__(self, init_val=(), strict=True):
        OrderedDict.__init__(self, init_val, strict=strict)
        self._keys = self.keys
        self._values = self.values
        self._items = self.items
        self.keys = Keys(self)
        self.values = Values(self)
        self.items = Items(self)
        self._att_dict = {
            'keys': self.setkeys,
            'items': self.setitems,
            'values': self.setvalues,
        }

    def __setattr__(self, name, value):
        """Protect keys, items, and values."""
        if not '_att_dict' in self.__dict__:
            object.__setattr__(self, name, value)
        else:
            try:
                fun = self._att_dict[name]
            except KeyError:
                OrderedDict.__setattr__(self, name, value)
            else:
                fun(value)


if __name__ == '__main__':
    if INTP_VER < (2, 3):
        raise RuntimeError("Tests require Python v.2.3 or later")
    # turn off warnings for tests
    warnings.filterwarnings('ignore')
    # run the code tests in doctest format
    import doctest
    m = sys.modules.get('__main__')
    globs = m.__dict__.copy()
    globs.update({
        'INTP_VER': INTP_VER,
    })
    doctest.testmod(m, globs=globs)
GitPython-0.3.2.RC1/git/config.py0000644000175100017510000003220711575507662015517 0ustar byronbyron# config.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing a configuration parser implementation able to properly read and
write configuration files"""

import re
import os
import ConfigParser as cp
import inspect
import cStringIO

from git.odict import OrderedDict
from git.util import LockFile

__all__ = ('GitConfigParser', 'SectionConstraint')


class MetaParserBuilder(type):
    """Utility class wrapping base-class methods into decorators that assure read-only properties"""
    def __new__(metacls, name, bases, clsdict):
        """
        Equip all base-class methods with a needs_values decorator, and all non-const
        methods with a set_dirty_and_flush_changes decorator in addition to that."""
        kmm = '_mutating_methods_'
        if kmm in clsdict:
            mutating_methods = clsdict[kmm]
            for base in bases:
                methods = ( t for t in inspect.getmembers(base, inspect.ismethod) if not t[0].startswith("_") )
                # NOTE: use a name distinct from the 'name' argument above, which must
                # survive the loop as it is passed on to type.__new__
                for method_name, method in methods:
                    if method_name in clsdict:
                        continue
                    method_with_values = needs_values(method)
                    if method_name in mutating_methods:
                        method_with_values = set_dirty_and_flush_changes(method_with_values)
                    # END mutating methods handling

                    clsdict[method_name] = method_with_values
                # END for each name/method pair
            # END for each base
        # END if mutating methods configuration is set

        new_type = super(MetaParserBuilder, metacls).__new__(metacls, name, bases, clsdict)
        return new_type


def needs_values(func):
    """Return a method assuring we read values (on demand) before we try to access them"""
    def assure_data_present(self, *args, **kwargs):
        self.read()
        return func(self, *args, **kwargs)
    # END wrapper method
    assure_data_present.__name__ = func.__name__
    return assure_data_present

def set_dirty_and_flush_changes(non_const_func):
    """Return a method that checks whether the given non-constant function may be called.
    If so, the instance will be set dirty. Additionally, we flush the changes right
    to disk"""
    def flush_changes(self, *args, **kwargs):
        rval = non_const_func(self, *args, **kwargs)
        self.write()
        return rval
    # END wrapper method
    flush_changes.__name__ = non_const_func.__name__
    return flush_changes
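
# Illustrative sketch only: a class opts into the machinery above by setting
# the metaclass and listing its mutating methods. ``ExampleBase`` and
# ``set_option`` are hypothetical names; any non-underscore base-class method
# would be wrapped the same way.
#
#   class ExampleParser(ExampleBase):
#       __metaclass__ = MetaParserBuilder
#       # these get needs_values *and* set_dirty_and_flush_changes,
#       # i.e. write() is called right after each of them executes
#       _mutating_methods_ = ("set_option", "remove_option")
#
# All other public base-class methods are wrapped with needs_values only and
# hence read() the configuration on demand before executing.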
class SectionConstraint(object):
    """Constrains a ConfigParser to option commands that always operate on the
    section we have been initialized with.

    It supports all ConfigParser methods that operate on an option"""
    __slots__ = ("_config", "_section_name")
    _valid_attrs_ = ("get_value", "set_value", "get", "set", "getint", "getfloat",
                    "getboolean", "has_option", "remove_section", "remove_option", "options")

    def __init__(self, config, section):
        self._config = config
        self._section_name = section

    def __getattr__(self, attr):
        if attr in self._valid_attrs_:
            return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs)
        return super(SectionConstraint, self).__getattribute__(attr)

    def _call_config(self, method, *args, **kwargs):
        """Call the configuration at the given method which must take a section name
        as first argument"""
        return getattr(self._config, method)(self._section_name, *args, **kwargs)

    @property
    def config(self):
        """return: ConfigParser instance we constrain"""
        return self._config


class GitConfigParser(cp.RawConfigParser, object):
    """Implements specifics required to read git style configuration files.

    This variation behaves much like the git config command, in that the configuration
    will be read on demand based on the filepath given during initialization.

    The changes will automatically be written once the instance goes out of scope, but
    can be triggered manually as well.

    The configuration file will be locked if you intend to change values, preventing other
    instances from writing concurrently.

    :note:
        The config is case-sensitive even when queried, hence section and option names
        must match perfectly."""
    __metaclass__ = MetaParserBuilder

    #{ Configuration
    # The lock type determines the type of lock to use in new configuration readers.
    # They must be compatible with the LockFile interface.
    # A suitable alternative would be the BlockingLockFile
    t_lock = LockFile

    re_comment = re.compile(r'^\s*[#;]')
    #} END configuration

    OPTCRE = re.compile(
        r'\s*(?P