From 00ce31ad308ff4c7ef874d2fa64374f47980c85c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 15 Nov 2010 16:53:12 +0100 Subject: [PATCH] Objects: Constructor now manually checks and sets the input arguments to the local cache - previously a procedural approach was used, which was less code, but slower too. Especially in case of CommitObjects unrolling the loop manually makes a difference. Submodule: Implemented query methods and did a bit of testing. More is to come, but the test works for now. As special addition, the submodule implementation uses the section name as submodule ID even though it seems to be just the path. This allows to make renames easier --- lib/git/objects/base.py | 16 +-- lib/git/objects/commit.py | 25 +++- lib/git/objects/submodule.py | 222 +++++++++++++++++++++++++++++------ lib/git/objects/tag.py | 13 +- test/git/test_submodule.py | 73 +++++++++++- 5 files changed, 297 insertions(+), 52 deletions(-) diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 41862ac25..82c2589c9 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -62,17 +62,6 @@ def new_from_sha(cls, repo, sha1): inst.size = oinfo.size return inst - def _set_self_from_args_(self, args_dict): - """Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. - - Will only set an attribute on self if the corresponding value in args_dict - is not None""" - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - def _set_cache_(self, attr): """Retrieve object information""" if attr == "size": @@ -140,7 +129,10 @@ def __init__(self, repo, binsha, mode=None, path=None): Path may not be set of the index object has been created directly as it cannot be retrieved without knowing the parent tree.""" super(IndexObject, self).__init__(repo, binsha) - self._set_self_from_args_(locals()) + if mode is not None: + self.mode = mode + if path is not None: + self.path = path def __hash__(self): """:return: diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 58c82da26..ae22fb767 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -108,7 +108,26 @@ def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, aut super(Commit,self).__init__(repo, binsha) if tree is not None: assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) - self._set_self_from_args_(locals()) + if tree is not None: + self.tree = tree + if author is not None: + self.author = author + if authored_date is not None: + self.authored_date = authored_date + if author_tz_offset is not None: + self.author_tz_offset = author_tz_offset + if committer is not None: + self.committer = committer + if committed_date is not None: + self.committed_date = committed_date + if committer_tz_offset is not None: + self.committer_tz_offset = committer_tz_offset + if message is not None: + self.message = message + if parents is not None: + self.parents = parents + if encoding is not None: + self.encoding = encoding @classmethod def _get_intermediate_items(cls, commit): @@ -434,7 +453,7 @@ def _deserialize(self, stream): try: self.author.name = self.author.name.decode(self.encoding) except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode author name: %s" % self.author.name + print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) # END handle author's encoding # a stream from our data simply gives us the plain message @@ -443,7 +462,7 @@ def _deserialize(self, stream): try: self.message = self.message.decode(self.encoding) except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode message: %s" % self.message + print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) # END exception handling return self diff --git a/lib/git/objects/submodule.py b/lib/git/objects/submodule.py index b9bcfc079..1aa0cfb53 100644 --- a/lib/git/objects/submodule.py +++ b/lib/git/objects/submodule.py @@ -1,11 +1,29 @@ import base -from cStringIO import StringIO -from git.config import GitConfigParser +from StringIO import StringIO # need a dict to set bloody .name field +from git.util import Iterable +from git.config import GitConfigParser, SectionConstraint from git.util import join_path_native from git.exc import InvalidGitRepositoryError, NoSuchPathError +import os + __all__ = ("Submodule", ) +#{ Utilities + +def sm_section(path): + """:return: section title used in .gitmodules configuration file""" + return 'submodule "%s"' % path + +def sm_name(section): + """:return: name of the submodule as parsed from the section name""" + section = section.strip() + return section[11:-1] +#} END utilities + + +#{ Classes + class SubmoduleConfigParser(GitConfigParser): """Catches calls to _write, and updates the .gitmodules blob in the index with the new data, if we have written into a stream. Otherwise it will @@ -13,7 +31,7 @@ class SubmoduleConfigParser(GitConfigParser): _mutating_methods_ = tuple() -class Submodule(base.IndexObject): +class Submodule(base.IndexObject, Iterable): """Implements access to a git submodule. They are special in that their sha represents a commit in the submodule's repository which is to be checked out at the path of this instance. @@ -22,12 +40,32 @@ class Submodule(base.IndexObject): All methods work in bare and non-bare repositories.""" - kModulesFile = '.gitmodules' + _id_attribute_ = "path" + k_modules_file = '.gitmodules' + k_ref_option = 'ref' + k_ref_default = 'master' # this is a bogus type for base class compatability type = 'submodule' - __slots__ = ('_parent_commit', '_url', '_ref') + __slots__ = ('_parent_commit', '_url', '_ref', '_name') + + def __init__(self, repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None): + """Initialize this instance with its attributes. We only document the ones + that differ from ``IndexObject`` + :param binsha: binary sha referring to a commit in the remote repository, see url parameter + :param parent_commit: see set_parent_commit() + :param url: The url to the remote repository which is the submodule + :param ref: Reference to checkout when cloning the remote repository""" + super(Submodule, self).__init__(repo, binsha, mode, path) + if parent_commit is not None: + self._parent_commit = parent_commit + if url is not None: + self._url = url + if ref is not None: + self._ref = ref + if name is not None: + self._name = name def _set_cache_(self, attr): if attr == 'size': @@ -38,35 +76,63 @@ def _set_cache_(self, attr): elif attr in ('path', '_url', '_ref'): reader = self.config_reader() # default submodule values - self._path = reader.get_value('path') + self.path = reader.get_value('path') self._url = reader.get_value('url') # git-python extension values - optional - self._ref = reader.get_value('ref', 'master') + self._ref = reader.get_value(self.k_ref_option, self.k_ref_default) + elif attr == '_name': + raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially") else: super(Submodule, self)._set_cache_(attr) # END handle attribute name - - def _sio_modules(self): - """:return: Configuration file as StringIO - we only access it through the respective blob's data""" - sio = StringIO(self._parent_commit.tree[self.kModulesFile].datastream.read()) - sio.name = self.kModulesFile - return sio - def _config_parser(self, read_only): - """:return: Config Parser constrained to our submodule in read or write mode""" - parent_matches_head = self.repo.head.commit == self._parent_commit - if not self.repo.bare and parent_matches_head: - fp_module = self.kModulesFile + def __eq__(self, other): + """Compare with another submodule""" + return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other) + + def __ne__(self, other): + """Compare with another submodule for inequality""" + return not (self == other) + + @classmethod + def _config_parser(cls, repo, parent_commit, read_only): + """:return: Config Parser constrained to our submodule in read or write mode + :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository + at the given parent commit. Otherwise the exception would be delayed until the first + access of the config parser""" + parent_matches_head = repo.head.commit == parent_commit + if not repo.bare and parent_matches_head: + fp_module = cls.k_modules_file + fp_module_path = os.path.join(repo.working_tree_dir, fp_module) + if not os.path.isfile(fp_module_path): + raise IOError("%s file was not accessible" % fp_module_path) + # END handle existance else: - fp_module = self._sio_modules() + try: + fp_module = cls._sio_modules(parent_commit) + except KeyError: + raise IOError("Could not find %s file in the tree of parent commit %s" % (cls.k_modules_file, parent_commit)) + # END handle exceptions # END handle non-bare working tree if not read_only and not parent_matches_head: raise ValueError("Cannot write blobs of 'historical' submodule configurations") # END handle writes of historical submodules - parser = GitConfigParser(fp_module, read_only = read_only) - return SectionConstraint(parser, 'submodule "%s"' % self.path) + return GitConfigParser(fp_module, read_only = read_only) + + + @classmethod + def _sio_modules(cls, parent_commit): + """:return: Configuration file as StringIO - we only access it through the respective blob's data""" + sio = StringIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) + sio.name = cls.k_modules_file + return sio + + def _config_parser_constrained(self, read_only): + """:return: Config Parser constrained to our submodule in read or write mode""" + parser = self._config_parser(self.repo, self._parent_commit, read_only) + return SectionConstraint(parser, sm_section(self.name)) #{ Edit Interface @@ -81,29 +147,52 @@ def add(cls, repo, path, url, skip_init=False): :param skip_init: if True, the new repository will not be cloned to its location. :return: The newly created submodule instance""" - def set_parent_commit(self, commit): + def set_parent_commit(self, commit, check=True): """Set this instance to use the given commit whose tree is supposed to contain the .gitmodules blob. :param commit: Commit'ish reference pointing at the root_tree - :raise ValueError: if the commit's tree didn't contain the .gitmodules blob.""" + :param check: if True, relatively expensive checks will be performed to verify + validity of the submodule. + :raise ValueError: if the commit's tree didn't contain the .gitmodules blob. + :raise ValueError: if the parent commit didn't store this submodule under the + current path""" pcommit = self.repo.commit(commit) - if self.kModulesFile not in pcommit.tree: - raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.kModulesFile)) + pctree = pcommit.tree + if self.k_modules_file not in pctree: + raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.k_modules_file)) # END handle exceptions + + prev_pc = self._parent_commit self._parent_commit = pcommit + if check: + parser = self._config_parser(self.repo, self._parent_commit, read_only=True) + if not parser.has_section(sm_section(self.name)): + self._parent_commit = prev_pc + raise ValueError("Submodule at path %r did not exist in parent commit %s" % (self.path, commit)) + # END handle submodule did not exist + # END handle checking mode + + # update our sha, it could have changed + self.binsha = pctree[self.path].binsha + # clear the possibly changed values for name in ('path', '_ref', '_url'): try: delattr(self, name) except AttributeError: pass + # END try attr deletion # END for each name to delete def config_writer(self): """:return: a config writer instance allowing you to read and write the data - belonging to this submodule into the .gitmodules file.""" - return self._config_parser(read_only=False) + belonging to this submodule into the .gitmodules file. + + :raise ValueError: if trying to get a writer on a parent_commit which does not + match the current head commit + :raise IOError: If the .gitmodules file/blob could not be read""" + return self._config_parser_constrained(read_only=False) #} END edit interface @@ -111,37 +200,104 @@ def config_writer(self): def module(self): """:return: Repo instance initialized from the repository at our submodule path - :raise InvalidGitRepositoryError: if a repository was not available""" + :raise InvalidGitRepositoryError: if a repository was not available. This could + also mean that it was not yet initialized""" + # late import to workaround circular dependencies + from git.repo import Repo + if self.repo.bare: raise InvalidGitRepositoryError("Cannot retrieve module repository in bare parent repositories") # END handle bare mode repo_path = join_path_native(self.repo.working_tree_dir, self.path) try: - return Repo(repo_path) + repo = Repo(repo_path) + if repo != self.repo: + return repo + # END handle repo uninitialized except (InvalidGitRepositoryError, NoSuchPathError): raise InvalidGitRepositoryError("No valid repository at %s" % self.path) + else: + raise InvalidGitRepositoryError("Repository at %r was not yet checked out" % repo_path) # END handle exceptions - + + @property def ref(self): """:return: The reference's name that we are to checkout""" return self._ref - + + @property def url(self): """:return: The url to the repository which our module-repository refers to""" return self._url + @property def parent_commit(self): """:return: Commit instance with the tree containing the .gitmodules file :note: will always point to the current head's commit if it was not set explicitly""" return self._parent_commit + + @property + def name(self): + """:return: The name of this submodule. It is used to identify it within the + .gitmodules file. + :note: by default, the name is the path at which to find the submodule, but + in git-python it should be a unique identifier similar to the identifiers + used for remotes, which allows to change the path of the submodule + easily + """ + return self._name def config_reader(self): """:return: ConfigReader instance which allows you to qurey the configuration values of this submodule, as provided by the .gitmodules file :note: The config reader will actually read the data directly from the repository and thus does not need nor care about your working tree. - :note: Should be cached by the caller and only kept as long as needed""" - return self._config_parser.read_only(read_only=True) + :note: Should be cached by the caller and only kept as long as needed + :raise IOError: If the .gitmodules file/blob could not be read""" + return self._config_parser_constrained(read_only=True) #} END query interface + + #{ Iterable Interface + + @classmethod + def iter_items(cls, repo, parent_commit='HEAD'): + """:return: iterator yielding Submodule instances available in the given repository""" + pc = repo.commit(parent_commit) # parent commit instance + try: + parser = cls._config_parser(repo, pc, read_only=True) + except IOError: + raise StopIteration + # END handle empty iterator + + rt = pc.tree # root tree + + for sms in parser.sections(): + n = sm_name(sms) + p = parser.get_value(sms, 'path') + u = parser.get_value(sms, 'url') + r = cls.k_ref_default + if parser.has_option(sms, cls.k_ref_option): + r = parser.get_value(sms, cls.k_ref_option) + # END handle optional information + + # get the binsha + try: + sm = rt[p] + except KeyError: + raise InvalidGitRepositoryError("Gitmodule path %r did not exist in revision of parent commit %s" % (p, parent_commit)) + # END handle critical error + + # fill in remaining info - saves time as it doesn't have to be parsed again + sm._name = n + sm._parent_commit = pc + sm._ref = r + sm._url = u + + yield sm + # END for each section + + #} END iterable interface + +#} END classes diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index ea480fc23..c7d02abe7 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -33,7 +33,18 @@ def __init__(self, repo, binsha, object=None, tag=None, :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the authored_date is in, in a format similar to time.altzone""" super(TagObject, self).__init__(repo, binsha ) - self._set_self_from_args_(locals()) + if object is not None: + self.object = object + if tag is not None: + self.tag = tag + if tagger is not None: + self.tagger = tagger + if tagged_date is not None: + self.tagged_date = tagged_date + if tagger_tz_offset is not None: + self.tagger_tz_offset = tagger_tz_offset + if message is not None: + self.message = message def _set_cache_(self, attr): """Cache all our attributes at once""" diff --git a/test/git/test_submodule.py b/test/git/test_submodule.py index 7c8dffcbe..f2bc43b5d 100644 --- a/test/git/test_submodule.py +++ b/test/git/test_submodule.py @@ -2,14 +2,81 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php from test.testlib import * -from git import * +from git.exc import * +from git.objects.submodule import * class TestSubmodule(TestBase): - kCOTag = '0.1.6' + k_subm_changed = "394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3" + k_no_subm_tag = "0.1.6" + def _do_base_tests(self, rwrepo): """Perform all tests in the given repository, it may be bare or nonbare""" + # manual instantiation + smm = Submodule(rwrepo, "\0"*20) + # name needs to be set in advance + self.failUnlessRaises(AttributeError, getattr, smm, 'name') + + # iterate - 1 submodule + sms = Submodule.list_items(rwrepo) + assert len(sms) == 1 + sm = sms[0] + + # at a different time, there is None + assert len(Submodule.list_items(rwrepo, self.k_no_subm_tag)) == 0 + + assert sm.path == 'lib/git/ext/gitdb' + assert sm.url == 'git://gitorious.org/git-python/gitdb.git' + assert sm.ref == 'master' # its unset in this case + assert sm.parent_commit == rwrepo.head.commit + + # some commits earlier we still have a submodule, but its at a different commit + smold = Submodule.iter_items(rwrepo, self.k_subm_changed).next() + assert smold.binsha != sm.binsha + assert smold != sm + + # force it to reread its information + del(smold._url) + smold.url == sm.url + + # test config_reader/writer methods + sm.config_reader() + sm.config_writer() + smold.config_reader() + # cannot get a writer on historical submodules + self.failUnlessRaises(ValueError, smold.config_writer) + + + # make the old into a new + prev_parent_commit = smold.parent_commit + smold.set_parent_commit('HEAD') + assert smold.parent_commit != prev_parent_commit + assert smold.binsha == sm.binsha + smold.set_parent_commit(prev_parent_commit) + assert smold.binsha != sm.binsha + + # raises if the sm didn't exist in new parent - it keeps its + # parent_commit unchanged + self.failUnlessRaises(ValueError, smold.set_parent_commit, self.k_no_subm_tag) + + # TEST TODO: if a path in the gitmodules file, but not in the index, it raises + + # module retrieval is not always possible + if rwrepo.bare: + self.failUnlessRaises(InvalidGitRepositoryError, sm.module) + else: + # its not checked out in our case + self.failUnlessRaises(InvalidGitRepositoryError, sm.module) + + # lets do it - its a recursive one too + + # delete the whole directory and re-initialize + # END handle bare mode + + + # Error if there is no submodule file here + self.failUnlessRaises(IOError, Submodule._config_parser, rwrepo, rwrepo.commit(self.k_no_subm_tag), True) # uncached path/url - retrieves information from .gitmodules file @@ -33,7 +100,7 @@ def _do_base_tests(self, rwrepo): # Writing of historical submodule configurations must not work - @with_rw_repo(kCOTag) + @with_rw_repo('HEAD') def test_base_rw(self, rwrepo): self._do_base_tests(rwrepo)