|
- # repo.py
- # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
- #
- # This module is part of GitPython and is released under
- # the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
- from builtins import str
- from collections import namedtuple
- import logging
- import os
- import re
- import warnings
-
- from git.cmd import (
- Git,
- handle_process_output
- )
- from git.compat import (
- text_type,
- defenc,
- PY3,
- safe_decode,
- range,
- is_win,
- )
- from git.config import GitConfigParser
- from git.db import GitCmdObjectDB
- from git.exc import InvalidGitRepositoryError, NoSuchPathError, GitCommandError
- from git.index import IndexFile
- from git.objects import Submodule, RootModule, Commit
- from git.refs import HEAD, Head, Reference, TagReference
- from git.remote import Remote, add_progress, to_progress_instance
- from git.util import Actor, finalize_process, decygpath, hex_to_bin, expand_path
- import os.path as osp
-
- from .fun import rev_parse, is_git_dir, find_submodule_git_dir, touch, find_worktree_git_dir
- import gc
- import gitdb
-
- try:
- import pathlib
- except ImportError:
- pathlib = None
-
-
# Module-level logger, named after this module
log = logging.getLogger(__name__)

# Record yielded by Repo.blame_incremental(): the blamed commit, the range of line
# numbers in the resulting file, the original path and the range of line numbers
# in that original file
BlameEntry = namedtuple('BlameEntry', ['commit', 'linenos', 'orig_path', 'orig_linenos'])


# Only the Repo class is exported through a star-import of this module
__all__ = ('Repo',)
-
-
class Repo(object):
    """Represents a git repository and allows you to query references,
    gather commit information, generate diffs, create and clone repositories, and query
    the log.

    The following attributes are worth using:

    'working_dir' is the working directory of the git command, which is the working tree
    directory if available or the (common) .git directory in case of bare repositories

    'working_tree_dir' is the working tree directory, or None if we are a bare
    repository.

    'git_dir' is the .git repository directory, which is always set."""
    # Name of the marker file inside git_dir that the daemon_export property checks for
    DAEMON_EXPORT_FILE = 'git-daemon-export-ok'

    git = None  # Must exist, or __del__ will fail in case we raise on `__init__()`
    working_dir = None
    _working_tree_dir = None
    git_dir = None
    _common_dir = None

    # precompiled regex
    re_whitespace = re.compile(r'\s+')
    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
    re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
    re_author_committer_start = re.compile(r'^(author|committer)')
    re_tab_full_line = re.compile(r'^\t(.*)$')

    # invariants
    # represents the configuration level of a configuration file; each entry is a
    # value accepted by _get_config_path()
    config_level = ("system", "user", "global", "repository")

    # Subclass configuration
    # Subclasses may easily bring in their own custom types by placing a constructor or type here
    GitCommandWrapperType = Git
-
def __init__(self, path=None, odbt=GitCmdObjectDB, search_parent_directories=False, expand_vars=True):
    """Create a new Repo instance

    :param path:
        the path to either the root git directory or the bare git repo::

            repo = Repo("/Users/mtrier/Development/git-python")
            repo = Repo("/Users/mtrier/Development/git-python.git")
            repo = Repo("~/Development/git-python.git")
            repo = Repo("$REPOSITORIES/Development/git-python.git")
            repo = Repo("C:\\Users\\mtrier\\Development\\git-python\\.git")

        - In *Cygwin*, path may be a `'cygdrive/...'` prefixed path.
        - If it evaluates to false, :envvar:`GIT_DIR` is used, and if this also evals to false,
          the current-directory is used.
    :param odbt:
        Object DataBase type - a type which is constructed by providing
        the directory containing the database objects, i.e. .git/objects. It will
        be used to access all object data
    :param search_parent_directories:
        if True, all parent directories will be searched for a valid repo as well.

        Please note that this was the default behaviour in older versions of GitPython,
        which is considered a bug though.
    :param expand_vars:
        forwarded to ``expand_path`` when resolving the path. If true and the path
        contains ``%`` or ``$``, a deprecation warning is issued.
    :raise InvalidGitRepositoryError: if no git directory could be located
    :raise NoSuchPathError: if the resolved path does not exist
    :return: git.Repo """

    epath = path or os.getenv('GIT_DIR')
    if not epath:
        epath = os.getcwd()
    if Git.is_cygwin():
        epath = decygpath(epath)

    epath = epath or path or os.getcwd()
    # pathlib.Path and other path-like objects are handled as plain strings
    if not isinstance(epath, str):
        epath = str(epath)
    if expand_vars and ("%" in epath or "$" in epath):
        warnings.warn("The use of environment variables in paths is deprecated" +
                      "\nfor security reasons and may be removed in the future!!")
    epath = expand_path(epath, expand_vars)
    if not os.path.exists(epath):
        raise NoSuchPathError(epath)

    ## Walk up the path to find the `.git` dir.
    #
    curpath = epath
    while curpath:
        # ABOUT osp.NORMPATH
        # It's important to normalize the paths, as submodules will otherwise initialize their
        # repo instances with paths that depend on path-portions that will not exist after being
        # removed. It's just cleaner.
        if is_git_dir(curpath):
            self.git_dir = curpath
            # from man git-config : core.worktree
            # Set the path to the root of the working tree. If GIT_COMMON_DIR environment
            # variable is set, core.worktree is ignored and not used for determining the
            # root of working tree. This can be overridden by the GIT_WORK_TREE environment
            # variable. The value can be an absolute path or relative to the path to the .git
            # directory, which is either specified by GIT_DIR, or automatically discovered.
            # If GIT_DIR is specified but none of GIT_WORK_TREE and core.worktree is specified,
            # the current working directory is regarded as the top level of your working tree.
            self._working_tree_dir = os.path.dirname(self.git_dir)
            if os.environ.get('GIT_COMMON_DIR') is None:
                gitconf = self.config_reader("repository")
                if gitconf.has_option('core', 'worktree'):
                    self._working_tree_dir = gitconf.get('core', 'worktree')
            if 'GIT_WORK_TREE' in os.environ:
                self._working_tree_dir = os.getenv('GIT_WORK_TREE')
            break

        # A `.git` entry may point elsewhere: first try the submodule lookup, then
        # the worktree lookup. The lookup is done once per directory; its result
        # decides git_dir below.
        dotgit = osp.join(curpath, '.git')
        sm_gitpath = find_submodule_git_dir(dotgit)
        if sm_gitpath is None:
            sm_gitpath = find_worktree_git_dir(dotgit)

        if sm_gitpath is not None:
            self.git_dir = expand_path(sm_gitpath, expand_vars)
            self._working_tree_dir = curpath
            break

        if not search_parent_directories:
            break
        curpath, tail = osp.split(curpath)
        if not tail:
            # reached the filesystem root
            break
    # END while curpath

    if self.git_dir is None:
        raise InvalidGitRepositoryError(epath)

    self._bare = False
    try:
        self._bare = self.config_reader("repository").getboolean('core', 'bare')
    except Exception:
        # lets not assume the option exists, although it should
        pass

    # The optional `commondir` file names the shared git directory, relative to git_dir.
    # The file is closed deterministically; a missing or empty file means there is
    # no separate common dir.
    try:
        with open(osp.join(self.git_dir, 'commondir'), 'rt') as fp:
            common_dir = fp.readline().strip()
    except (OSError, IOError):
        common_dir = ''
    self._common_dir = osp.join(self.git_dir, common_dir) if common_dir else None

    # adjust the wd in case we are actually bare - we didn't know that
    # in the first place
    if self._bare:
        self._working_tree_dir = None
    # END working dir handling

    self.working_dir = self._working_tree_dir or self.common_dir
    self.git = self.GitCommandWrapperType(self.working_dir)

    # special handling, in special times: GitCmdObjectDB additionally needs the git command
    args = [osp.join(self.common_dir, 'objects')]
    if issubclass(odbt, GitCmdObjectDB):
        args.append(self.git)
    self.odb = odbt(*args)
-
def __enter__(self):
    """Context-manager entry: hand back this instance unchanged."""
    return self
-
def __exit__(self, exc_type, exc_value, traceback):
    """Context-manager exit: call close(). The exception details are ignored and,
    as nothing is returned, a pending exception is not suppressed."""
    self.close()
-
def __del__(self):
    """Best-effort cleanup on finalization by calling close()."""
    try:
        self.close()
    except Exception:
        # deliberately swallowed - errors from close() are not propagated out of the finalizer
        pass
-
def close(self):
    """Release cached resources held through the git command wrapper.

    Does nothing if no git command wrapper is set (``self.git`` is falsy)."""
    if not self.git:
        return

    self.git.clear_cache()
    # Tempfiles objects on Windows are holding references to
    # open files until they are collected by the garbage
    # collector, thus preventing deletion.
    # TODO: Find these references and ensure they are closed
    # and deleted synchronously rather than forcing a gc
    # collection.
    if is_win:
        gc.collect()
    gitdb.util.mman.collect()
    if is_win:
        gc.collect()
-
def __eq__(self, rhs):
    """Two Repo instances are equal if their git_dir is equal; anything that is
    not a Repo compares unequal."""
    if isinstance(rhs, Repo):
        return self.git_dir == rhs.git_dir
    return False
-
def __ne__(self, rhs):
    """Inverse of __eq__ (spelled out explicitly rather than relying on a default)."""
    return not self.__eq__(rhs)
-
def __hash__(self):
    """Hash on git_dir, the same attribute __eq__ compares."""
    return hash(self.git_dir)
-
# Description property
def _get_description(self):
    """Read the 'description' file in git_dir, without trailing whitespace."""
    description_file = osp.join(self.git_dir, 'description')
    with open(description_file, 'rb') as stream:
        raw = stream.read()
    return raw.rstrip().decode(defenc)


def _set_description(self, descr):
    """Overwrite the 'description' file in git_dir with descr plus a newline."""
    description_file = osp.join(self.git_dir, 'description')
    payload = (descr + '\n').encode(defenc)
    with open(description_file, 'wb') as stream:
        stream.write(payload)


description = property(_get_description, _set_description,
                       doc="the project's description")
# the accessors are only reachable through the property
del _get_description
del _set_description
-
@property
def working_tree_dir(self):
    """:return: The working tree directory of our git repository. If this is a bare repository, None is returned.
    """
    # plain read of the value determined while the instance was set up
    return self._working_tree_dir
-
@property
def common_dir(self):
    """:return: The git dir that holds everything except possibly HEAD,
        FETCH_HEAD, ORIG_HEAD, COMMIT_EDITMSG, index, and logs/ .
        Falls back to git_dir when no separate common dir is set.
    """
    shared = self._common_dir
    if shared:
        return shared
    return self.git_dir
-
@property
def bare(self):
    """:return: True if the repository is bare"""
    # read-only view of the private _bare flag
    return self._bare
-
@property
def heads(self):
    """A list of ``Head`` objects representing the branch heads in
    this repo

    :return: ``git.IterableList(Head, ...)``
    :note: the list is obtained anew from ``Head.list_items`` on every access"""
    return Head.list_items(self)
-
@property
def references(self):
    """A list of Reference objects representing tags, heads and remote references.

    :return: IterableList(Reference, ...)
    :note: the list is obtained anew from ``Reference.list_items`` on every access"""
    return Reference.list_items(self)
-
# alias for references - the very same property object under a shorter name
refs = references

# alias for heads - the very same property object under a second name
branches = heads
-
@property
def index(self):
    """:return: IndexFile representing this repository's index.
    :note: This property can be expensive, as the returned ``IndexFile`` will be
        reinitialized (a new instance is constructed on every access).
        It's recommended to re-use the object."""
    return IndexFile(self)
-
@property
def head(self):
    """:return: HEAD Object pointing to the current head reference
    :note: a new HEAD instance is constructed on every access"""
    return HEAD(self, 'HEAD')
-
@property
def remotes(self):
    """A list of Remote objects allowing to access and manipulate remotes

    :return: ``git.IterableList(Remote, ...)``"""
    return Remote.list_items(self)
-
def remote(self, name='origin'):
    """Look up a remote by name.

    :param name: name of the remote, 'origin' by default
    :return: Remote with the specified name
    :raise ValueError: if no remote with such a name exists"""
    candidate = Remote(self, name)
    if candidate.exists():
        return candidate
    raise ValueError("Remote named '%s' didn't exist" % name)
-
- #{ Submodules
-
@property
def submodules(self):
    """
    :return: git.IterableList(Submodule, ...) of direct submodules
        available from the current head
    :note: the list is obtained anew from ``Submodule.list_items`` on every access"""
    return Submodule.list_items(self)
-
def submodule(self, name):
    """ :return: Submodule with the given name
    :param name: key used to look the submodule up in ``self.submodules``
    :raise ValueError: If no such submodule exists"""
    try:
        return self.submodules[name]
    except IndexError:
        # a failed lookup surfaces as IndexError; report it as ValueError to the caller
        raise ValueError("Didn't find submodule named %r" % name)
    # END exception handling
-
def create_submodule(self, *args, **kwargs):
    """Create a new submodule

    :note: See the documentation of Submodule.add for a description of the
        applicable parameters; all arguments are forwarded to it after this repo
    :return: whatever ``Submodule.add`` returns - the created submodule"""
    return Submodule.add(self, *args, **kwargs)
-
def iter_submodules(self, *args, **kwargs):
    """An iterator yielding Submodule instances, see Traversable interface
    for a description of args and kwargs

    :return: Iterator, as produced by ``RootModule(self).traverse``"""
    return RootModule(self).traverse(*args, **kwargs)
-
def submodule_update(self, *args, **kwargs):
    """Update the submodules, keeping the repository consistent as it will
    take the previous state into consideration. For more information, please
    see the documentation of RootModule.update

    :return: whatever ``RootModule(self).update`` returns"""
    return RootModule(self).update(*args, **kwargs)
-
- #}END submodules
-
@property
def tags(self):
    """A list of ``TagReference`` objects that are available in this repo

    :return: ``git.IterableList(TagReference, ...)`` """
    return TagReference.list_items(self)
-
def tag(self, path):
    """:return: TagReference Object, reference pointing to a Commit or Tag
    :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5
    :note: the object is only constructed here; this method performs no
        existence check of its own"""
    return TagReference(self, path)
-
def create_head(self, path, commit='HEAD', force=False, logmsg=None):
    """Create a new head within the repository.
    For more documentation, please see the Head.create method, to which all
    arguments are forwarded.

    :return: newly created Head Reference"""
    return Head.create(self, path, commit, force, logmsg)
-
def delete_head(self, *heads, **kwargs):
    """Delete the given heads

    :param heads: the heads to delete, forwarded to ``Head.delete``
    :param kwargs: Additional keyword arguments to be passed to git-branch"""
    return Head.delete(self, *heads, **kwargs)
-
def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs):
    """Create a new tag reference.
    For more documentation, please see the TagReference.create method, to which
    all arguments are forwarded.

    :return: TagReference object """
    return TagReference.create(self, path, ref, message, force, **kwargs)
-
def delete_tag(self, *tags):
    """Delete the given tag references

    :param tags: the tags to delete, forwarded to ``TagReference.delete``"""
    return TagReference.delete(self, *tags)
-
def create_remote(self, name, url, **kwargs):
    """Create a new remote.

    For more information, please see the documentation of the Remote.create
    method, to which all arguments are forwarded.

    :return: Remote reference"""
    return Remote.create(self, name, url, **kwargs)
-
def delete_remote(self, remote):
    """Delete the given remote.

    :return: whatever ``Remote.remove`` returns"""
    return Remote.remove(self, remote)
-
def _get_config_path(self, config_level):
    """Map a configuration level name to the path of its configuration file.

    :param config_level: one of "system", "user", "global" or "repository"
    :return: path of the configuration file for that level
    :raise ValueError: if config_level is none of the known levels"""
    # we do not support an absolute path of the gitconfig on windows ,
    # use the global config instead
    level = "global" if (is_win and config_level == "system") else config_level

    if level == "system":
        return "/etc/gitconfig"

    if level == "user":
        # XDG location, falling back to $HOME/.config (or ~/.config)
        xdg_home = os.environ.get("XDG_CONFIG_HOME")
        if not xdg_home:
            xdg_home = osp.join(os.environ.get("HOME", '~'), ".config")
        return osp.normpath(osp.expanduser(osp.join(xdg_home, "git", "config")))

    if level == "global":
        return osp.normpath(osp.expanduser("~/.gitconfig"))

    if level == "repository":
        base_dir = self._common_dir or self.git_dir
        return osp.normpath(osp.join(base_dir, "config"))

    raise ValueError("Invalid configuration level: %r" % level)
-
def config_reader(self, config_level=None):
    """
    :return:
        GitConfigParser allowing to read the full git configuration, but not to write it

        Without an explicit level, the files of all levels listed in
        ``self.config_level`` are handed to the parser.

    :param config_level:
        For possible values, see config_writer method
        If None, all applicable levels will be used. Specify a level in case
        you know which file you wish to read to prevent reading multiple files.
    :note: On windows, system configuration cannot currently be read as the path is
        unknown, instead the global path will be used."""
    levels = self.config_level if config_level is None else (config_level,)
    config_files = [self._get_config_path(level) for level in levels]
    return GitConfigParser(config_files, read_only=True)
-
def config_writer(self, config_level="repository"):
    """
    :return:
        GitConfigParser allowing to write values of the specified configuration file level.
        Config writers should be retrieved, used to change the configuration, and written
        right away as they will lock the configuration file in question and prevent others
        from writing it.

    :param config_level:
        One of the following values
        system = system wide configuration file
        global = user level configuration file
        repository = configuration file for this repository only"""
    return GitConfigParser(self._get_config_path(config_level), read_only=False)
-
def commit(self, rev=None):
    """The Commit object for the specified revision

    :param rev: revision specifier, see git-rev-parse for viable options.
        If None, the commit of the current head is returned.
    :return: ``git.Commit``"""
    if rev is not None:
        # "^0" makes rev-parse resolve the revision down to a commit
        return self.rev_parse(text_type(rev) + "^0")
    return self.head.commit
-
def iter_trees(self, *args, **kwargs):
    """:return: Iterator yielding Tree objects, one per commit
    :note: Takes all arguments known to iter_commits method"""
    commits = self.iter_commits(*args, **kwargs)
    return (each.tree for each in commits)
-
def tree(self, rev=None):
    """The Tree object for the given treeish revision
    Examples::

          repo.tree(repo.heads[0])

    :param rev: is a revision pointing to a Treeish ( being a commit or tree ).
        If None, the tree of the current head commit is returned.
    :return: ``git.Tree``

    :note:
        If you need a non-root level tree, find it by iterating the root tree. Otherwise
        it cannot know about its path relative to the repository root and subsequent
        operations might have unexpected results."""
    if rev is not None:
        # "^{tree}" makes rev-parse resolve the revision down to a tree
        return self.rev_parse(text_type(rev) + "^{tree}")
    return self.head.commit.tree
-
def iter_commits(self, rev=None, paths='', **kwargs):
    """Commit objects representing the history of a given ref/commit,
    as produced by ``Commit.iter_items``

    :param rev:
        revision specifier, see git-rev-parse for viable options.
        If None, the commit of the current head will be used.

    :param paths:
        is an optional path or a list of paths to limit the returned commits to
        Commits that do not contain that path or the paths will not be returned.

    :param kwargs:
        Arguments to be passed to git-rev-list - common ones are
        max_count and skip

    :note: to receive only commits between two named revisions, use the
        "revA...revB" revision specifier

    :return: ``git.Commit[]``"""
    if rev is None:
        rev = self.head.commit

    return Commit.iter_items(self, rev, paths, **kwargs)
-
def merge_base(self, *rev, **kwargs):
    """Find the closest common ancestor for the given revision (e.g. Commits, Tags, References, etc)

    :param rev: At least two revs to find the common ancestor for.
    :param kwargs: Additional arguments to be passed to the repo.git.merge_base() command which does all the work.
    :return: A list of Commit objects. If --all was not specified as kwarg, the list will have at max one Commit,
        or is empty if no common merge base exists.
    :raises ValueError: If not at least two revs are provided
    """
    rev_count = len(rev)
    if rev_count < 2:
        raise ValueError("Please specify at least two revs, got only %i" % rev_count)

    try:
        output_lines = self.git.merge_base(*rev, **kwargs).splitlines()
    except GitCommandError as err:
        # Status 128 signals an invalid rev and is passed on to the caller.
        if err.status == 128:
            raise
        # Status code 1 is returned if there is no merge-base
        # (see https://github.com/git/git/blob/master/builtin/merge-base.c#L16)
        return []

    # one output line per merge-base
    return [self.commit(hexsha) for hexsha in output_lines]
-
def is_ancestor(self, ancestor_rev, rev):
    """Check if a commit is an ancestor of another

    :param ancestor_rev: Rev which should be an ancestor
    :param rev: Rev to test against ancestor_rev
    :return: ``True``, ancestor_rev is an ancestor to rev.
    """
    try:
        self.git.merge_base(ancestor_rev, rev, is_ancestor=True)
    except GitCommandError as err:
        # exit status 1 is the "not an ancestor" answer; anything else is an error
        if err.status != 1:
            raise
        return False
    else:
        return True
-
def _get_daemon_export(self):
    """True if the daemon export marker file exists in git_dir."""
    return osp.exists(osp.join(self.git_dir, self.DAEMON_EXPORT_FILE))


def _set_daemon_export(self, value):
    """Create the marker file for a true value, remove it for a false one."""
    marker = osp.join(self.git_dir, self.DAEMON_EXPORT_FILE)
    present = osp.exists(marker)
    if value:
        if not present:
            touch(marker)
    elif present:
        os.unlink(marker)


daemon_export = property(_get_daemon_export, _set_daemon_export,
                         doc="If True, git-daemon may export this repository")
# the accessors are only reachable through the property
del _get_daemon_export
del _set_daemon_export
-
def _get_alternates(self):
    """The list of alternates for this repo from which objects can be retrieved

    The alternates file is looked up below common_dir - the very location
    _set_alternates writes to - so that reading and writing always agree.

    :return: list of strings being pathnames of alternates; empty if the
        alternates file does not exist"""
    alternates_path = osp.join(self.common_dir, 'objects', 'info', 'alternates')

    if osp.exists(alternates_path):
        with open(alternates_path, 'rb') as f:
            alts = f.read().decode(defenc)
        return alts.strip().splitlines()
    return []


def _set_alternates(self, alts):
    """Sets the alternates

    :param alts:
        is the array of string paths representing the alternates at which
        git should look for objects, i.e. /home/user/repo/.git/objects.
        An empty value removes the alternates file if it exists.

    :note:
        The method does not check for the existence of the paths in alts
        as the caller is responsible."""
    alternates_path = osp.join(self.common_dir, 'objects', 'info', 'alternates')
    if not alts:
        if osp.isfile(alternates_path):
            os.remove(alternates_path)
    else:
        # one alternate per line
        with open(alternates_path, 'wb') as f:
            f.write("\n".join(alts).encode(defenc))


alternates = property(_get_alternates, _set_alternates,
                      doc="Retrieve a list of alternates paths or set a list paths to be used as alternates")
-
def is_dirty(self, index=True, working_tree=True, untracked_files=False,
             submodules=True, path=None):
    """
    :return:
        ``True``, the repository is considered dirty. By default it will react
        like a git-status without untracked files, hence it is dirty if the
        index or the working copy have changes.

    :param index: if True, compare the index against HEAD
    :param working_tree: if True, compare the working tree against the index
    :param untracked_files: if True, untracked files make the repository dirty
    :param submodules: if False, changes in submodules are ignored
    :param path: if given, limit the checks to this path"""
    if self._bare:
        # Bare repositories with no associated working directory are
        # always considered to be clean.
        return False

    # start from the one which is fastest to evaluate
    default_args = ['--abbrev=40', '--full-index', '--raw']
    if not submodules:
        default_args.append('--ignore-submodules')
    if path:
        # '--' terminates the options, so a path can never be mistaken for a
        # revision or an option; str() also admits path-like objects
        default_args.extend(['--', str(path)])
    if index:
        # diff index against HEAD
        if osp.isfile(self.index.path) and \
                len(self.git.diff('--cached', *default_args)):
            return True
    # END index handling
    if working_tree:
        # diff index against working tree
        if len(self.git.diff(*default_args)):
            return True
    # END working tree handling
    if untracked_files:
        if len(self._get_untracked_files(path, ignore_submodules=not submodules)):
            return True
    # END untracked files
    return False
-
@property
def untracked_files(self):
    """
    :return:
        list(str,...)

        Files currently untracked as they have not been staged yet. Paths
        are relative to the current working directory of the git command.

    :note:
        ignored files will not appear here, i.e. files mentioned in .gitignore
    :note:
        This property is expensive, as no cache is involved (a git-status process
        is run on every access). To process the result, please
        consider caching it yourself."""
    return self._get_untracked_files()
-
def _get_untracked_files(self, *args, **kwargs):
    """Run git-status in porcelain mode and collect the untracked entries.

    :param args: positional arguments forwarded to git-status
    :param kwargs: keyword arguments forwarded to git-status
    :return: list of untracked file names, with git's quoting undone"""
    # make sure we get all files, not only untracked directories
    status_proc = self.git.status(*args,
                                  porcelain=True,
                                  untracked_files=True,
                                  as_process=True,
                                  **kwargs)
    # Untracked files prefix in porcelain mode
    marker = "?? "
    marker_len = len(marker)
    collected = []
    for raw_line in status_proc.stdout:
        entry = raw_line.decode(defenc)
        if entry.startswith(marker):
            name = entry[marker_len:].rstrip('\n')
            # Special characters are escaped, and such names are wrapped in double quotes
            if name[0] == name[-1] == '"':
                name = name[1:-1]
                if PY3:
                    # WHATEVER ... it's a mess, but works for me
                    name = name.encode('ascii').decode('unicode_escape').encode('latin1').decode(defenc)
                else:
                    name = name.decode('string_escape').decode(defenc)
            collected.append(name)
    finalize_process(status_proc)
    return collected
-
@property
def active_branch(self):
    """The currently active branch, i.e. the reference HEAD points to.

    :return: Head to the active branch"""
    return self.head.reference
-
def blame_incremental(self, rev, file, **kwargs):
    """Iterator for blame information for the given file at the given revision.

    Unlike .blame(), this does not return the actual file's contents, only
    a stream of BlameEntry tuples.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :param file: path of the file to blame, passed to git-blame after ``--``
    :param kwargs: additional keyword arguments passed to git-blame
    :return: lazy iterator of BlameEntry tuples, where the commit
             indicates the commit to blame for the line, and range
             indicates a span of line numbers in the resulting file.

    If you combine all line number ranges outputted by this command, you
    should get a continuous range spanning all line numbers in the file.
    """
    data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
    # Commit objects already built, keyed by the hexsha bytes as read from the output
    commits = {}

    # the output is handled as bytes; empty lines are skipped
    stream = (line for line in data.split(b'\n') if line)
    while True:
        try:
            line = next(stream)  # when exhausted, causes a StopIteration, terminating this function
        except StopIteration:
            return
        # entry header: <hexsha> <original line> <final line> <number of lines>
        hexsha, orig_lineno, lineno, num_lines = line.split()
        lineno = int(lineno)
        num_lines = int(num_lines)
        orig_lineno = int(orig_lineno)
        if hexsha not in commits:
            # Now read the next few lines and build up a dict of properties
            # for this commit
            props = {}
            while True:
                try:
                    line = next(stream)
                except StopIteration:
                    return
                if line == b'boundary':
                    # "boundary" indicates a root commit and occurs
                    # instead of the "previous" tag
                    continue

                tag, value = line.split(b' ', 1)
                props[tag] = value
                if tag == b'filename':
                    # "filename" formally terminates the entry for --incremental
                    orig_filename = value
                    break

            # the mail values are stripped of their surrounding angle brackets
            c = Commit(self, hex_to_bin(hexsha),
                       author=Actor(safe_decode(props[b'author']),
                                    safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                       authored_date=int(props[b'author-time']),
                       committer=Actor(safe_decode(props[b'committer']),
                                       safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
                       committed_date=int(props[b'committer-time']))
            commits[hexsha] = c
        else:
            # Discard all lines until we find "filename" which is
            # guaranteed to be the last line
            while True:
                try:
                    line = next(stream)  # will fail if we reach the EOF unexpectedly
                except StopIteration:
                    return
                tag, value = line.split(b' ', 1)
                if tag == b'filename':
                    orig_filename = value
                    break

        # both ranges span num_lines lines, starting at the respective line number
        yield BlameEntry(commits[hexsha],
                         range(lineno, lineno + num_lines),
                         safe_decode(orig_filename),
                         range(orig_lineno, orig_lineno + num_lines))
-
def blame(self, rev, file, incremental=False, **kwargs):
    """The blame information for the given file at the given revision.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :param file: path of the file to blame, passed to git-blame after ``--``
    :param incremental: if True, the result of blame_incremental() is returned instead
    :param kwargs: additional keyword arguments passed to git-blame
    :return:
        list: [git.Commit, list: [<line>]]
        A list of two-element lists associating a Commit object with a list of lines that
        changed within the given commit. The Commit objects will be given in order
        of appearance."""
    if incremental:
        return self.blame_incremental(rev, file, **kwargs)

    data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False, **kwargs)
    # Commit objects already built, keyed by hexsha
    commits = {}
    # the result: a list of [commit, [line, ...]] entries
    blames = []
    # properties collected so far for the commit currently being parsed
    info = None

    keepends = True
    for line in data.splitlines(keepends):
        try:
            line = line.rstrip().decode(defenc)
        except UnicodeDecodeError:
            # undecodable lines are kept as bytes and handled as file content
            firstpart = ''
            is_binary = True
        else:
            # As we don't have an idea when the binary data ends, as it could contain multiple newlines
            # in the process. So we rely on being able to decode to tell us what it is.
            # This can absolutely fail even on text files, but even if it does, we should be fine treating it
            # as binary instead
            parts = self.re_whitespace.split(line, 1)
            firstpart = parts[0]
            is_binary = False
        # end handle decode of line

        if self.re_hexsha_only.search(firstpart):
            # handles
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2          - indicates
            # another line of blame with the same data
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            elif info['id'] != firstpart:
                info = {'id': firstpart}
                blames.append([commits.get(firstpart), []])
            # END blame data initialization
        else:
            m = self.re_author_committer_start.search(firstpart)
            if m:
                # handles:
                # author Tom Preston-Werner
                # author-mail <tom@mojombo.com>
                # author-time 1192271832
                # author-tz -0700
                # committer Tom Preston-Werner
                # committer-mail <tom@mojombo.com>
                # committer-time 1192271832
                # committer-tz -0700  - IGNORED BY US
                role = m.group(0)
                if firstpart.endswith('-mail'):
                    info["%s_email" % role] = parts[-1]
                elif firstpart.endswith('-time'):
                    info["%s_date" % role] = int(parts[-1])
                elif role == firstpart:
                    info[role] = parts[-1]
                # END distinguish mail,time,name
            else:
                # handle
                # filename lib/grit.rb
                # summary add Blob
                # <and rest>
                if firstpart.startswith('filename'):
                    info['filename'] = parts[-1]
                elif firstpart.startswith('summary'):
                    info['summary'] = parts[-1]
                elif firstpart == '':
                    # an empty first part marks a line of file content
                    if info:
                        sha = info['id']
                        c = commits.get(sha)
                        if c is None:
                            c = Commit(self, hex_to_bin(sha),
                                       author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                       authored_date=info['author_date'],
                                       committer=Actor._from_string(
                                           info['committer'] + ' ' + info['committer_email']),
                                       committed_date=info['committer_date'])
                            commits[sha] = c
                        # END if commit objects needs initial creation
                        if not is_binary:
                            # drop the tab that prefixes a content line
                            if line and line[0] == '\t':
                                line = line[1:]
                        else:
                            # NOTE: We are actually parsing lines out of binary data, which can lead to the
                            # binary being split up along the newline separator. We will append this to the blame
                            # we are currently looking at, even though it should be concatenated with the last line
                            # we have seen.
                            pass
                        # end handle line contents
                        blames[-1][0] = c
                        blames[-1][1].append(line)
                        # keep only the id, so following lines of the same commit are matched
                        info = {'id': sha}
                    # END if we collected commit info
                # END distinguish filename,summary,rest
            # END distinguish author|committer vs filename,summary,rest
        # END distinguish hexsha vs other information
    return blames
-
@classmethod
def init(cls, path=None, mkdir=True, odbt=GitCmdObjectDB, expand_vars=True, **kwargs):
    """Initialize a git repository at the given path if specified

    :param path:
        is the full path to the repo (traditionally ends with /<name>.git)
        or None in which case the repository will be created in the current
        working directory

    :param mkdir:
        if specified will create the repository directory if it doesn't
        already exists. Creates the directory with a mode=0755.
        Only effective if a path is explicitly given

    :param odbt:
        Object DataBase type - a type which is constructed by providing
        the directory containing the database objects, i.e. .git/objects.
        It will be used to access all object data

    :param expand_vars:
        if specified, environment variables will not be escaped. This
        can lead to information disclosure, allowing attackers to
        access the contents of environment variables.
        The value is applied both when resolving the path here and when
        the created repository is opened.

    :param kwargs:
        keyword arguments serving as additional options to the git-init command

    :return: ``git.Repo`` (the newly created repo)"""
    if path:
        path = expand_path(path, expand_vars)
    if mkdir and path and not osp.exists(path):
        os.makedirs(path, 0o755)

    # git command automatically chdir into the directory
    git = Git(path)
    git.init(**kwargs)
    # forward expand_vars, so opening the repository treats the path exactly as
    # it was treated above instead of falling back to the constructor's default
    return cls(path, odbt=odbt, expand_vars=expand_vars)
-
@classmethod
def _clone(cls, git, url, path, odb_default_type, progress, multi_options=None, **kwargs):
    """Run git-clone through the given git command and open the result.

    :param git: git command object used to run the clone
    :param url: source to clone from
    :param path: destination path; non-string paths are converted with str()
    :param odb_default_type: object database type used unless kwargs contains 'odbt'
    :param progress: progress handler, or None
    :param multi_options: list of option strings; they are joined and split at
        single spaces before being handed to git-clone
    :param kwargs: 'odbt' is consumed here, everything else goes to git-clone
    :return: instance of cls for the newly cloned repository"""
    if progress is not None:
        progress = to_progress_instance(progress)

    odbt = kwargs.pop('odbt', odb_default_type)

    # when pathlib.Path or other classbased path is passed
    if not isinstance(path, str):
        path = str(path)

    ## A bug win cygwin's Git, when `--bare` or `--separate-git-dir`
    #  it prepends the cwd or(?) the `url` into the `path, so::
    #        git clone --bare  /cygwin/d/foo.git  C:\\Work
    #  becomes::
    #        git clone --bare  /cygwin/d/foo.git  /cygwin/d/C:\\Work
    #
    clone_path = (Git.polish_url(path)
                  if Git.is_cygwin() and 'bare' in kwargs
                  else path)
    sep_dir = kwargs.get('separate_git_dir')
    if sep_dir:
        kwargs['separate_git_dir'] = Git.polish_url(sep_dir)
    multi = None
    if multi_options:
        # NOTE(review): splitting at single spaces also splits option values
        # that contain spaces themselves
        multi = ' '.join(multi_options).split(' ')
    proc = git.clone(multi, Git.polish_url(url), clone_path, with_extended_output=True, as_process=True,
                     v=True, universal_newlines=True, **add_progress(kwargs, git, progress))
    if progress:
        handle_process_output(proc, None, progress.new_message_handler(), finalize_process, decode_streams=False)
    else:
        (stdout, stderr) = proc.communicate()
        log.debug("Cmd(%s)'s unused stdout: %s", getattr(proc, 'args', ''), stdout)
        finalize_process(proc, stderr=stderr)

    # our git command could have a different working dir than our actual
    # environment, hence we prepend its working dir if required
    # NOTE(review): the check reads `working_dir` while the join reads the private
    # `_working_dir` - presumably the same value; confirm against git.cmd.Git
    if not osp.isabs(path) and git.working_dir:
        path = osp.join(git._working_dir, path)

    repo = cls(path, odbt=odbt)

    # retain env values that were passed to _clone()
    repo.git.update_environment(**git.environment())

    # adjust remotes - there may be operating systems which use backslashes,
    # These might be given as initial paths, but when handling the config file
    # that contains the remote from which we were clones, git stops liking it
    # as it will escape the backslashes. Hence we undo the escaping just to be
    # sure
    if repo.remotes:
        with repo.remotes[0].config_writer as writer:
            writer.set_value('url', Git.polish_url(repo.remotes[0].url))
    # END handle remote repo
    return repo
-
def clone(self, path, progress=None, multi_options=None, **kwargs):
    """Clone this repository to a new location.

    :param path: full path of the new repo (traditionally ends with ./<name>.git).
    :param progress: See 'git.remote.Remote.push'.
    :param multi_options: list of clone options which may be given more than once.
        Each list item holds one option and is passed to clone exactly as written,
        for example ['--config core.filemode=false', '--config core.ignorecase',
        '--recurse-submodule=repo1_path', '--recurse-submodule=repo2_path']
    :param kwargs:
        * odbt = ObjectDatabase Type, determines the object database
          implementation used by the returned Repo instance
        * all other keyword arguments are handed to the git-clone command

    :return: ``git.Repo`` (the newly cloned repo)"""
    git_cmd = self.git
    source = self.common_dir
    # unless overridden through 'odbt', the clone uses the same database type as we do
    odb_type = type(self.odb)
    return self._clone(git_cmd, source, path, odb_type, progress, multi_options, **kwargs)
-
@classmethod
def clone_from(cls, url, to_path, progress=None, env=None, multi_options=None, **kwargs):
    """Clone the repository found at the given URL.

    :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
    :param to_path: path the repository is cloned to
    :param progress: See 'git.remote.Remote.push'.
    :param env: optional dictionary of environment variables to set on the
        git command that performs the clone
    :param multi_options: See ``clone`` method
    :param kwargs: see the ``clone`` method
    :return: Repo instance pointing to the cloned directory"""
    # the clone command is run from the current working directory of this process
    cwd = os.getcwd()
    cmd = Git(cwd)
    if env is not None:
        cmd.update_environment(**env)
    return cls._clone(cmd, url, to_path, GitCmdObjectDB, progress, multi_options, **kwargs)
-
def archive(self, ostream, treeish=None, prefix=None, **kwargs):
    """Write an archive of the tree at the given revision to a stream.

    :param ostream: file compatible stream object receiving the archive as bytes
    :param treeish: treeish name/id to archive; the commit HEAD points to is used
        when it is None
    :param prefix: optional prefix to prepend to each filename in the archive
    :param kwargs: additional arguments passed to git-archive

        * Use the 'format' argument to select the archive format. Use
          specialized ostreams to write any format supported by python.
        * The special **path** keyword may hold a repository-relative path to a
          directory or file to put into the archive, or a list or tuple of such paths.

    :raise GitCommandError: in case something went wrong
    :return: self"""
    if treeish is None:
        treeish = self.head.commit

    # 'path' is our own keyword and must not reach git-archive as an option
    requested = kwargs.pop('path', [])
    paths = requested if isinstance(requested, (tuple, list)) else [requested]

    if prefix:
        kwargs.setdefault('prefix', prefix)
    kwargs['output_stream'] = ostream

    self.git.archive(treeish, *paths, **kwargs)
    return self
-
def has_separate_working_tree(self):
    """
    :return: True if our git_dir is not located at the root of our working_tree_dir,
        i.e. the working tree only contains a .git *file* which acts as a platform
        agnostic symbolic link. Our git_dir is wherever that .git file points to
    :note: bare repositories will always return False here
    """
    # a regular checkout has a .git directory, only a linked one has a plain file
    return not self.bare and osp.isfile(osp.join(self.working_tree_dir, '.git'))
-
- rev_parse = rev_parse  # makes the rev_parse function imported from .fun available as repo.rev_parse(...)
-
def __repr__(self):
    """:return: short description of this instance which names its git_dir"""
    location = self.git_dir
    return '<git.Repo "%s">' % location
-
def currently_rebasing_on(self):
    """
    :return: The commit which is currently being replayed while rebasing, looked up
        from the first line of the REBASE_HEAD file in our git_dir.

        None if we are not currently rebasing, i.e. if that file does not exist.
    """
    rebase_head_file = osp.join(self.git_dir, "REBASE_HEAD")
    if not osp.isfile(rebase_head_file):
        return None
    # Use a context manager so the file handle is closed right away instead of
    # being left for the garbage collector to clean up
    with open(rebase_head_file, "rt") as fp:
        rev = fp.readline().strip()
    return self.commit(rev)
|