You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

534 lines
20KB

  1. # commit.py
  2. # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
  3. #
  4. # This module is part of GitPython and is released under
  5. # the BSD License: http://www.opensource.org/licenses/bsd-license.php
  6. from gitdb import IStream
  7. from git.util import (
  8. hex_to_bin,
  9. Actor,
  10. Iterable,
  11. Stats,
  12. finalize_process
  13. )
  14. from git.diff import Diffable
  15. from .tree import Tree
  16. from . import base
  17. from .util import (
  18. Traversable,
  19. Serializable,
  20. parse_date,
  21. altz_to_utctz_str,
  22. parse_actor_and_date,
  23. from_timestamp,
  24. )
  25. from git.compat import text_type
  26. from time import (
  27. time,
  28. daylight,
  29. altzone,
  30. timezone,
  31. localtime
  32. )
  33. import os
  34. from io import BytesIO
  35. import logging
  36. log = logging.getLogger('git.objects.commit')
  37. log.addHandler(logging.NullHandler())
  38. __all__ = ('Commit', )
  39. class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
  40. """Wraps a git Commit object.
  41. This class will act lazily on some of its attributes and will query the
  42. value on demand only if it involves calling the git binary."""
  43. # ENVIRONMENT VARIABLES
  44. # read when creating new commits
  45. env_author_date = "GIT_AUTHOR_DATE"
  46. env_committer_date = "GIT_COMMITTER_DATE"
  47. # CONFIGURATION KEYS
  48. conf_encoding = 'i18n.commitencoding'
  49. # INVARIANTS
  50. default_encoding = "UTF-8"
  51. # object configuration
  52. type = "commit"
  53. __slots__ = ("tree",
  54. "author", "authored_date", "author_tz_offset",
  55. "committer", "committed_date", "committer_tz_offset",
  56. "message", "parents", "encoding", "gpgsig")
  57. _id_attribute_ = "hexsha"
  def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
               committer=None, committed_date=None, committer_tz_offset=None,
               message=None, parents=None, encoding=None, gpgsig=None):
      """Instantiate a new Commit.

      Every keyword argument that is left at ``None`` is simply not assigned
      here; only explicitly provided values are stored on the instance.

      :param repo: passed through to the base class initializer
      :param binsha: 20 byte sha1
      :param tree: Tree
          Tree object; must be a ``Tree`` instance when given
      :param author: Actor
          is the author Actor object
      :param authored_date: int_seconds_since_epoch
          is the authored DateTime - use time.gmtime() to convert it into a
          different format
      :param author_tz_offset: int_seconds_west_of_utc
          is the timezone that the authored_date is in
      :param committer: Actor
          is the committer Actor object
      :param committed_date: int_seconds_since_epoch
          is the committed DateTime - use time.gmtime() to convert it into a
          different format
      :param committer_tz_offset: int_seconds_west_of_utc
          is the timezone that the committed_date is in
      :param message: string
          is the commit message
      :param parents: tuple( Commit, ... )
          List or tuple of Commit objects (or commit ids) which are our
          parent(s) in the commit dependency graph
      :param encoding: string
          encoding of the message, defaults to UTF-8
      :param gpgsig: signature text of the commit, if any
      :return: git.Commit
      :note:
          Timezone information is in the same format and in the same sign
          as what time.altzone returns. The sign is inverted compared to git's
          UTC timezone."""
      super(Commit, self).__init__(repo, binsha)

      # ``type`` is the builtin here: class attributes are not visible as
      # bare names inside a method body.
      if tree is not None:
          assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)

      # Assign in the same order the slots are declared; skipping None keeps
      # the attribute unset rather than storing a placeholder.
      provided = (
          ("tree", tree),
          ("author", author),
          ("authored_date", authored_date),
          ("author_tz_offset", author_tz_offset),
          ("committer", committer),
          ("committed_date", committed_date),
          ("committer_tz_offset", committer_tz_offset),
          ("message", message),
          ("parents", parents),
          ("encoding", encoding),
          ("gpgsig", gpgsig),
      )
      for slot_name, value in provided:
          if value is not None:
              setattr(self, slot_name, value)
  119. @classmethod
  120. def _get_intermediate_items(cls, commit):
  121. return commit.parents
  def _set_cache_(self, attr):
      """Fill in the attribute ``attr`` on demand.

      Attributes named in ``Commit.__slots__`` are obtained by reading this
      object from the repository's object database and deserializing it;
      anything else is delegated to the base class.

      :param attr: name of the attribute to populate"""
      if attr not in Commit.__slots__:
          super(Commit, self)._set_cache_(attr)
          return
      # END delegate foreign attributes

      # read the data in one chunk and hand the parser an in-memory file
      _binsha, _typename, self.size, odb_stream = self.repo.odb.stream(self.binsha)
      self._deserialize(BytesIO(odb_stream.read()))
  @property
  def authored_datetime(self):
      """:return: result of ``from_timestamp`` for the authored date and the
          author's timezone offset"""
      seconds, tz_offset = self.authored_date, self.author_tz_offset
      return from_timestamp(seconds, tz_offset)
  @property
  def committed_datetime(self):
      """:return: result of ``from_timestamp`` for the committed date and the
          committer's timezone offset"""
      seconds, tz_offset = self.committed_date, self.committer_tz_offset
      return from_timestamp(seconds, tz_offset)
  136. @property
  137. def summary(self):
  138. """:return: First line of the commit message"""
  139. return self.message.split('\n', 1)[0]
  def count(self, paths='', **kwargs):
      """Count the lines that git-rev-list prints when started at this commit.

      :param paths:
          is an optional path or a list of paths restricting the return value
          to commits actually containing the paths
      :param kwargs:
          Additional options to be passed to git-rev-list. They must not alter
          the output style of the command, or parsing will yield incorrect results
      :return: int defining the number of reachable commits"""
      rev_list = self.repo.git.rev_list
      # The two invocations are deliberately distinct: passing an empty path
      # list is not equivalent to passing no paths at all (merge commits
      # would be ignored in the former case).
      if not paths:
          listing = rev_list(self.hexsha, **kwargs)
      else:
          listing = rev_list(self.hexsha, '--', paths, **kwargs)
      return len(listing.splitlines())
  @property
  def name_rev(self):
      """
      :return:
          Output of ``git name-rev`` for this commit: a string describing the
          commit's hex sha based on the closest Reference.
          Mostly useful for UI purposes"""
      git_cmd = self.repo.git
      return git_cmd.name_rev(self)
  162. @classmethod
  163. def iter_items(cls, repo, rev, paths='', **kwargs):
  164. """Find all commits matching the given criteria.
  165. :param repo: is the Repo
  166. :param rev: revision specifier, see git-rev-parse for viable options
  167. :param paths:
  168. is an optional path or list of paths, if set only Commits that include the path
  169. or paths will be considered
  170. :param kwargs:
  171. optional keyword arguments to git rev-list where
  172. ``max_count`` is the maximum number of commits to fetch
  173. ``skip`` is the number of commits to skip
  174. ``since`` all commits since i.e. '1970-01-01'
  175. :return: iterator yielding Commit items"""
  176. if 'pretty' in kwargs:
  177. raise ValueError("--pretty cannot be used as parsing expects single sha's only")
  178. # END handle pretty
  179. # use -- in any case, to prevent possibility of ambiguous arguments
  180. # see https://github.com/gitpython-developers/GitPython/issues/264
  181. args = ['--']
  182. if paths:
  183. args.extend((paths, ))
  184. # END if paths
  185. proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
  186. return cls._iter_from_process_or_stream(repo, proc)
  def iter_parents(self, paths='', **kwargs):
      """Iterate _all_ parents of this commit.

      :param paths:
          Optional path or list of paths limiting the Commits to those that
          contain at least one of the paths
      :param kwargs: All arguments allowed by git-rev-list
      :return: Iterator yielding Commit objects which are parents of self """
      # The listing starts at this very commit, so at least one entry has to
      # be skipped; a caller-provided non-zero skip is passed through as is.
      requested_skip = kwargs.get("skip", 1)
      kwargs['skip'] = 1 if requested_skip == 0 else requested_skip

      return self.iter_items(self.repo, self, paths, **kwargs)
  @property
  def stats(self):
      """Create a git stat from changes between this commit and its first parent
      or from all changes done if this is the very first commit.

      :return: git.Stats"""
      if self.parents:
          numstat = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
          return Stats._list_from_string(self.repo, numstat)
      # END handle commits with parents

      # Root commit: there is nothing to diff against, use diff-tree --root.
      raw = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True)
      rows = []
      # The first output line is dropped (presumably the commit id that
      # diff-tree prints ahead of the numstat rows - verify against git docs).
      for entry in raw.splitlines()[1:]:
          # exactly three tab separated fields are expected per row
          insertions, deletions, filename = entry.split("\t")
          rows.append("%s\t%s\t%s\n" % (insertions, deletions, filename))
      # END for each numstat row
      return Stats._list_from_string(self.repo, "".join(rows))
  @classmethod
  def _iter_from_process_or_stream(cls, repo, proc_or_stream):
      """Turn rev-list style output into Commit objects, one per line.

      Only the sha is taken from each line; the Commit instances are created
      from it directly.

      :param repo: repository the yielded commits belong to
      :param proc_or_stream:
          either an object offering ``readline`` itself, or a process-like
          object whose ``stdout`` does - one sha per line
      :return: iterator returning Commit objects"""
      if hasattr(proc_or_stream, 'readline'):
          source = proc_or_stream
      else:
          source = proc_or_stream.stdout
      # END pick the readable end

      next_line = source.readline
      line = next_line()
      while line:
          hexsha = line.strip()
          if len(hexsha) > 40:
              # split additional information, as returned by bisect for instance
              hexsha, _extra = line.split(None, 1)
          # END handle extra info

          assert len(hexsha) == 40, "Invalid line: %s" % hexsha
          yield Commit(repo, hex_to_bin(hexsha))
          line = next_line()
      # END for each line in stream

      # TODO: Review this - it seems process handling got a bit out of control
      # due to many developers trying to fix the open file handles issue
      # Note: this is only reached once the iterator has been fully consumed.
      if hasattr(proc_or_stream, 'wait'):
          finalize_process(proc_or_stream)
  @classmethod
  def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False, author=None, committer=None,
                       author_date=None, commit_date=None):
      """Commit the given tree, creating a commit object.

      :param repo: Repo object the commit should be part of
      :param tree: Tree object or sha string
          the tree of the new commit; a ``str`` is resolved via ``repo.tree``
      :param message: Commit message. It may be an empty string if no message is provided.
      :param parent_commits:
          Optional Commit objects to use as parents for the new commit.
          If empty list, the commit will have no parents at all and become
          a root commit.
          If None , the current head commit will be the parent of the
          new commit object
      :param head:
          If True, the HEAD will be advanced to the new commit automatically.
          Else the HEAD will remain pointing on the previous commit. This could
          lead to undesired results when diffing files.
      :param author: The author, optional. If unset, ``Actor.author`` is asked
          with the repository configuration reader to obtain this value.
      :param committer: The committer, optional. If unset, ``Actor.committer``
          is asked with the repository configuration reader to obtain this value.
      :param author_date: The timestamp for the author field
      :param commit_date: The timestamp for the committer field
      :raise ValueError: if an entry of ``parent_commits`` is not an instance of ``cls``
      :return: Commit object representing the new commit
      :note:
          Dates are taken, in order of precedence, from the explicit argument,
          from the GIT_AUTHOR_DATE / GIT_COMMITTER_DATE environment variables,
          and finally from the current local time."""
      if parent_commits is not None:
          for candidate in parent_commits:
              if not isinstance(candidate, cls):
                  raise ValueError("Parent commit '%r' must be of type %s" % (candidate, cls))
          # end check parent commit types
      else:
          try:
              parent_commits = [repo.head.commit]
          except ValueError:
              # empty repositories have no head commit
              parent_commits = []
          # END handle parent commits
      # END if parent commits are unset

      # COMMITTER AND AUTHOR INFO
      config = repo.config_reader()
      committer = committer or Actor.committer(config)
      author = author or Actor.author(config)

      # PARSE THE DATES
      now = int(time())
      local_offset = altzone if (daylight and localtime().tm_isdst > 0) else timezone

      def resolve_date(explicit, env_name):
          """Return (time, offset): explicit value, else environment, else now."""
          from_env = os.environ.get(env_name, '')
          if explicit:
              return parse_date(explicit)
          if from_env:
              return parse_date(from_env)
          return now, local_offset

      author_time, author_offset = resolve_date(author_date, cls.env_author_date)
      committer_time, committer_offset = resolve_date(commit_date, cls.env_committer_date)

      # The configured commit encoding falls back to the class default.
      section, option = cls.conf_encoding.split('.')
      conf_encoding = config.get_value(section, option, cls.default_encoding)

      # if the tree is no object, make sure we create one - otherwise
      # the created commit object is invalid
      if isinstance(tree, str):
          tree = repo.tree(tree)
      # END tree conversion

      # CREATE NEW COMMIT
      # It starts out with the null sha; the real one is only known after
      # the serialized form was stored in the object database.
      new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
                       author, author_time, author_offset,
                       committer, committer_time, committer_offset,
                       message, parent_commits, conf_encoding)

      buffer = BytesIO()
      new_commit._serialize(buffer)
      size = buffer.tell()
      buffer.seek(0)

      stored = repo.odb.store(IStream(cls.type, size, buffer))
      new_commit.binsha = stored.binsha

      if not head:
          return new_commit
      # END nothing else to do without head advancement

      # need late import here, importing git at the very beginning throws
      # as well ...
      import git.refs
      try:
          repo.head.set_commit(new_commit, logmsg=message)
      except ValueError:
          # head is not yet set to the ref our HEAD points to
          # Happens on first commit
          master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message)
          repo.head.set_reference(master, logmsg='commit: Switching to %s' % master)
      # END handle empty repositories
      return new_commit
  348. #{ Serializable Implementation
  def _serialize(self, stream):
      """Write this commit to ``stream`` in the raw commit object layout.

      Headers are written in the order tree, parent(s), author, committer,
      optional encoding, optional gpgsig; a blank line and the message follow.

      :param stream: object with a ``write`` method accepting bytes
      :return: self"""
      emit = stream.write
      emit(("tree %s\n" % self.tree).encode('ascii'))
      for parent in self.parents:
          emit(("parent %s\n" % parent).encode('ascii'))
      # END for each parent

      author = self.author
      author_name = author.name
      committer = self.committer
      actor_format = "%s %s <%s> %s %s\n"

      author_line = actor_format % ("author", author_name, author.email,
                                    self.authored_date,
                                    altz_to_utctz_str(self.author_tz_offset))
      emit(author_line.encode(self.encoding))

      committer_name = committer.name
      committer_line = actor_format % ("committer", committer_name, committer.email,
                                       self.committed_date,
                                       altz_to_utctz_str(self.committer_tz_offset))
      emit(committer_line.encode(self.encoding))

      # the encoding header is only written when it differs from the default
      if self.encoding != self.default_encoding:
          emit(("encoding %s\n" % self.encoding).encode('ascii'))

      try:
          # The explicit __getattribute__ call is kept on purpose: a missing
          # signature is expected to surface as AttributeError here and is
          # treated as "no signature".
          if self.__getattribute__('gpgsig') is not None:
              emit(b"gpgsig")
              # every signature line is prefixed by a single space
              for sig_line in self.gpgsig.rstrip("\n").split("\n"):
                  emit((" " + sig_line + "\n").encode('ascii'))
      except AttributeError:
          pass

      emit(b"\n")

      # write plain bytes, be sure its encoded according to our encoding
      body = self.message
      emit(body.encode(self.encoding) if isinstance(body, text_type) else body)
      # END handle encoding
      return self
  def _deserialize(self, stream):
      """Populate this commit from a raw commit object.

      The stream is read line by line in this order: the tree line, any
      number of ``parent`` lines, the author line, the committer line,
      optional ``mergetag`` blocks (skipped), further headers up to the first
      blank line (``encoding`` and ``gpgsig`` are interpreted, others are
      ignored), and finally the message, which is everything that remains.

      :param stream: binary file-like object providing ``readline`` and ``read``
      :return: self"""
      readline = stream.readline
      self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

      self.parents = []
      next_line = None
      while True:
          parent_line = readline()
          if not parent_line.startswith(b'parent'):
              # first non-parent line is the author line, keep it for later
              next_line = parent_line
              break
          # END abort reading parents
          self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode('ascii'))))
      # END for each parent line
      self.parents = tuple(self.parents)

      # we don't know actual author encoding before we have parsed it, so keep the lines around
      author_line = next_line
      committer_line = readline()

      # we might run into one or more mergetag blocks, skip those for now
      next_line = readline()
      while next_line.startswith(b'mergetag '):
          next_line = readline()
          # continuation lines of a mergetag block start with a space
          while next_line.startswith(b' '):
              next_line = readline()
      # end skip mergetags

      # now we can have the encoding line, or an empty line followed by the optional
      # message.
      self.encoding = self.default_encoding
      self.gpgsig = None

      # read headers until the blank line that separates them from the message
      buf = next_line.strip()
      while buf:
          # The original test compared a 10 byte slice against the 9 byte
          # literal b"encoding " and therefore never matched; it also passed
          # a str to bytes.find. Both are corrected here.
          if buf.startswith(b"encoding "):
              self.encoding = buf[buf.find(b' ') + 1:].decode('ascii')
          elif buf.startswith(b"gpgsig "):
              sig = buf[buf.find(b' ') + 1:] + b"\n"
              is_next_header = False
              while True:
                  sigbuf = readline()
                  if not sigbuf:
                      break
                  if sigbuf[0:1] != b" ":
                      # the signature ended, this line is the next header
                      # (or the blank separator, which ends the loop)
                      buf = sigbuf.strip()
                      is_next_header = True
                      break
                  sig += sigbuf[1:]
              # end read all signature
              self.gpgsig = sig.rstrip(b"\n").decode('ascii')
              if is_next_header:
                  continue
          buf = readline().strip()
      # END for each header line

      # decode the authors name
      try:
          self.author, self.authored_date, self.author_tz_offset = \
              parse_actor_and_date(author_line.decode(self.encoding, 'replace'))
      except UnicodeDecodeError:
          log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding,
                    exc_info=True)

      try:
          self.committer, self.committed_date, self.committer_tz_offset = \
              parse_actor_and_date(committer_line.decode(self.encoding, 'replace'))
      except UnicodeDecodeError:
          log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding,
                    exc_info=True)
      # END handle author's encoding

      # whatever is left in the stream is the plain message, taken as is
      self.message = stream.read()
      try:
          self.message = self.message.decode(self.encoding, 'replace')
      except UnicodeDecodeError:
          log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True)
      # END exception handling

      return self
  459. #} END serializable implementation