You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

364 lines
12KB

  1. # util.py
  2. # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
  3. #
  4. # This module is part of GitPython and is released under
  5. # the BSD License: http://www.opensource.org/licenses/bsd-license.php
  6. """Module for general utility functions"""
  7. from git.util import (
  8. IterableList,
  9. Actor
  10. )
  11. import re
  12. from collections import deque as Deque
  13. from string import digits
  14. import time
  15. import calendar
  16. from datetime import datetime, timedelta, tzinfo
# Names exported when this module is star-imported.
__all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date',
           'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
           'verify_utctz', 'Actor', 'tzoffset', 'utc')
# Zero-length duration; tzoffset.dst() returns it for every datetime.
ZERO = timedelta(0)
  21. #{ Functions
  22. def mode_str_to_int(modestr):
  23. """
  24. :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used
  25. :return:
  26. String identifying a mode compatible to the mode methods ids of the
  27. stat module regarding the rwx permissions for user, group and other,
  28. special flags and file system flags, i.e. whether it is a symlink
  29. for example."""
  30. mode = 0
  31. for iteration, char in enumerate(reversed(modestr[-6:])):
  32. mode += int(char) << iteration * 3
  33. # END for each char
  34. return mode
  35. def get_object_type_by_name(object_type_name):
  36. """
  37. :return: type suitable to handle the given object type name.
  38. Use the type to create new instances.
  39. :param object_type_name: Member of TYPES
  40. :raise ValueError: In case object_type_name is unknown"""
  41. if object_type_name == b"commit":
  42. from . import commit
  43. return commit.Commit
  44. elif object_type_name == b"tag":
  45. from . import tag
  46. return tag.TagObject
  47. elif object_type_name == b"blob":
  48. from . import blob
  49. return blob.Blob
  50. elif object_type_name == b"tree":
  51. from . import tree
  52. return tree.Tree
  53. else:
  54. raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
  55. def utctz_to_altz(utctz):
  56. """we convert utctz to the timezone in seconds, it is the format time.altzone
  57. returns. Git stores it as UTC timezone which has the opposite sign as well,
  58. which explains the -1 * ( that was made explicit here )
  59. :param utctz: git utc timezone string, i.e. +0200"""
  60. return -1 * int(float(utctz) / 100 * 3600)
  61. def altz_to_utctz_str(altz):
  62. """As above, but inverses the operation, returning a string that can be used
  63. in commit objects"""
  64. utci = -1 * int((float(altz) / 3600) * 100)
  65. utcs = str(abs(utci))
  66. utcs = "0" * (4 - len(utcs)) + utcs
  67. prefix = (utci < 0 and '-') or '+'
  68. return prefix + utcs
  69. def verify_utctz(offset):
  70. """:raise ValueError: if offset is incorrect
  71. :return: offset"""
  72. fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
  73. if len(offset) != 5:
  74. raise fmt_exc
  75. if offset[0] not in "+-":
  76. raise fmt_exc
  77. if offset[1] not in digits or\
  78. offset[2] not in digits or\
  79. offset[3] not in digits or\
  80. offset[4] not in digits:
  81. raise fmt_exc
  82. # END for each char
  83. return offset
  84. class tzoffset(tzinfo):
  85. def __init__(self, secs_west_of_utc, name=None):
  86. self._offset = timedelta(seconds=-secs_west_of_utc)
  87. self._name = name or 'fixed'
  88. def utcoffset(self, dt):
  89. return self._offset
  90. def tzname(self, dt):
  91. return self._name
  92. def dst(self, dt):
  93. return ZERO
  94. utc = tzoffset(0, 'UTC')
  95. def from_timestamp(timestamp, tz_offset):
  96. """Converts a timestamp + tz_offset into an aware datetime instance."""
  97. utc_dt = datetime.fromtimestamp(timestamp, utc)
  98. try:
  99. local_dt = utc_dt.astimezone(tzoffset(tz_offset))
  100. return local_dt
  101. except ValueError:
  102. return utc_dt
  103. def parse_date(string_date):
  104. """
  105. Parse the given date as one of the following
  106. * Git internal format: timestamp offset
  107. * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200.
  108. * ISO 8601 2005-04-07T22:13:13
  109. The T can be a space as well
  110. :return: Tuple(int(timestamp_UTC), int(offset)), both in seconds since epoch
  111. :raise ValueError: If the format could not be understood
  112. :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY.
  113. """
  114. # git time
  115. try:
  116. if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
  117. timestamp, offset = string_date.split()
  118. timestamp = int(timestamp)
  119. return timestamp, utctz_to_altz(verify_utctz(offset))
  120. else:
  121. offset = "+0000" # local time by default
  122. if string_date[-5] in '-+':
  123. offset = verify_utctz(string_date[-5:])
  124. string_date = string_date[:-6] # skip space as well
  125. # END split timezone info
  126. offset = utctz_to_altz(offset)
  127. # now figure out the date and time portion - split time
  128. date_formats = []
  129. splitter = -1
  130. if ',' in string_date:
  131. date_formats.append("%a, %d %b %Y")
  132. splitter = string_date.rfind(' ')
  133. else:
  134. # iso plus additional
  135. date_formats.append("%Y-%m-%d")
  136. date_formats.append("%Y.%m.%d")
  137. date_formats.append("%m/%d/%Y")
  138. date_formats.append("%d.%m.%Y")
  139. splitter = string_date.rfind('T')
  140. if splitter == -1:
  141. splitter = string_date.rfind(' ')
  142. # END handle 'T' and ' '
  143. # END handle rfc or iso
  144. assert splitter > -1
  145. # split date and time
  146. time_part = string_date[splitter + 1:] # skip space
  147. date_part = string_date[:splitter]
  148. # parse time
  149. tstruct = time.strptime(time_part, "%H:%M:%S")
  150. for fmt in date_formats:
  151. try:
  152. dtstruct = time.strptime(date_part, fmt)
  153. utctime = calendar.timegm((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
  154. tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
  155. dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
  156. return int(utctime), offset
  157. except ValueError:
  158. continue
  159. # END exception handling
  160. # END for each fmt
  161. # still here ? fail
  162. raise ValueError("no format matched")
  163. # END handle format
  164. except Exception:
  165. raise ValueError("Unsupported date format: %s" % string_date)
  166. # END handle exceptions
  167. # precompiled regex
  168. _re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
  169. _re_only_actor = re.compile(r'^.+? (.*)$')
  170. def parse_actor_and_date(line):
  171. """Parse out the actor (author or committer) info from a line like::
  172. author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
  173. :return: [Actor, int_seconds_since_epoch, int_timezone_offset]"""
  174. actor, epoch, offset = '', 0, 0
  175. m = _re_actor_epoch.search(line)
  176. if m:
  177. actor, epoch, offset = m.groups()
  178. else:
  179. m = _re_only_actor.search(line)
  180. actor = m.group(1) if m else line or ''
  181. return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
  182. #} END functions
  183. #{ Classes
  184. class ProcessStreamAdapter(object):
  185. """Class wireing all calls to the contained Process instance.
  186. Use this type to hide the underlying process to provide access only to a specified
  187. stream. The process is usually wrapped into an AutoInterrupt class to kill
  188. it if the instance goes out of scope."""
  189. __slots__ = ("_proc", "_stream")
  190. def __init__(self, process, stream_name):
  191. self._proc = process
  192. self._stream = getattr(process, stream_name)
  193. def __getattr__(self, attr):
  194. return getattr(self._stream, attr)
  195. class Traversable(object):
  196. """Simple interface to perform depth-first or breadth-first traversals
  197. into one direction.
  198. Subclasses only need to implement one function.
  199. Instances of the Subclass must be hashable"""
  200. __slots__ = ()
  201. @classmethod
  202. def _get_intermediate_items(cls, item):
  203. """
  204. Returns:
  205. List of items connected to the given item.
  206. Must be implemented in subclass
  207. """
  208. raise NotImplementedError("To be implemented in subclass")
  209. def list_traverse(self, *args, **kwargs):
  210. """
  211. :return: IterableList with the results of the traversal as produced by
  212. traverse()"""
  213. out = IterableList(self._id_attribute_)
  214. out.extend(self.traverse(*args, **kwargs))
  215. return out
  216. def traverse(self, predicate=lambda i, d: True,
  217. prune=lambda i, d: False, depth=-1, branch_first=True,
  218. visit_once=True, ignore_self=1, as_edge=False):
  219. """:return: iterator yielding of items found when traversing self
  220. :param predicate: f(i,d) returns False if item i at depth d should not be included in the result
  221. :param prune:
  222. f(i,d) return True if the search should stop at item i at depth d.
  223. Item i will not be returned.
  224. :param depth:
  225. define at which level the iteration should not go deeper
  226. if -1, there is no limit
  227. if 0, you would effectively only get self, the root of the iteration
  228. i.e. if 1, you would only get the first level of predecessors/successors
  229. :param branch_first:
  230. if True, items will be returned branch first, otherwise depth first
  231. :param visit_once:
  232. if True, items will only be returned once, although they might be encountered
  233. several times. Loops are prevented that way.
  234. :param ignore_self:
  235. if True, self will be ignored and automatically pruned from
  236. the result. Otherwise it will be the first item to be returned.
  237. If as_edge is True, the source of the first edge is None
  238. :param as_edge:
  239. if True, return a pair of items, first being the source, second the
  240. destination, i.e. tuple(src, dest) with the edge spanning from
  241. source to destination"""
  242. visited = set()
  243. stack = Deque()
  244. stack.append((0, self, None)) # self is always depth level 0
  245. def addToStack(stack, item, branch_first, depth):
  246. lst = self._get_intermediate_items(item)
  247. if not lst:
  248. return
  249. if branch_first:
  250. stack.extendleft((depth, i, item) for i in lst)
  251. else:
  252. reviter = ((depth, lst[i], item) for i in range(len(lst) - 1, -1, -1))
  253. stack.extend(reviter)
  254. # END addToStack local method
  255. while stack:
  256. d, item, src = stack.pop() # depth of item, item, item_source
  257. if visit_once and item in visited:
  258. continue
  259. if visit_once:
  260. visited.add(item)
  261. rval = (as_edge and (src, item)) or item
  262. if prune(rval, d):
  263. continue
  264. skipStartItem = ignore_self and (item is self)
  265. if not skipStartItem and predicate(rval, d):
  266. yield rval
  267. # only continue to next level if this is appropriate !
  268. nd = d + 1
  269. if depth > -1 and nd > depth:
  270. continue
  271. addToStack(stack, item, branch_first, nd)
  272. # END for each item on work stack
  273. class Serializable(object):
  274. """Defines methods to serialize and deserialize objects from and into a data stream"""
  275. __slots__ = ()
  276. def _serialize(self, stream):
  277. """Serialize the data of this object into the given data stream
  278. :note: a serialized object would ``_deserialize`` into the same object
  279. :param stream: a file-like object
  280. :return: self"""
  281. raise NotImplementedError("To be implemented in subclass")
  282. def _deserialize(self, stream):
  283. """Deserialize all information regarding this object from the stream
  284. :param stream: a file-like object
  285. :return: self"""
  286. raise NotImplementedError("To be implemented in subclass")