You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

208 lines
7.2KB

  1. """Module with functions which are supposed to be as fast as possible"""
  2. from stat import S_ISDIR
  3. from git.compat import (
  4. byte_ord,
  5. safe_decode,
  6. defenc,
  7. xrange,
  8. text_type,
  9. bchr
  10. )
  11. __all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
  12. 'traverse_tree_recursive')
  13. def tree_to_stream(entries, write):
  14. """Write the give list of entries into a stream using its write method
  15. :param entries: **sorted** list of tuples with (binsha, mode, name)
  16. :param write: write method which takes a data string"""
  17. ord_zero = ord('0')
  18. bit_mask = 7 # 3 bits set
  19. for binsha, mode, name in entries:
  20. mode_str = b''
  21. for i in xrange(6):
  22. mode_str = bchr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str
  23. # END for each 8 octal value
  24. # git slices away the first octal if its zero
  25. if byte_ord(mode_str[0]) == ord_zero:
  26. mode_str = mode_str[1:]
  27. # END save a byte
  28. # here it comes: if the name is actually unicode, the replacement below
  29. # will not work as the binsha is not part of the ascii unicode encoding -
  30. # hence we must convert to an utf8 string for it to work properly.
  31. # According to my tests, this is exactly what git does, that is it just
  32. # takes the input literally, which appears to be utf8 on linux.
  33. if isinstance(name, text_type):
  34. name = name.encode(defenc)
  35. write(b''.join((mode_str, b' ', name, b'\0', binsha)))
  36. # END for each item
  37. def tree_entries_from_data(data):
  38. """Reads the binary representation of a tree and returns tuples of Tree items
  39. :param data: data block with tree data (as bytes)
  40. :return: list(tuple(binsha, mode, tree_relative_path), ...)"""
  41. ord_zero = ord('0')
  42. space_ord = ord(' ')
  43. len_data = len(data)
  44. i = 0
  45. out = []
  46. while i < len_data:
  47. mode = 0
  48. # read mode
  49. # Some git versions truncate the leading 0, some don't
  50. # The type will be extracted from the mode later
  51. while byte_ord(data[i]) != space_ord:
  52. # move existing mode integer up one level being 3 bits
  53. # and add the actual ordinal value of the character
  54. mode = (mode << 3) + (byte_ord(data[i]) - ord_zero)
  55. i += 1
  56. # END while reading mode
  57. # byte is space now, skip it
  58. i += 1
  59. # parse name, it is NULL separated
  60. ns = i
  61. while byte_ord(data[i]) != 0:
  62. i += 1
  63. # END while not reached NULL
  64. # default encoding for strings in git is utf8
  65. # Only use the respective unicode object if the byte stream was encoded
  66. name = data[ns:i]
  67. name = safe_decode(name)
  68. # byte is NULL, get next 20
  69. i += 1
  70. sha = data[i:i + 20]
  71. i = i + 20
  72. out.append((sha, mode, name))
  73. # END for each byte in data stream
  74. return out
  75. def _find_by_name(tree_data, name, is_dir, start_at):
  76. """return data entry matching the given name and tree mode
  77. or None.
  78. Before the item is returned, the respective data item is set
  79. None in the tree_data list to mark it done"""
  80. try:
  81. item = tree_data[start_at]
  82. if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
  83. tree_data[start_at] = None
  84. return item
  85. except IndexError:
  86. pass
  87. # END exception handling
  88. for index, item in enumerate(tree_data):
  89. if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
  90. tree_data[index] = None
  91. return item
  92. # END if item matches
  93. # END for each item
  94. return None
  95. def _to_full_path(item, path_prefix):
  96. """Rebuild entry with given path prefix"""
  97. if not item:
  98. return item
  99. return (item[0], item[1], path_prefix + item[2])
  100. def traverse_trees_recursive(odb, tree_shas, path_prefix):
  101. """
  102. :return: list with entries according to the given binary tree-shas.
  103. The result is encoded in a list
  104. of n tuple|None per blob/commit, (n == len(tree_shas)), where
  105. * [0] == 20 byte sha
  106. * [1] == mode as int
  107. * [2] == path relative to working tree root
  108. The entry tuple is None if the respective blob/commit did not
  109. exist in the given tree.
  110. :param tree_shas: iterable of shas pointing to trees. All trees must
  111. be on the same level. A tree-sha may be None in which case None
  112. :param path_prefix: a prefix to be added to the returned paths on this level,
  113. set it '' for the first iteration
  114. :note: The ordering of the returned items will be partially lost"""
  115. trees_data = []
  116. nt = len(tree_shas)
  117. for tree_sha in tree_shas:
  118. if tree_sha is None:
  119. data = []
  120. else:
  121. data = tree_entries_from_data(odb.stream(tree_sha).read())
  122. # END handle muted trees
  123. trees_data.append(data)
  124. # END for each sha to get data for
  125. out = []
  126. out_append = out.append
  127. # find all matching entries and recursively process them together if the match
  128. # is a tree. If the match is a non-tree item, put it into the result.
  129. # Processed items will be set None
  130. for ti, tree_data in enumerate(trees_data):
  131. for ii, item in enumerate(tree_data):
  132. if not item:
  133. continue
  134. # END skip already done items
  135. entries = [None for _ in range(nt)]
  136. entries[ti] = item
  137. sha, mode, name = item # its faster to unpack @UnusedVariable
  138. is_dir = S_ISDIR(mode) # type mode bits
  139. # find this item in all other tree data items
  140. # wrap around, but stop one before our current index, hence
  141. # ti+nt, not ti+1+nt
  142. for tio in range(ti + 1, ti + nt):
  143. tio = tio % nt
  144. entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
  145. # END for each other item data
  146. # if we are a directory, enter recursion
  147. if is_dir:
  148. out.extend(traverse_trees_recursive(
  149. odb, [((ei and ei[0]) or None) for ei in entries], path_prefix + name + '/'))
  150. else:
  151. out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
  152. # END handle recursion
  153. # finally mark it done
  154. tree_data[ii] = None
  155. # END for each item
  156. # we are done with one tree, set all its data empty
  157. del(tree_data[:])
  158. # END for each tree_data chunk
  159. return out
  160. def traverse_tree_recursive(odb, tree_sha, path_prefix):
  161. """
  162. :return: list of entries of the tree pointed to by the binary tree_sha. An entry
  163. has the following format:
  164. * [0] 20 byte sha
  165. * [1] mode as int
  166. * [2] path relative to the repository
  167. :param path_prefix: prefix to prepend to the front of all returned paths"""
  168. entries = []
  169. data = tree_entries_from_data(odb.stream(tree_sha).read())
  170. # unpacking/packing is faster than accessing individual items
  171. for sha, mode, name in data:
  172. if S_ISDIR(mode):
  173. entries.extend(traverse_tree_recursive(odb, sha, path_prefix + name + '/'))
  174. else:
  175. entries.append((sha, mode, path_prefix + name))
  176. # END for each item
  177. return entries