您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

208 行
7.2KB

  1. # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
  2. #
  3. # This module is part of GitDB and is released under
  4. # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
  5. """Module containing a database to deal with packs"""
  6. from gitdb.db.base import (
  7. FileDBBase,
  8. ObjectDBR,
  9. CachingDB
  10. )
  11. from gitdb.util import LazyMixin
  12. from gitdb.exc import (
  13. BadObject,
  14. UnsupportedOperation,
  15. AmbiguousObjectName
  16. )
  17. from gitdb.pack import PackEntity
  18. from gitdb.utils.compat import xrange
  19. from functools import reduce
  20. import os
  21. import glob
  22. __all__ = ('PackedDB', )
  23. #{ Utilities
  24. class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
  25. """A database operating on a set of object packs"""
  26. # sort the priority list every N queries
  27. # Higher values are better, performance tests don't show this has
  28. # any effect, but it should have one
  29. _sort_interval = 500
  30. def __init__(self, root_path):
  31. super(PackedDB, self).__init__(root_path)
  32. # list of lists with three items:
  33. # * hits - number of times the pack was hit with a request
  34. # * entity - Pack entity instance
  35. # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
  36. # self._entities = list() # lazy loaded list
  37. self._hit_count = 0 # amount of hits
  38. self._st_mtime = 0 # last modification data of our root path
  39. def _set_cache_(self, attr):
  40. if attr == '_entities':
  41. self._entities = list()
  42. self.update_cache(force=True)
  43. # END handle entities initialization
  44. def _sort_entities(self):
  45. self._entities.sort(key=lambda l: l[0], reverse=True)
  46. def _pack_info(self, sha):
  47. """:return: tuple(entity, index) for an item at the given sha
  48. :param sha: 20 or 40 byte sha
  49. :raise BadObject:
  50. **Note:** This method is not thread-safe, but may be hit in multi-threaded
  51. operation. The worst thing that can happen though is a counter that
  52. was not incremented, or the list being in wrong order. So we safe
  53. the time for locking here, lets see how that goes"""
  54. # presort ?
  55. if self._hit_count % self._sort_interval == 0:
  56. self._sort_entities()
  57. # END update sorting
  58. for item in self._entities:
  59. index = item[2](sha)
  60. if index is not None:
  61. item[0] += 1 # one hit for you
  62. self._hit_count += 1 # general hit count
  63. return (item[1], index)
  64. # END index found in pack
  65. # END for each item
  66. # no hit, see whether we have to update packs
  67. # NOTE: considering packs don't change very often, we safe this call
  68. # and leave it to the super-caller to trigger that
  69. raise BadObject(sha)
  70. #{ Object DB Read
  71. def has_object(self, sha):
  72. try:
  73. self._pack_info(sha)
  74. return True
  75. except BadObject:
  76. return False
  77. # END exception handling
  78. def info(self, sha):
  79. entity, index = self._pack_info(sha)
  80. return entity.info_at_index(index)
  81. def stream(self, sha):
  82. entity, index = self._pack_info(sha)
  83. return entity.stream_at_index(index)
  84. def sha_iter(self):
  85. for entity in self.entities():
  86. index = entity.index()
  87. sha_by_index = index.sha
  88. for index in xrange(index.size()):
  89. yield sha_by_index(index)
  90. # END for each index
  91. # END for each entity
  92. def size(self):
  93. sizes = [item[1].index().size() for item in self._entities]
  94. return reduce(lambda x, y: x + y, sizes, 0)
  95. #} END object db read
  96. #{ object db write
  97. def store(self, istream):
  98. """Storing individual objects is not feasible as a pack is designed to
  99. hold multiple objects. Writing or rewriting packs for single objects is
  100. inefficient"""
  101. raise UnsupportedOperation()
  102. #} END object db write
  103. #{ Interface
  104. def update_cache(self, force=False):
  105. """
  106. Update our cache with the acutally existing packs on disk. Add new ones,
  107. and remove deleted ones. We keep the unchanged ones
  108. :param force: If True, the cache will be updated even though the directory
  109. does not appear to have changed according to its modification timestamp.
  110. :return: True if the packs have been updated so there is new information,
  111. False if there was no change to the pack database"""
  112. stat = os.stat(self.root_path())
  113. if not force and stat.st_mtime <= self._st_mtime:
  114. return False
  115. # END abort early on no change
  116. self._st_mtime = stat.st_mtime
  117. # packs are supposed to be prefixed with pack- by git-convention
  118. # get all pack files, figure out what changed
  119. pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
  120. our_pack_files = {item[1].pack().path() for item in self._entities}
  121. # new packs
  122. for pack_file in (pack_files - our_pack_files):
  123. # init the hit-counter/priority with the size, a good measure for hit-
  124. # probability. Its implemented so that only 12 bytes will be read
  125. entity = PackEntity(pack_file)
  126. self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
  127. # END for each new packfile
  128. # removed packs
  129. for pack_file in (our_pack_files - pack_files):
  130. del_index = -1
  131. for i, item in enumerate(self._entities):
  132. if item[1].pack().path() == pack_file:
  133. del_index = i
  134. break
  135. # END found index
  136. # END for each entity
  137. assert del_index != -1
  138. del(self._entities[del_index])
  139. # END for each removed pack
  140. # reinitialize prioritiess
  141. self._sort_entities()
  142. return True
  143. def entities(self):
  144. """:return: list of pack entities operated upon by this database"""
  145. return [item[1] for item in self._entities]
  146. def partial_to_complete_sha(self, partial_binsha, canonical_length):
  147. """:return: 20 byte sha as inferred by the given partial binary sha
  148. :param partial_binsha: binary sha with less than 20 bytes
  149. :param canonical_length: length of the corresponding canonical representation.
  150. It is required as binary sha's cannot display whether the original hex sha
  151. had an odd or even number of characters
  152. :raise AmbiguousObjectName:
  153. :raise BadObject: """
  154. candidate = None
  155. for item in self._entities:
  156. item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
  157. if item_index is not None:
  158. sha = item[1].index().sha(item_index)
  159. if candidate and candidate != sha:
  160. raise AmbiguousObjectName(partial_binsha)
  161. candidate = sha
  162. # END handle full sha could be found
  163. # END for each entity
  164. if candidate:
  165. return candidate
  166. # still not found ?
  167. raise BadObject(partial_binsha)
  168. #} END interface