You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

263 lines
8.1KB

  1. # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
  2. #
  3. # This module is part of GitDB and is released under
  4. # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
  5. from gitdb.db.base import (
  6. FileDBBase,
  7. ObjectDBR,
  8. ObjectDBW
  9. )
  10. from gitdb.exc import (
  11. BadObject,
  12. AmbiguousObjectName
  13. )
  14. from gitdb.stream import (
  15. DecompressMemMapReader,
  16. FDCompressedSha1Writer,
  17. FDStream,
  18. Sha1Writer
  19. )
  20. from gitdb.base import (
  21. OStream,
  22. OInfo
  23. )
  24. from gitdb.util import (
  25. file_contents_ro_filepath,
  26. ENOENT,
  27. hex_to_bin,
  28. bin_to_hex,
  29. exists,
  30. chmod,
  31. isdir,
  32. isfile,
  33. remove,
  34. mkdir,
  35. rename,
  36. dirname,
  37. basename,
  38. join
  39. )
  40. from gitdb.fun import (
  41. chunk_size,
  42. loose_object_header_info,
  43. write_object,
  44. stream_copy
  45. )
  46. from gitdb.utils.compat import MAXSIZE
  47. from gitdb.utils.encoding import force_bytes
  48. import tempfile
  49. import os
# Public API of this module: only the loose-object database class is exported.
__all__ = ('LooseObjectDB', )
  51. class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
  52. """A database which operates on loose object files"""
  53. # CONFIGURATION
  54. # chunks in which data will be copied between streams
  55. stream_chunk_size = chunk_size
  56. # On windows we need to keep it writable, otherwise it cannot be removed
  57. # either
  58. new_objects_mode = int("444", 8)
  59. if os.name == 'nt':
  60. new_objects_mode = int("644", 8)
  61. def __init__(self, root_path):
  62. super(LooseObjectDB, self).__init__(root_path)
  63. self._hexsha_to_file = dict()
  64. # Additional Flags - might be set to 0 after the first failure
  65. # Depending on the root, this might work for some mounts, for others not, which
  66. # is why it is per instance
  67. self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
  68. #{ Interface
  69. def object_path(self, hexsha):
  70. """
  71. :return: path at which the object with the given hexsha would be stored,
  72. relative to the database root"""
  73. return join(hexsha[:2], hexsha[2:])
  74. def readable_db_object_path(self, hexsha):
  75. """
  76. :return: readable object path to the object identified by hexsha
  77. :raise BadObject: If the object file does not exist"""
  78. try:
  79. return self._hexsha_to_file[hexsha]
  80. except KeyError:
  81. pass
  82. # END ignore cache misses
  83. # try filesystem
  84. path = self.db_path(self.object_path(hexsha))
  85. if exists(path):
  86. self._hexsha_to_file[hexsha] = path
  87. return path
  88. # END handle cache
  89. raise BadObject(hexsha)
  90. def partial_to_complete_sha_hex(self, partial_hexsha):
  91. """:return: 20 byte binary sha1 string which matches the given name uniquely
  92. :param name: hexadecimal partial name (bytes or ascii string)
  93. :raise AmbiguousObjectName:
  94. :raise BadObject: """
  95. candidate = None
  96. for binsha in self.sha_iter():
  97. if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
  98. # it can't ever find the same object twice
  99. if candidate is not None:
  100. raise AmbiguousObjectName(partial_hexsha)
  101. candidate = binsha
  102. # END for each object
  103. if candidate is None:
  104. raise BadObject(partial_hexsha)
  105. return candidate
  106. #} END interface
  107. def _map_loose_object(self, sha):
  108. """
  109. :return: memory map of that file to allow random read access
  110. :raise BadObject: if object could not be located"""
  111. db_path = self.db_path(self.object_path(bin_to_hex(sha)))
  112. try:
  113. return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
  114. except OSError as e:
  115. if e.errno != ENOENT:
  116. # try again without noatime
  117. try:
  118. return file_contents_ro_filepath(db_path)
  119. except OSError:
  120. raise BadObject(sha)
  121. # didn't work because of our flag, don't try it again
  122. self._fd_open_flags = 0
  123. else:
  124. raise BadObject(sha)
  125. # END handle error
  126. # END exception handling
  127. def set_ostream(self, stream):
  128. """:raise TypeError: if the stream does not support the Sha1Writer interface"""
  129. if stream is not None and not isinstance(stream, Sha1Writer):
  130. raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
  131. return super(LooseObjectDB, self).set_ostream(stream)
  132. def info(self, sha):
  133. m = self._map_loose_object(sha)
  134. try:
  135. typ, size = loose_object_header_info(m)
  136. return OInfo(sha, typ, size)
  137. finally:
  138. if hasattr(m, 'close'):
  139. m.close()
  140. # END assure release of system resources
  141. def stream(self, sha):
  142. m = self._map_loose_object(sha)
  143. type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
  144. return OStream(sha, type, size, stream)
  145. def has_object(self, sha):
  146. try:
  147. self.readable_db_object_path(bin_to_hex(sha))
  148. return True
  149. except BadObject:
  150. return False
  151. # END check existence
  152. def store(self, istream):
  153. """note: The sha we produce will be hex by nature"""
  154. tmp_path = None
  155. writer = self.ostream()
  156. if writer is None:
  157. # open a tmp file to write the data to
  158. fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
  159. if istream.binsha is None:
  160. writer = FDCompressedSha1Writer(fd)
  161. else:
  162. writer = FDStream(fd)
  163. # END handle direct stream copies
  164. # END handle custom writer
  165. try:
  166. try:
  167. if istream.binsha is not None:
  168. # copy as much as possible, the actual uncompressed item size might
  169. # be smaller than the compressed version
  170. stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
  171. else:
  172. # write object with header, we have to make a new one
  173. write_object(istream.type, istream.size, istream.read, writer.write,
  174. chunk_size=self.stream_chunk_size)
  175. # END handle direct stream copies
  176. finally:
  177. if tmp_path:
  178. writer.close()
  179. # END assure target stream is closed
  180. except:
  181. if tmp_path:
  182. os.remove(tmp_path)
  183. raise
  184. # END assure tmpfile removal on error
  185. hexsha = None
  186. if istream.binsha:
  187. hexsha = istream.hexsha
  188. else:
  189. hexsha = writer.sha(as_hex=True)
  190. # END handle sha
  191. if tmp_path:
  192. obj_path = self.db_path(self.object_path(hexsha))
  193. obj_dir = dirname(obj_path)
  194. if not isdir(obj_dir):
  195. mkdir(obj_dir)
  196. # END handle destination directory
  197. # rename onto existing doesn't work on windows
  198. if os.name == 'nt':
  199. if isfile(obj_path):
  200. remove(tmp_path)
  201. else:
  202. rename(tmp_path, obj_path)
  203. # end rename only if needed
  204. else:
  205. rename(tmp_path, obj_path)
  206. # END handle win32
  207. # make sure its readable for all ! It started out as rw-- tmp file
  208. # but needs to be rwrr
  209. chmod(obj_path, self.new_objects_mode)
  210. # END handle dry_run
  211. istream.binsha = hex_to_bin(hexsha)
  212. return istream
  213. def sha_iter(self):
  214. # find all files which look like an object, extract sha from there
  215. for root, dirs, files in os.walk(self.root_path()):
  216. root_base = basename(root)
  217. if len(root_base) != 2:
  218. continue
  219. for f in files:
  220. if len(f) != 38:
  221. continue
  222. yield hex_to_bin(root_base + f)
  223. # END for each file
  224. # END for each walk iteration
  225. def size(self):
  226. return len(tuple(self.sha_iter()))