|
- # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
- #
- # This module is part of GitDB and is released under
- # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
- from gitdb.db.base import (
- FileDBBase,
- ObjectDBR,
- ObjectDBW
- )
-
-
- from gitdb.exc import (
- BadObject,
- AmbiguousObjectName
- )
-
- from gitdb.stream import (
- DecompressMemMapReader,
- FDCompressedSha1Writer,
- FDStream,
- Sha1Writer
- )
-
- from gitdb.base import (
- OStream,
- OInfo
- )
-
- from gitdb.util import (
- file_contents_ro_filepath,
- ENOENT,
- hex_to_bin,
- bin_to_hex,
- exists,
- chmod,
- isdir,
- isfile,
- remove,
- mkdir,
- rename,
- dirname,
- basename,
- join
- )
-
- from gitdb.fun import (
- chunk_size,
- loose_object_header_info,
- write_object,
- stream_copy
- )
-
- from gitdb.utils.compat import MAXSIZE
- from gitdb.utils.encoding import force_bytes
-
- import tempfile
- import os
-
-
# Public API of this module: only the loose object database class is exported.
__all__ = ('LooseObjectDB',)
-
-
class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files.

    Every object lives in its own file below the database root. The relative
    location is computed by :meth:`object_path`: the first two characters of
    the hexsha name the directory, the remaining characters name the file."""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # Permission bits applied to object files once they are in place.
    # On windows we need to keep it writable, otherwise it cannot be removed
    # either
    new_objects_mode = int("444", 8)
    if os.name == 'nt':
        new_objects_mode = int("644", 8)

    def __init__(self, root_path):
        """Initialize the database.

        :param root_path: handed on unchanged to the base class initializer"""
        super(LooseObjectDB, self).__init__(root_path)
        # cache mapping hexsha -> path of an object file that was seen to exist
        self._hexsha_to_file = dict()
        # Additional Flags - set to 0 after the first failure caused by them.
        # Depending on the root, this might work for some mounts, for others not, which
        # is why it is per instance
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        try:
            return self._hexsha_to_file[hexsha]
        except KeyError:
            # cache miss - fall through to the filesystem
            pass

        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            # remember the hit so later lookups skip the filesystem check
            self._hexsha_to_file[hexsha] = path
            return path
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: 20 byte binary sha1 string which matches the given name uniquely
        :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName: if more than one object matches the prefix
        :raise BadObject: if no object matches the prefix"""
        # the prefix does not change while iterating, convert it only once
        prefix = force_bytes(partial_hexsha)
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(prefix):
                # it can't ever find the same object twice, so a second hit
                # means the prefix is ambiguous
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno == ENOENT:
                raise BadObject(sha)
            # Any other error may be caused by our additional open flags:
            # try again without them.
            try:
                m = file_contents_ro_filepath(db_path)
            except OSError:
                raise BadObject(sha)
            # It only worked without our flag, so don't try it again on this
            # instance. This must happen before returning, otherwise the
            # failing open would be repeated on every single access.
            self._fd_open_flags = 0
            return m

    def set_ostream(self, stream):
        """Set the output stream, see the base class for the return value.

        :param stream: None or an object implementing the Sha1Writer interface
        :raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
        return super(LooseObjectDB, self).set_ostream(stream)

    def info(self, sha):
        """
        :return: OInfo built from the header of the object identified by sha
        :raise BadObject: if object could not be located"""
        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            # assure release of system resources - only the header was needed
            if hasattr(m, 'close'):
                m.close()

    def stream(self, sha):
        """
        :return: OStream wrapping a decompressing reader for the object
            identified by sha
        :raise BadObject: if object could not be located"""
        m = self._map_loose_object(sha)
        # the reader is asked to close the map when it is deleted
        typ, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, typ, size, stream)

    def has_object(self, sha):
        """:return: True if an object file exists for the given binary sha"""
        try:
            self.readable_db_object_path(bin_to_hex(sha))
        except BadObject:
            return False
        return True

    def store(self, istream):
        """Write the object provided by istream and record its sha on it.

        If no output stream was set, the data goes to a temporary file in the
        database root which is afterwards moved to its final object path.
        Otherwise everything is written to the configured output stream and
        no file is created by this method.

        note: The sha we produce will be hex by nature

        :param istream: stream providing ``read``, ``type``, ``size`` and
            ``binsha``. If ``binsha`` is already set, the data is copied as is,
            otherwise a new object including its header is written.
        :return: the given istream, with ``binsha`` set"""
        has_sha = istream.binsha is not None

        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if has_sha:
                # direct stream copy, the data is written through unchanged
                writer = FDStream(fd)
            else:
                writer = FDCompressedSha1Writer(fd)

        try:
            try:
                if has_sha:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
            finally:
                # only close writers we created ourselves
                if tmp_path:
                    writer.close()
        except BaseException:
            # assure tmpfile removal on any error, then let the error propagate
            if tmp_path:
                os.remove(tmp_path)
            raise

        if has_sha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)

            # rename onto existing doesn't work on windows
            if os.name == 'nt' and isfile(obj_path):
                # the object is already stored, drop our copy
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)

            # The tmp file started out private to its creator, apply the
            # mode configured for stored objects.
            chmod(obj_path, self.new_objects_mode)

        istream.binsha = hex_to_bin(hexsha)
        return istream

    def sha_iter(self):
        """Yield one binary sha per file below the root that looks like an object.

        A file qualifies if its directory name is 2 characters and its own
        name 38 characters long; both are joined and passed to hex_to_bin."""
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)

    def size(self):
        """:return: number of objects yielded by :meth:`sha_iter`"""
        # count lazily instead of materializing all shas first
        return sum(1 for _ in self.sha_iter())
|