|
- # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
- #
- # This module is part of GitDB and is released under
- # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
- import binascii
- import os
- import mmap
- import sys
- import time
- import errno
-
- from io import BytesIO
-
- from smmap import (
- StaticWindowMapManager,
- SlidingWindowMapManager,
- SlidingWindowMapBuffer
- )
-
- # Module-wide memory map manager instance; sliding_ro_buffer() creates its
- # cursors from it.
- # Use it to free cached (and unused) resources.
- mman = SlidingWindowMapManager()
- # END handle mman
-
- import hashlib
-
try:
    from struct import unpack_from
except ImportError:
    # struct does not offer unpack_from here: emulate it with unpack and calcsize
    from struct import unpack, calcsize
    __calcsize_cache = {}

    def unpack_from(fmt, data, offset=0):
        """Unpack ``fmt`` from ``data``, starting at byte ``offset``.

        :param fmt: struct format string
        :param data: sliceable buffer holding the packed values
        :param offset: index of the first byte to unpack
        :return: tuple of unpacked values, as returned by ``struct.unpack``"""
        # the size of each format is computed once and remembered afterwards
        size = __calcsize_cache.get(fmt)
        if size is None:
            size = __calcsize_cache[fmt] = calcsize(fmt)
        # END handle size cache
        return unpack(fmt, data[offset:offset + size])
# END own unpack_from implementation
-
-
- #{ Aliases
-
# hexadecimal <-> binary conversion
from binascii import a2b_hex as hex_to_bin
from binascii import b2a_hex as bin_to_hex

# errors
from errno import ENOENT

# os shortcuts
from os import (
    mkdir,
    chmod,
    rename,
    read,
    write,
    close,
    fsync
)
from os.path import (
    exists,
    isdir,
    isfile,
    dirname,
    basename,
    join
)
-
-
- def _retry(func, *args, **kwargs):
- # Wrapper around functions, that are problematic on "Windows". Sometimes
- # the OS or someone else has still a handle to the file
- if sys.platform == "win32":
- for _ in range(10):
- try:
- return func(*args, **kwargs)
- except Exception:
- time.sleep(0.1)
- return func(*args, **kwargs)
- else:
- return func(*args, **kwargs)
-
-
def remove(*args, **kwargs):
    """Remove a file with ``os.remove``, routed through ``_retry``.

    All arguments are forwarded to ``os.remove`` unchanged.

    :return: whatever the wrapped ``os.remove`` call returns"""
    result = _retry(os.remove, *args, **kwargs)
    return result
-
-
- # Backwards compatibility imports
- from gitdb.const import (
- NULL_BIN_SHA,
- NULL_HEX_SHA
- )
-
- #} END Aliases
-
- #{ compatibility stuff ...
-
-
- class _RandomAccessBytesIO(object):
-
- """Wrapper to provide required functionality in case memory maps cannot or may
- not be used. This is only really required in python 2.4"""
- __slots__ = '_sio'
-
- def __init__(self, buf=''):
- self._sio = BytesIO(buf)
-
- def __getattr__(self, attr):
- return getattr(self._sio, attr)
-
- def __len__(self):
- return len(self.getvalue())
-
- def __getitem__(self, i):
- return self.getvalue()[i]
-
- def __getslice__(self, start, end):
- return self.getvalue()[start:end]
-
-
def byte_ord(b):
    """
    Convert a single byte to its integer value.

    Whatever ``ord`` accepts is converted by it. Any value ``ord`` rejects with
    a ``TypeError`` - like the integers obtained when indexing Python 3 bytes -
    is handed back unchanged.

    :return: ``ord(b)`` if that call succeeds, otherwise ``b`` itself
    """
    try:
        code = ord(b)
    except TypeError:
        # ord cannot handle this value, pass it through as it is
        return b
    # END handle conversion
    return code
-
- #} END compatibility stuff ...
-
- #{ Routines
-
-
def make_sha(source=b''):
    """Create a new SHA-1 hash object.

    The former fallback to the ``sha`` module is gone: ``hashlib`` is imported
    unconditionally by this module, so the fallback could never be reached.

    :param source: bytes to feed into the hash right away, defaults to no data
    :return: sha1 object as created by ``hashlib.sha1``"""
    return hashlib.sha1(source)
-
-
def allocate_memory(size):
    """Allocate a block of memory which can be used like a file.

    :param size: amount of bytes to allocate
    :return: an anonymous read-write memory map of ``size`` bytes. If ``size`` is 0,
        or if the map cannot be created, a ``_RandomAccessBytesIO`` of that size is
        returned instead, filled with zero-bytes"""
    # empty chunks are handled gracefully, without trying to map anything
    if size == 0:
        return _RandomAccessBytesIO(b'')
    # END handle empty chunks

    try:
        block = mmap.mmap(-1, size)    # read-write by default
    except EnvironmentError:
        # Use real memory instead. This may of course fail as well if the
        # requested amount of memory is not available in one chunk.
        block = _RandomAccessBytesIO(b"\0" * size)
    # END handle memory allocation
    return block
-
-
def file_contents_ro(fd, stream=False, allow_mmap=True):
    """:return: read-only contents of the file represented by the file descriptor fd.
        If the file was mapped, a memory map is returned which supports stream and
        random access. Otherwise the bytes read from ``fd`` are returned, wrapped
        into a ``_RandomAccessBytesIO`` if ``stream`` is True.

    :param fd: file descriptor opened for reading
    :param stream: if False, random access is provided, otherwise the stream interface
        is provided.
    :param allow_mmap: if True, it is allowed to map the contents into memory, which
        allows large files to be handled and accessed efficiently. The file-descriptor
        will change its position if this is False, or if mapping the file failed
    :raise OSError: if the descriptor can neither be mapped nor be read"""
    if allow_mmap:
        try:
            # supports stream and random access
            try:
                return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
            except EnvironmentError:
                # some interpreters want the actual size instead of 0
                return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
            # END handle explicit size
        except (EnvironmentError, ValueError):
            # ValueError is what mmap raises if it is asked to map an empty file.
            # In any case, the contents are read manually below.
            pass
        # END exception handling
    # END handle mmap

    # read manually - a single os.read call may return less than we asked for,
    # hence we keep reading until we have everything or hit the end of the file
    remaining = os.fstat(fd).st_size
    chunks = []
    while remaining > 0:
        chunk = os.read(fd, remaining)
        if not chunk:
            break
        # END handle end of file
        chunks.append(chunk)
        remaining -= len(chunk)
    # END while there is data to read
    contents = b"".join(chunks)

    if stream:
        return _RandomAccessBytesIO(contents)
    return contents
-
-
def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
    """Open the file at filepath read-only and return its contents.

    The file is opened in binary mode where the platform knows such a mode, and
    its descriptor is closed again before this function returns or raises.

    :return: contents of the file as returned by ``file_contents_ro``
    :param filepath: path to the file to read
    :param stream: see ``file_contents_ro``
    :param allow_mmap: see ``file_contents_ro``
    :param flags: additional flags to pass to os.open
    :raise OSError: If the file could not be opened

    **Note** O_NOATIME is not used here by default. Callers which want it, like
    loose object databases, hand it in using the ``flags`` parameter"""
    open_flags = os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags
    fd = os.open(filepath, open_flags)
    try:
        contents = file_contents_ro(fd, stream, allow_mmap)
    finally:
        # assure file is closed, no matter what
        close(fd)
    # END handle descriptor
    return contents
-
-
def sliding_ro_buffer(filepath, flags=0):
    """
    Create a buffer for filepath on top of the module's memory map manager.

    :param filepath: path handed to ``mman.make_cursor``
    :param flags: passed on to ``SlidingWindowMapBuffer`` as its ``flags`` argument
    :return: ``SlidingWindowMapBuffer`` using a cursor which our mapped memory
        manager created for filepath"""
    cursor = mman.make_cursor(filepath)
    return SlidingWindowMapBuffer(cursor, flags=flags)
-
-
def to_hex_sha(sha):
    """:return: hexified version of sha. A sha of length 40 is taken to be
        hexadecimal already and is returned as it is"""
    already_hex = len(sha) == 40
    return sha if already_hex else bin_to_hex(sha)
-
-
def to_bin_sha(sha):
    """:return: binary version of sha. A sha of length 20 is taken to be
        binary already and is returned as it is"""
    already_binary = len(sha) == 20
    return sha if already_binary else hex_to_bin(sha)
-
-
- #} END routines
-
-
- #{ Utilities
-
class LazyMixin(object):

    """
    Mixin which retrieves attribute values lazily, upon their first access.

    Looking up an attribute which is not set yet triggers ``_set_cache_``, whose
    job it is to compute and store the value on the instance. Once stored in the
    instance's dict or slot, the regular attribute lookup finds the value and
    this machinery is not involved anymore.
    """

    __slots__ = ()

    def __getattr__(self, attr):
        """
        Called for attributes which cannot be found the usual way. The subclass
        gets the chance to create and cache the attribute, and the value it set
        is returned.

        :raise AttributeError: if ``_set_cache_`` did not create the attribute"""
        self._set_cache_(attr)
        # the plain lookup raises in case the cache was not created
        value = object.__getattribute__(self, attr)
        return value

    def _set_cache_(self, attr):
        """
        Hook to be overridden in derived classes.

        An implementation checks whether it knows the attribute named by attr,
        and if so, creates it and stores it on the instance. Attributes it does
        not know are to be ignored, or handed to the implementation of its base.

        It is free to set additional attributes along the way, for instance if
        the information it retrieved covers more than the requested attribute.

        The default implementation does nothing."""
        pass
-
-
class LockedFD(object):

    """
    Guards read and write operations on a single file on disk using a lock file.

    To write to 'file', the lock file 'file.lock' is created exclusively and
    written to instead. Committing renames the lock file to overwrite the
    original file, rolling back deletes the lock file.

    To read 'file', the lock file is created as well, but only to prevent
    writers from succeeding while we are reading the file.

    If an instance is destroyed while its file descriptor is still open, the
    operation is rolled back.

    **note** with this setup, parallel reading is not possible"""
    __slots__ = ('_filepath', '_fd', '_write')

    def __init__(self, filepath):
        """Initialize an instance with the given filepath"""
        self._filepath = filepath
        self._write = None      # None until open() is called, the write flag afterwards
        self._fd = None         # descriptor handed out by open(), None while closed

    def __del__(self):
        # nothing to do if the file descriptor was never opened or is closed already
        if self._fd is None:
            return
        self.rollback()

    def _lockfilepath(self):
        """:return: path of the lock file belonging to our file"""
        return "%s.lock" % self._filepath

    def open(self, write=False, stream=False):
        """
        Obtain the lock and open the file descriptor for reading or writing, both
        in binary mode.

        :param write: if True, the descriptor of the lock file is provided, opened
            for writing. Otherwise the actual file is opened read-only.
        :param stream: if True, the file descriptor will be wrapped into an
            ``FDStream`` object
        :return: fd to read from or write to, or the stream wrapping it. The fd is
            still maintained by this instance and must not be closed directly
        :raise IOError: if the lock could not be retrieved
        :raise OSError: If the actual file could not be opened for reading
        :raise AssertionError: if this method was called before

        **note** must only be called once"""
        if self._write is not None:
            raise AssertionError("Called %s multiple times" % self.open)
        # END assure single call
        self._write = write

        # creating the lock file exclusively fails if it exists already
        o_binary = getattr(os, 'O_BINARY', 0)
        lock_path = self._lockfilepath()
        lock_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | o_binary
        try:
            lock_fd = os.open(lock_path, lock_flags, 0o600)
            if write:
                # writers write into the lock file
                self._fd = lock_fd
            else:
                # readers only need the lock file to exist
                os.close(lock_fd)
            # END handle file descriptor
        except OSError:
            raise IOError("Lock at %r could not be obtained" % lock_path)
        # END handle lock retrieval

        # readers have no descriptor yet, they get the actual file
        if self._fd is None:
            try:
                self._fd = os.open(self._filepath, os.O_RDONLY | o_binary)
            except BaseException:
                # whatever went wrong, our lock file must not stay behind
                remove(lock_path)
                raise
            # END handle lockfile
        # END open descriptor for reading

        if not stream:
            return self._fd
        # END handle plain descriptor

        # need delayed import
        from gitdb.stream import FDStream
        return FDStream(self._fd)

    def commit(self):
        """Finish the operation successfully. After writing, this moves your changes
        into the actual file.
        The file descriptor will be closed, and the lockfile handled.

        :raise AssertionError: if ``open`` was not called before

        **Note** can be called multiple times"""
        self._end_writing(successful=True)

    def rollback(self):
        """Abort your operation without any changes. The file descriptor will be
        closed, and the lock released.

        :raise AssertionError: if ``open`` was not called before

        **Note** can be called multiple times"""
        self._end_writing(successful=False)

    def _end_writing(self, successful=True):
        """Close the file descriptor and handle the lock file according to the
        write mode.

        :param successful: if True and we were opened for writing, the lock file
            replaces the actual file. In all other cases it is deleted."""
        if self._write is None:
            raise AssertionError("Cannot end operation if it wasn't started yet")
        # END assure we were opened

        # closed already, or the lock was never obtained - nothing is left to do
        if self._fd is None:
            return
        # END handle closed descriptor

        os.close(self._fd)
        self._fd = None

        lockfile = self._lockfilepath()
        if not (self._write and successful):
            # we were reading or the write failed: just release the lock
            remove(lockfile)
            return
        # END handle lock release

        # on windows, rename does not silently overwrite the existing one
        if sys.platform == "win32" and isfile(self._filepath):
            remove(self._filepath)
        # END win32 special handling
        os.rename(lockfile, self._filepath)

        # assure others can at least read the file - the lock file was created
        # with permissions for its owner only
        chmod(self._filepath, 0o644)
-
- #} END utilities
|