You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

152 lines
5.7KB

  1. """Module with a simple buffer implementation using the memory manager"""
  2. import sys
  3. __all__ = ["SlidingWindowMapBuffer"]
  4. import sys
  5. try:
  6. bytes
  7. except NameError:
  8. bytes = str
  9. class SlidingWindowMapBuffer(object):
  10. """A buffer like object which allows direct byte-wise object and slicing into
  11. memory of a mapped file. The mapping is controlled by the provided cursor.
  12. The buffer is relative, that is if you map an offset, index 0 will map to the
  13. first byte at the offset you used during initialization or begin_access
  14. **Note:** Although this type effectively hides the fact that there are mapped windows
  15. underneath, it can unfortunately not be used in any non-pure python method which
  16. needs a buffer or string"""
  17. __slots__ = (
  18. '_c', # our cursor
  19. '_size', # our supposed size
  20. )
  21. def __init__(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
  22. """Initalize the instance to operate on the given cursor.
  23. :param cursor: if not None, the associated cursor to the file you want to access
  24. If None, you have call begin_access before using the buffer and provide a cursor
  25. :param offset: absolute offset in bytes
  26. :param size: the total size of the mapping. Defaults to the maximum possible size
  27. From that point on, the __len__ of the buffer will be the given size or the file size.
  28. If the size is larger than the mappable area, you can only access the actually available
  29. area, although the length of the buffer is reported to be your given size.
  30. Hence it is in your own interest to provide a proper size !
  31. :param flags: Additional flags to be passed to os.open
  32. :raise ValueError: if the buffer could not achieve a valid state"""
  33. self._c = cursor
  34. if cursor and not self.begin_access(cursor, offset, size, flags):
  35. raise ValueError("Failed to allocate the buffer - probably the given offset is out of bounds")
  36. # END handle offset
  37. def __del__(self):
  38. self.end_access()
  39. def __enter__(self):
  40. return self
  41. def __exit__(self, exc_type, exc_value, traceback):
  42. self.end_access()
  43. def __len__(self):
  44. return self._size
  45. def __getitem__(self, i):
  46. if isinstance(i, slice):
  47. return self.__getslice__(i.start or 0, i.stop or self._size)
  48. c = self._c
  49. assert c.is_valid()
  50. if i < 0:
  51. i = self._size + i
  52. if not c.includes_ofs(i):
  53. c.use_region(i, 1)
  54. # END handle region usage
  55. return c.buffer()[i - c.ofs_begin()]
  56. def __getslice__(self, i, j):
  57. c = self._c
  58. # fast path, slice fully included - safes a concatenate operation and
  59. # should be the default
  60. assert c.is_valid()
  61. if i < 0:
  62. i = self._size + i
  63. if j == sys.maxsize:
  64. j = self._size
  65. if j < 0:
  66. j = self._size + j
  67. if (c.ofs_begin() <= i) and (j < c.ofs_end()):
  68. b = c.ofs_begin()
  69. return c.buffer()[i - b:j - b]
  70. else:
  71. l = j - i # total length
  72. ofs = i
  73. # It's fastest to keep tokens and join later, especially in py3, which was 7 times slower
  74. # in the previous iteration of this code
  75. pyvers = sys.version_info[:2]
  76. md = list()
  77. while l:
  78. c.use_region(ofs, l)
  79. assert c.is_valid()
  80. d = c.buffer()[:l]
  81. ofs += len(d)
  82. l -= len(d)
  83. # Make sure we don't keep references, as c.use_region() might attempt to free resources, but
  84. # can't unless we use pure bytes
  85. if hasattr(d, 'tobytes'):
  86. d = d.tobytes()
  87. md.append(d)
  88. # END while there are bytes to read
  89. return bytes().join(md)
  90. # END fast or slow path
  91. #{ Interface
  92. def begin_access(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
  93. """Call this before the first use of this instance. The method was already
  94. called by the constructor in case sufficient information was provided.
  95. For more information no the parameters, see the __init__ method
  96. :param path: if cursor is None the existing one will be used.
  97. :return: True if the buffer can be used"""
  98. if cursor:
  99. self._c = cursor
  100. # END update our cursor
  101. # reuse existing cursors if possible
  102. if self._c is not None and self._c.is_associated():
  103. res = self._c.use_region(offset, size, flags).is_valid()
  104. if res:
  105. # if given size is too large or default, we computer a proper size
  106. # If its smaller, we assume the combination between offset and size
  107. # as chosen by the user is correct and use it !
  108. # If not, the user is in trouble.
  109. if size > self._c.file_size():
  110. size = self._c.file_size() - offset
  111. # END handle size
  112. self._size = size
  113. # END set size
  114. return res
  115. # END use our cursor
  116. return False
  117. def end_access(self):
  118. """Call this method once you are done using the instance. It is automatically
  119. called on destruction, and should be called just in time to allow system
  120. resources to be freed.
  121. Once you called end_access, you must call begin access before reusing this instance!"""
  122. self._size = 0
  123. if self._c is not None:
  124. self._c.unuse_region()
  125. # END unuse region
  126. def cursor(self):
  127. """:return: the currently set cursor which provides access to the data"""
  128. return self._c
  129. #}END interface