|
- """Module containing a memory manager which provides a sliding window on a number of memory mapped files"""
- from .util import (
- MapWindow,
- MapRegion,
- MapRegionList,
- is_64_bit,
- string_types,
- buffer,
- )
-
- import sys
- from functools import reduce
-
- __all__ = ["StaticWindowMapManager", "SlidingWindowMapManager", "WindowCursor"]
- #{ Utilities
-
- #}END utilities
-
-
class WindowCursor(object):

    """
    Pointer into the mapped region of the memory manager, keeping the map
    alive until it is destroyed and no other client uses it.

    Cursors should not be created manually, but are instead returned by a
    manager's ``make_cursor()`` method.

    **Note:**: The same implementation serves both the static and the sliding window
    manager, which behave quite differently. It could be improved, but there is
    no real need to do so."""
    __slots__ = (
        '_manager',  # the manager keeping all file regions
        '_rlist',    # a regions list with regions for our file
        '_region',   # our current class:`MapRegion` or None
        '_ofs',      # relative offset from the actually mapped area to our start area
        '_size'      # maximum size we should provide
    )

    def __init__(self, manager=None, regions=None):
        """Initialize a cursor which does not point to any region yet.

        :param manager: the manager which owns ``regions``, or None for an unassociated cursor
        :param regions: the regions list of the file this cursor operates on, or None"""
        self._manager = manager
        self._rlist = regions
        self._region = None
        self._ofs = 0
        self._size = 0

    def __del__(self):
        self._destroy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._destroy()

    def _destroy(self):
        """Release the current region and, if our regions list holds no region anymore,
        remove that list from the manager.

        It is safe to call this more than once, which happens if the cursor is used
        as context manager and garbage collected later on."""
        self.unuse_region()

        if self._rlist is None:
            return
        # END nothing to unregister

        try:
            if len(self._rlist) == 0:
                fdict = self._manager._fdict
                key = self._rlist.path_or_fd()
                # Only unregister the list we actually hold. After a previous _destroy() the
                # manager may have registered a different list under the same key for another
                # cursor, which must stay tracked.
                if fdict.get(key) is self._rlist:
                    del fdict[key]
                # END list is still the registered one
            # END remove regions list from manager
        except (TypeError, KeyError):
            # Best effort only, as this also runs from __del__: a key which cannot be
            # looked up (TypeError) or which vanished already (KeyError) is ignored.
            pass
        # END exception handling

    def _copy_from(self, rhs):
        """Copy all data from rhs into this instance, handles usage count

        :param rhs: the cursor to copy from. It may be unassociated, in which case this
            instance ends up unassociated as well"""
        self._manager = rhs._manager
        # type(None) cannot be called with an argument, hence an unassociated rhs
        # needs to be special-cased
        self._rlist = None if rhs._rlist is None else type(rhs._rlist)(rhs._rlist)
        self._region = rhs._region
        self._ofs = rhs._ofs
        self._size = rhs._size

        if self._rlist is not None:
            for region in self._rlist:
                region.increment_client_count()
            # END for each region
        # END handle regions list

        if self._region is not None:
            self._region.increment_client_count()
        # END handle regions

    def __copy__(self):
        """copy module interface"""
        cpy = type(self)()
        cpy._copy_from(self)
        return cpy

    #{ Interface
    def assign(self, rhs):
        """Assign rhs to this instance. This is required in order to get a real copy.
        Alternatively, you can copy an existing instance using the copy module"""
        self._destroy()
        self._copy_from(rhs)

    def use_region(self, offset=0, size=0, flags=0):
        """Assure we point to a window which allows access to the given offset into the file

        :param offset: absolute offset in bytes into the file
        :param size: amount of bytes to map. If 0, all available bytes will be mapped
        :param flags: additional flags to be given to os.open in case a file handle is initially opened
            for mapping. Has no effect if a region can actually be reused.
        :return: this instance - it should be queried for whether it points to a valid memory region.
            This is not the case if the mapping failed because we reached the end of the file

        **Note:**: The size actually mapped may be smaller than the given size. If that is the case,
        either the file has reached its end, or the map was created between two existing regions"""
        need_region = True
        man = self._manager
        fsize = self._rlist.file_size()
        # clamp size to window size, a value of 0 means 'unlimited' for both of them
        size = min(size or fsize, man.window_size() or fsize)

        if self._region is not None:
            if self._region.includes_ofs(offset):
                need_region = False
            else:
                self.unuse_region()
            # END handle existing region
        # END check existing region

        # offset too large ? Then we stay (or have just become) invalid
        if offset >= fsize:
            return self
        # END handle offset

        if need_region:
            self._region = man._obtain_region(self._rlist, offset, size, flags, False)
            self._region.increment_client_count()
        # END need region handling

        self._ofs = offset - self._region._b
        self._size = min(size, self._region.ofs_end() - offset)

        return self

    def unuse_region(self):
        """Unuse the current region. Does nothing if we have no current region

        **Note:** the cursor unuses the region automatically upon destruction. It is recommended
        to un-use the region once you are done reading from it in persistent cursors as it
        helps to free up resource more quickly"""
        if self._region is not None:
            self._region.increment_client_count(-1)
        self._region = None
        # note: should reset ofs and size, but we spare that for performance. Its not
        # allowed to query information if we are not valid !

    def buffer(self):
        """Return a buffer object which allows access to our memory region from our offset
        to the window size. Please note that it might be smaller than you requested when calling use_region()

        **Note:** You can only obtain a buffer if this instance is_valid() !

        **Note:** buffers should not be cached passed the duration of your access as it will
        prevent resources from being freed even though they might not be accounted for anymore !"""
        return buffer(self._region.buffer(), self._ofs, self._size)

    def map(self):
        """
        :return: the underlying raw memory map. Please note that the offset and size is likely to be different
            to what you set as offset and size. Use it only if you are sure about the region it maps, which is the whole
            file in case of StaticWindowMapManager"""
        return self._region.map()

    def is_valid(self):
        """:return: True if we have a valid and usable region"""
        return self._region is not None

    def is_associated(self):
        """:return: True if we are associated with a specific file already"""
        return self._rlist is not None

    def ofs_begin(self):
        """:return: offset to the first byte pointed to by our cursor

        **Note:** only if is_valid() is True"""
        return self._region._b + self._ofs

    def ofs_end(self):
        """:return: offset to one past the last available byte

        **Note:** only if is_valid() is True"""
        # unroll method calls for performance !
        return self._region._b + self._ofs + self._size

    def size(self):
        """:return: amount of bytes we point to"""
        return self._size

    def region(self):
        """:return: our mapped region, or None if nothing is mapped yet"""
        return self._region

    def includes_ofs(self, ofs):
        """:return: True if the given absolute offset is contained in the cursors
            current region

        **Note:** cursor must be valid for this to work"""
        # unroll methods
        return (self._region._b + self._ofs) <= ofs < (self._region._b + self._ofs + self._size)

    def file_size(self):
        """:return: size of the underlying file"""
        return self._rlist.file_size()

    def path_or_fd(self):
        """:return: path or file descriptor of the underlying mapped file"""
        return self._rlist.path_or_fd()

    def path(self):
        """:return: path of the underlying mapped file
        :raise ValueError: if attached path is not a path"""
        if isinstance(self._rlist.path_or_fd(), int):
            raise ValueError("Path queried although mapping was applied to a file descriptor")
        # END handle type
        return self._rlist.path_or_fd()

    def fd(self):
        """:return: file descriptor used to create the underlying mapping.

        **Note:** it is not required to be valid anymore
        :raise ValueError: if the mapping was not created by a file descriptor"""
        if isinstance(self._rlist.path_or_fd(), string_types()):
            raise ValueError("File descriptor queried although mapping was generated from path")
        # END handle type
        return self._rlist.path_or_fd()

    #} END interface
-
-
class StaticWindowMapManager(object):

    """Provides a manager which will produce single size cursors that are allowed
    to always map the whole file.

    Clients must be written to specifically know that they are accessing their data
    through a StaticWindowMapManager, as they otherwise have to deal with their window size.

    These clients would have to use a SlidingWindowMapBuffer to hide this fact.

    This type will always use a maximum window size, and optimize certain methods to
    accommodate this fact"""

    __slots__ = [
        '_fdict',               # mapping of path or file descriptor -> regions list
        '_window_size',         # maximum size of a window
        '_max_memory_size',     # maximum amount of memory we may allocate
        '_max_handle_count',    # maximum amount of handles to keep open
        '_memory_size',         # currently allocated memory size
        '_handle_count',        # amount of currently allocated file handles
    ]

    #{ Configuration
    MapRegionListCls = MapRegionList
    MapWindowCls = MapWindow
    MapRegionCls = MapRegion
    WindowCursorCls = WindowCursor
    #} END configuration

    _MB_in_bytes = 1024 * 1024

    def __init__(self, window_size=0, max_memory_size=0, max_open_handles=sys.maxsize):
        """initialize the manager with the given parameters.

        :param window_size: if negative, a default window size will be chosen depending on
            the operating system's architecture (64 MB, or 1024 MB on 64 bit systems).
            If 0, the window may have any size, which basically results in mapping the whole file at once
        :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions.
            If 0, a default will be set depending on the system's architecture (1024 MB, or 8192 MB
            on 64 bit systems).
            It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate
        :param max_open_handles: if not maxint, limit the amount of open file handles to the given number.
            Otherwise the amount is only limited by the system itself. If a system or soft limit is hit,
            the manager will free as many handles as possible"""
        self._fdict = dict()
        self._window_size = window_size
        self._max_memory_size = max_memory_size
        self._max_handle_count = max_open_handles
        self._memory_size = 0
        self._handle_count = 0

        if window_size < 0:
            coeff = 1024 if is_64_bit() else 64
            self._window_size = coeff * self._MB_in_bytes
        # END handle max window size

        if max_memory_size == 0:
            coeff = 8192 if is_64_bit() else 1024
            self._max_memory_size = coeff * self._MB_in_bytes
        # END handle max memory size

    #{ Internal Methods

    def _collect_lru_region(self, size):
        """Unmap the region which was least-recently used and has no client

        :param size: size of the region we want to map next (assuming its not already mapped partially or full
            if 0, we try to free any available region
        :return: Amount of freed regions

        .. Note::
            We don't raise exceptions anymore, in order to keep the system working, allowing temporary overallocation.
            If the system runs out of memory, it will tell.

        .. TODO::
            implement a case where all unused regions are discarded efficiently.
            Currently its only brute force
        """
        num_found = 0
        while (size == 0) or (self._memory_size + size > self._max_memory_size):
            lru_region = None
            lru_list = None
            for regions in self._fdict.values():
                for region in regions:
                    # check client count - if it's 1, it's just us.
                    # Among those, the smallest _uc value wins.
                    if (region.client_count() == 1 and
                            (lru_region is None or region._uc < lru_region._uc)):
                        lru_region = region
                        lru_list = regions
                    # END update lru_region
                # END for each region
            # END for each regions list

            if lru_region is None:
                # nothing left which could be freed, even if we are still above the limit
                break
            # END handle region not found

            num_found += 1
            del(lru_list[lru_list.index(lru_region)])
            lru_region.increment_client_count(-1)
            self._memory_size -= lru_region.size()
            self._handle_count -= 1
        # END while there is more memory to free
        return num_found

    def _obtain_region(self, a, offset, size, flags, is_recursive):
        """Utility to create a new region - for more information on the parameters,
        see WindowCursor.use_region.

        :param a: A regions (a)rray
        :param is_recursive: True if this call is the retry after all collectable
            regions have been freed. A failure is propagated in that case
        :return: The region of the file, which is created if the list is still empty"""
        if self._memory_size + size > self._max_memory_size:
            self._collect_lru_region(size)
        # END handle collection

        r = None
        if a:
            # a static manager only ever keeps a single region per file
            assert len(a) == 1
            r = a[0]
        else:
            try:
                r = self.MapRegionCls(a.path_or_fd(), 0, sys.maxsize, flags)
            except Exception:
                # apparently we are out of system resources or hit a limit
                # As many more operations are likely to fail in that condition (
                # like reading a file from disk, etc) we free up as much as possible
                # and try exactly once more
                if is_recursive:
                    # we already tried this, and still have no success in obtaining
                    # a mapping. This is an exception, so we propagate it
                    raise
                # END handle existing recursion
                self._collect_lru_region(0)
                return self._obtain_region(a, offset, size, flags, True)
            # END handle exceptions

            self._handle_count += 1
            self._memory_size += r.size()
            a.append(r)
        # END handle array

        assert r.includes_ofs(offset)
        return r

    #}END internal methods

    #{ Interface
    def make_cursor(self, path_or_fd):
        """
        :return: a cursor pointing to the given path or file descriptor.
            It can be used to map new regions of the file into memory

        **Note:** if a file descriptor is given, it is assumed to be open and valid,
        but may be closed afterwards. To refer to the same file, you may reuse
        your existing file descriptor, but keep in mind that new windows can only
        be mapped as long as it stays valid. This is why using actual file paths
        is preferred unless you plan to keep the file descriptor open.

        **Note:** file descriptors are problematic as they are not necessarily unique, as two
        different files opened and closed in succession might have the same file descriptor id.

        **Note:** Using file descriptors directly is faster once new windows are mapped as it
        prevents the file to be opened again just for the purpose of mapping it."""
        regions = self._fdict.get(path_or_fd)
        if regions is None:
            regions = self.MapRegionListCls(path_or_fd)
            self._fdict[path_or_fd] = regions
        # END obtain region for path
        return self.WindowCursorCls(self, regions)

    def collect(self):
        """Collect all available free-to-collect mapped regions

        :return: Amount of freed handles"""
        return self._collect_lru_region(0)

    def num_file_handles(self):
        """:return: amount of file handles in use. Each mapped region uses one file handle"""
        return self._handle_count

    def num_open_files(self):
        """:return: amount of files which currently have at least one mapped region"""
        return sum(1 for rlist in self._fdict.values() if len(rlist) > 0)

    def window_size(self):
        """:return: size of each window when allocating new regions"""
        return self._window_size

    def mapped_memory_size(self):
        """:return: amount of bytes currently mapped in total"""
        return self._memory_size

    def max_file_handles(self):
        """:return: maximum amount of handles we may have opened"""
        return self._max_handle_count

    def max_mapped_memory_size(self):
        """:return: maximum amount of memory we may allocate"""
        return self._max_memory_size

    #} END interface

    #{ Special Purpose Interface

    def force_map_handle_removal_win(self, base_path):
        """ONLY AVAILABLE ON WINDOWS
        On windows removing files is not allowed if anybody still has it opened.
        If this process is ourselves, and if the whole process uses this memory
        manager (as far as the parent framework is concerned) we can enforce
        closing all memory maps whose path matches the given base path to
        allow the respective operation after all.
        The respective system must NOT access the closed memory regions anymore !
        This really may only be used if you know that the items which keep
        the cursors alive will not be using it anymore. They need to be recreated !

        :param base_path: all regions of files whose path starts with this one are released
        :return: Amount of closed handles, which is always 0 on non-windows platforms

        **Note:** does nothing on non-windows platforms

        **Note:** files which were registered by file descriptor have no path and are skipped

        **Note:** released regions remain in their regions list, and neither the memory nor the
        handle counter is adjusted here"""
        if sys.platform != 'win32':
            return 0
        # END early bailout

        num_closed = 0
        for path, rlist in self._fdict.items():
            if isinstance(path, int):
                # the key is a file descriptor, it cannot match a path
                continue
            # END skip file descriptors
            if path.startswith(base_path):
                for region in rlist:
                    region.release()
                    num_closed += 1
            # END path matches
        # END for each path
        return num_closed
    #} END special purpose interface
-
-
class SlidingWindowMapManager(StaticWindowMapManager):

    """Keeps track of the mapped memory regions of one or more files, and hands out
    additional regions while making sure that regions of the same file do not overlap.

    If the global memory limit is reached, or if no more file handles can be opened
    (each mmap call needs one), the least recently used regions which are not in use
    anymore are unloaded automatically.

    **Note:** currently not thread-safe !

    **Note:** in the current implementation, windows are unloaded automatically if either no
    more memory maps can be created (as the open file handles limit is hit), or if more than a
    safe amount of memory was allocated already, which would possibly cause memory allocations
    to fail as our address space is full."""

    __slots__ = tuple()

    def __init__(self, window_size=-1, max_memory_size=0, max_open_handles=sys.maxsize):
        """Same as the base type's initializer, but with a default window size of -1"""
        super(SlidingWindowMapManager, self).__init__(window_size, max_memory_size, max_open_handles)

    def _obtain_region(self, a, offset, size, flags, is_recursive):
        """Find the region of ``a`` which contains ``offset``, or map a new one.

        :param a: regions list of a single file, assumed to be sorted by region start offset
        :param offset: absolute offset into the file which must be part of the region
        :param size: desired size of the mapping in bytes
        :param flags: handed on to the region type when a new region is created
        :param is_recursive: True if this is the retry after freeing all collectable
            regions, in which case a failure to map is propagated
        :return: the existing or newly created region"""
        # Binary search for a region which includes the offset already
        first = 0
        last = len(a)
        while first < last:
            center = (first + last) // 2
            candidate = a[center]
            if candidate._b > offset:
                last = center
            elif candidate.includes_ofs(offset):
                return candidate
            else:
                first = center + 1
            # END handle position
        # END while bisecting

        # Nothing suitable is mapped yet - set up the windows to work with: the one
        # we want, surrounded by the start and the end of the file
        window_size = self._window_size
        left = self.MapWindowCls(0, 0)
        window = self.MapWindowCls(offset, size)
        right = self.MapWindowCls(a.file_size(), 0)

        # Try to stay below the memory limit before mapping anything new
        if self._memory_size + window_size > self._max_memory_size:
            self._collect_lru_region(window_size)
        # END handle collection

        # The new region goes in front of the first region which starts behind the offset,
        # or to the end if there is none. This has to be determined after collecting, as
        # that may have removed regions from the list.
        len_regions = len(a)
        insert_pos = next((i for i, region in enumerate(a) if region._b > offset), len_regions)

        # Existing neighbours limit how far our window may grow
        if insert_pos < len_regions:
            right = self.MapWindowCls.from_region(a[insert_pos])
        # END adjust right window
        if insert_pos > 0:
            left = self.MapWindowCls.from_region(a[insert_pos - 1])
        # END adjust left window

        window.extend_left_to(left, window_size)
        window.extend_right_to(right, window_size)
        window.align()

        # aligning may have moved the end of the window beyond the right boundary
        if window.ofs_end() > right.ofs:
            window.size = right.ofs - window.ofs
        # END readjust size

        try:
            if self._handle_count >= self._max_handle_count:
                # treat our own handle limit like a failure to map
                raise Exception
            # END assert own imposed max file handles
            r = self.MapRegionCls(a.path_or_fd(), window.ofs, window.size, flags)
        except Exception:
            # Either a system resource is exhausted, or a limit was hit. Free whatever can
            # be freed and start over, as our insert position is invalid afterwards
            if is_recursive:
                # that was the second attempt already - give up and propagate
                raise
            # END handle existing recursion
            self._collect_lru_region(0)
            return self._obtain_region(a, offset, size, flags, True)
        # END handle exceptions

        self._handle_count += 1
        self._memory_size += r.size()
        # inserting at the determined position keeps the list ordered
        a.insert(insert_pos, r)
        return r
|