選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

591 行
24KB

  1. """Module containing a memory memory manager which provides a sliding window on a number of memory mapped files"""
  2. from .util import (
  3. MapWindow,
  4. MapRegion,
  5. MapRegionList,
  6. is_64_bit,
  7. string_types,
  8. buffer,
  9. )
  10. import sys
  11. from functools import reduce
  12. __all__ = ["StaticWindowMapManager", "SlidingWindowMapManager", "WindowCursor"]
  13. #{ Utilities
  14. #}END utilities
  15. class WindowCursor(object):
  16. """
  17. Pointer into the mapped region of the memory manager, keeping the map
  18. alive until it is destroyed and no other client uses it.
  19. Cursors should not be created manually, but are instead returned by the SlidingWindowMapManager
  20. **Note:**: The current implementation is suited for static and sliding window managers, but it also means
  21. that it must be suited for the somewhat quite different sliding manager. It could be improved, but
  22. I see no real need to do so."""
  23. __slots__ = (
  24. '_manager', # the manger keeping all file regions
  25. '_rlist', # a regions list with regions for our file
  26. '_region', # our current class:`MapRegion` or None
  27. '_ofs', # relative offset from the actually mapped area to our start area
  28. '_size' # maximum size we should provide
  29. )
  30. def __init__(self, manager=None, regions=None):
  31. self._manager = manager
  32. self._rlist = regions
  33. self._region = None
  34. self._ofs = 0
  35. self._size = 0
  36. def __del__(self):
  37. self._destroy()
  38. def __enter__(self):
  39. return self
  40. def __exit__(self, exc_type, exc_value, traceback):
  41. self._destroy()
  42. def _destroy(self):
  43. """Destruction code to decrement counters"""
  44. self.unuse_region()
  45. if self._rlist is not None:
  46. # Actual client count, which doesn't include the reference kept by the manager, nor ours
  47. # as we are about to be deleted
  48. try:
  49. if len(self._rlist) == 0:
  50. # Free all resources associated with the mapped file
  51. self._manager._fdict.pop(self._rlist.path_or_fd())
  52. # END remove regions list from manager
  53. except (TypeError, KeyError):
  54. # sometimes, during shutdown, getrefcount is None. Its possible
  55. # to re-import it, however, its probably better to just ignore
  56. # this python problem (for now).
  57. # The next step is to get rid of the error prone getrefcount alltogether.
  58. pass
  59. # END exception handling
  60. # END handle regions
  61. def _copy_from(self, rhs):
  62. """Copy all data from rhs into this instance, handles usage count"""
  63. self._manager = rhs._manager
  64. self._rlist = type(rhs._rlist)(rhs._rlist)
  65. self._region = rhs._region
  66. self._ofs = rhs._ofs
  67. self._size = rhs._size
  68. for region in self._rlist:
  69. region.increment_client_count()
  70. if self._region is not None:
  71. self._region.increment_client_count()
  72. # END handle regions
  73. def __copy__(self):
  74. """copy module interface"""
  75. cpy = type(self)()
  76. cpy._copy_from(self)
  77. return cpy
  78. #{ Interface
  79. def assign(self, rhs):
  80. """Assign rhs to this instance. This is required in order to get a real copy.
  81. Alternativly, you can copy an existing instance using the copy module"""
  82. self._destroy()
  83. self._copy_from(rhs)
  84. def use_region(self, offset=0, size=0, flags=0):
  85. """Assure we point to a window which allows access to the given offset into the file
  86. :param offset: absolute offset in bytes into the file
  87. :param size: amount of bytes to map. If 0, all available bytes will be mapped
  88. :param flags: additional flags to be given to os.open in case a file handle is initially opened
  89. for mapping. Has no effect if a region can actually be reused.
  90. :return: this instance - it should be queried for whether it points to a valid memory region.
  91. This is not the case if the mapping failed because we reached the end of the file
  92. **Note:**: The size actually mapped may be smaller than the given size. If that is the case,
  93. either the file has reached its end, or the map was created between two existing regions"""
  94. need_region = True
  95. man = self._manager
  96. fsize = self._rlist.file_size()
  97. size = min(size or fsize, man.window_size() or fsize) # clamp size to window size
  98. if self._region is not None:
  99. if self._region.includes_ofs(offset):
  100. need_region = False
  101. else:
  102. self.unuse_region()
  103. # END handle existing region
  104. # END check existing region
  105. # offset too large ?
  106. if offset >= fsize:
  107. return self
  108. # END handle offset
  109. if need_region:
  110. self._region = man._obtain_region(self._rlist, offset, size, flags, False)
  111. self._region.increment_client_count()
  112. # END need region handling
  113. self._ofs = offset - self._region._b
  114. self._size = min(size, self._region.ofs_end() - offset)
  115. return self
  116. def unuse_region(self):
  117. """Unuse the current region. Does nothing if we have no current region
  118. **Note:** the cursor unuses the region automatically upon destruction. It is recommended
  119. to un-use the region once you are done reading from it in persistent cursors as it
  120. helps to free up resource more quickly"""
  121. if self._region is not None:
  122. self._region.increment_client_count(-1)
  123. self._region = None
  124. # note: should reset ofs and size, but we spare that for performance. Its not
  125. # allowed to query information if we are not valid !
  126. def buffer(self):
  127. """Return a buffer object which allows access to our memory region from our offset
  128. to the window size. Please note that it might be smaller than you requested when calling use_region()
  129. **Note:** You can only obtain a buffer if this instance is_valid() !
  130. **Note:** buffers should not be cached passed the duration of your access as it will
  131. prevent resources from being freed even though they might not be accounted for anymore !"""
  132. return buffer(self._region.buffer(), self._ofs, self._size)
  133. def map(self):
  134. """
  135. :return: the underlying raw memory map. Please not that the offset and size is likely to be different
  136. to what you set as offset and size. Use it only if you are sure about the region it maps, which is the whole
  137. file in case of StaticWindowMapManager"""
  138. return self._region.map()
  139. def is_valid(self):
  140. """:return: True if we have a valid and usable region"""
  141. return self._region is not None
  142. def is_associated(self):
  143. """:return: True if we are associated with a specific file already"""
  144. return self._rlist is not None
  145. def ofs_begin(self):
  146. """:return: offset to the first byte pointed to by our cursor
  147. **Note:** only if is_valid() is True"""
  148. return self._region._b + self._ofs
  149. def ofs_end(self):
  150. """:return: offset to one past the last available byte"""
  151. # unroll method calls for performance !
  152. return self._region._b + self._ofs + self._size
  153. def size(self):
  154. """:return: amount of bytes we point to"""
  155. return self._size
  156. def region(self):
  157. """:return: our mapped region, or None if nothing is mapped yet
  158. :raise AssertionError: if we have no current region. This is only useful for debugging"""
  159. return self._region
  160. def includes_ofs(self, ofs):
  161. """:return: True if the given absolute offset is contained in the cursors
  162. current region
  163. **Note:** cursor must be valid for this to work"""
  164. # unroll methods
  165. return (self._region._b + self._ofs) <= ofs < (self._region._b + self._ofs + self._size)
  166. def file_size(self):
  167. """:return: size of the underlying file"""
  168. return self._rlist.file_size()
  169. def path_or_fd(self):
  170. """:return: path or file descriptor of the underlying mapped file"""
  171. return self._rlist.path_or_fd()
  172. def path(self):
  173. """:return: path of the underlying mapped file
  174. :raise ValueError: if attached path is not a path"""
  175. if isinstance(self._rlist.path_or_fd(), int):
  176. raise ValueError("Path queried although mapping was applied to a file descriptor")
  177. # END handle type
  178. return self._rlist.path_or_fd()
  179. def fd(self):
  180. """:return: file descriptor used to create the underlying mapping.
  181. **Note:** it is not required to be valid anymore
  182. :raise ValueError: if the mapping was not created by a file descriptor"""
  183. if isinstance(self._rlist.path_or_fd(), string_types()):
  184. raise ValueError("File descriptor queried although mapping was generated from path")
  185. # END handle type
  186. return self._rlist.path_or_fd()
  187. #} END interface
  188. class StaticWindowMapManager(object):
  189. """Provides a manager which will produce single size cursors that are allowed
  190. to always map the whole file.
  191. Clients must be written to specifically know that they are accessing their data
  192. through a StaticWindowMapManager, as they otherwise have to deal with their window size.
  193. These clients would have to use a SlidingWindowMapBuffer to hide this fact.
  194. This type will always use a maximum window size, and optimize certain methods to
  195. accommodate this fact"""
  196. __slots__ = [
  197. '_fdict', # mapping of path -> StorageHelper (of some kind
  198. '_window_size', # maximum size of a window
  199. '_max_memory_size', # maximum amount of memory we may allocate
  200. '_max_handle_count', # maximum amount of handles to keep open
  201. '_memory_size', # currently allocated memory size
  202. '_handle_count', # amount of currently allocated file handles
  203. ]
  204. #{ Configuration
  205. MapRegionListCls = MapRegionList
  206. MapWindowCls = MapWindow
  207. MapRegionCls = MapRegion
  208. WindowCursorCls = WindowCursor
  209. #} END configuration
  210. _MB_in_bytes = 1024 * 1024
  211. def __init__(self, window_size=0, max_memory_size=0, max_open_handles=sys.maxsize):
  212. """initialize the manager with the given parameters.
  213. :param window_size: if -1, a default window size will be chosen depending on
  214. the operating system's architecture. It will internally be quantified to a multiple of the page size
  215. If 0, the window may have any size, which basically results in mapping the whole file at one
  216. :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions.
  217. If 0, a viable default will be set depending on the system's architecture.
  218. It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate
  219. :param max_open_handles: if not maxint, limit the amount of open file handles to the given number.
  220. Otherwise the amount is only limited by the system itself. If a system or soft limit is hit,
  221. the manager will free as many handles as possible"""
  222. self._fdict = dict()
  223. self._window_size = window_size
  224. self._max_memory_size = max_memory_size
  225. self._max_handle_count = max_open_handles
  226. self._memory_size = 0
  227. self._handle_count = 0
  228. if window_size < 0:
  229. coeff = 64
  230. if is_64_bit():
  231. coeff = 1024
  232. # END handle arch
  233. self._window_size = coeff * self._MB_in_bytes
  234. # END handle max window size
  235. if max_memory_size == 0:
  236. coeff = 1024
  237. if is_64_bit():
  238. coeff = 8192
  239. # END handle arch
  240. self._max_memory_size = coeff * self._MB_in_bytes
  241. # END handle max memory size
  242. #{ Internal Methods
  243. def _collect_lru_region(self, size):
  244. """Unmap the region which was least-recently used and has no client
  245. :param size: size of the region we want to map next (assuming its not already mapped partially or full
  246. if 0, we try to free any available region
  247. :return: Amount of freed regions
  248. .. Note::
  249. We don't raise exceptions anymore, in order to keep the system working, allowing temporary overallocation.
  250. If the system runs out of memory, it will tell.
  251. .. TODO::
  252. implement a case where all unusued regions are discarded efficiently.
  253. Currently its only brute force
  254. """
  255. num_found = 0
  256. while (size == 0) or (self._memory_size + size > self._max_memory_size):
  257. lru_region = None
  258. lru_list = None
  259. for regions in self._fdict.values():
  260. for region in regions:
  261. # check client count - if it's 1, it's just us
  262. if (region.client_count() == 1 and
  263. (lru_region is None or region._uc < lru_region._uc)):
  264. lru_region = region
  265. lru_list = regions
  266. # END update lru_region
  267. # END for each region
  268. # END for each regions list
  269. if lru_region is None:
  270. break
  271. # END handle region not found
  272. num_found += 1
  273. del(lru_list[lru_list.index(lru_region)])
  274. lru_region.increment_client_count(-1)
  275. self._memory_size -= lru_region.size()
  276. self._handle_count -= 1
  277. # END while there is more memory to free
  278. return num_found
  279. def _obtain_region(self, a, offset, size, flags, is_recursive):
  280. """Utilty to create a new region - for more information on the parameters,
  281. see MapCursor.use_region.
  282. :param a: A regions (a)rray
  283. :return: The newly created region"""
  284. if self._memory_size + size > self._max_memory_size:
  285. self._collect_lru_region(size)
  286. # END handle collection
  287. r = None
  288. if a:
  289. assert len(a) == 1
  290. r = a[0]
  291. else:
  292. try:
  293. r = self.MapRegionCls(a.path_or_fd(), 0, sys.maxsize, flags)
  294. except Exception:
  295. # apparently we are out of system resources or hit a limit
  296. # As many more operations are likely to fail in that condition (
  297. # like reading a file from disk, etc) we free up as much as possible
  298. # As this invalidates our insert position, we have to recurse here
  299. if is_recursive:
  300. # we already tried this, and still have no success in obtaining
  301. # a mapping. This is an exception, so we propagate it
  302. raise
  303. # END handle existing recursion
  304. self._collect_lru_region(0)
  305. return self._obtain_region(a, offset, size, flags, True)
  306. # END handle exceptions
  307. self._handle_count += 1
  308. self._memory_size += r.size()
  309. a.append(r)
  310. # END handle array
  311. assert r.includes_ofs(offset)
  312. return r
  313. #}END internal methods
  314. #{ Interface
  315. def make_cursor(self, path_or_fd):
  316. """
  317. :return: a cursor pointing to the given path or file descriptor.
  318. It can be used to map new regions of the file into memory
  319. **Note:** if a file descriptor is given, it is assumed to be open and valid,
  320. but may be closed afterwards. To refer to the same file, you may reuse
  321. your existing file descriptor, but keep in mind that new windows can only
  322. be mapped as long as it stays valid. This is why the using actual file paths
  323. are preferred unless you plan to keep the file descriptor open.
  324. **Note:** file descriptors are problematic as they are not necessarily unique, as two
  325. different files opened and closed in succession might have the same file descriptor id.
  326. **Note:** Using file descriptors directly is faster once new windows are mapped as it
  327. prevents the file to be opened again just for the purpose of mapping it."""
  328. regions = self._fdict.get(path_or_fd)
  329. if regions is None:
  330. regions = self.MapRegionListCls(path_or_fd)
  331. self._fdict[path_or_fd] = regions
  332. # END obtain region for path
  333. return self.WindowCursorCls(self, regions)
  334. def collect(self):
  335. """Collect all available free-to-collect mapped regions
  336. :return: Amount of freed handles"""
  337. return self._collect_lru_region(0)
  338. def num_file_handles(self):
  339. """:return: amount of file handles in use. Each mapped region uses one file handle"""
  340. return self._handle_count
  341. def num_open_files(self):
  342. """Amount of opened files in the system"""
  343. return reduce(lambda x, y: x + y, (1 for rlist in self._fdict.values() if len(rlist) > 0), 0)
  344. def window_size(self):
  345. """:return: size of each window when allocating new regions"""
  346. return self._window_size
  347. def mapped_memory_size(self):
  348. """:return: amount of bytes currently mapped in total"""
  349. return self._memory_size
  350. def max_file_handles(self):
  351. """:return: maximium amount of handles we may have opened"""
  352. return self._max_handle_count
  353. def max_mapped_memory_size(self):
  354. """:return: maximum amount of memory we may allocate"""
  355. return self._max_memory_size
  356. #} END interface
  357. #{ Special Purpose Interface
  358. def force_map_handle_removal_win(self, base_path):
  359. """ONLY AVAILABLE ON WINDOWS
  360. On windows removing files is not allowed if anybody still has it opened.
  361. If this process is ourselves, and if the whole process uses this memory
  362. manager (as far as the parent framework is concerned) we can enforce
  363. closing all memory maps whose path matches the given base path to
  364. allow the respective operation after all.
  365. The respective system must NOT access the closed memory regions anymore !
  366. This really may only be used if you know that the items which keep
  367. the cursors alive will not be using it anymore. They need to be recreated !
  368. :return: Amount of closed handles
  369. **Note:** does nothing on non-windows platforms"""
  370. if sys.platform != 'win32':
  371. return
  372. # END early bailout
  373. num_closed = 0
  374. for path, rlist in self._fdict.items():
  375. if path.startswith(base_path):
  376. for region in rlist:
  377. region.release()
  378. num_closed += 1
  379. # END path matches
  380. # END for each path
  381. return num_closed
  382. #} END special purpose interface
  383. class SlidingWindowMapManager(StaticWindowMapManager):
  384. """Maintains a list of ranges of mapped memory regions in one or more files and allows to easily
  385. obtain additional regions assuring there is no overlap.
  386. Once a certain memory limit is reached globally, or if there cannot be more open file handles
  387. which result from each mmap call, the least recently used, and currently unused mapped regions
  388. are unloaded automatically.
  389. **Note:** currently not thread-safe !
  390. **Note:** in the current implementation, we will automatically unload windows if we either cannot
  391. create more memory maps (as the open file handles limit is hit) or if we have allocated more than
  392. a safe amount of memory already, which would possibly cause memory allocations to fail as our address
  393. space is full."""
  394. __slots__ = tuple()
  395. def __init__(self, window_size=-1, max_memory_size=0, max_open_handles=sys.maxsize):
  396. """Adjusts the default window size to -1"""
  397. super(SlidingWindowMapManager, self).__init__(window_size, max_memory_size, max_open_handles)
  398. def _obtain_region(self, a, offset, size, flags, is_recursive):
  399. # bisect to find an existing region. The c++ implementation cannot
  400. # do that as it uses a linked list for regions.
  401. r = None
  402. lo = 0
  403. hi = len(a)
  404. while lo < hi:
  405. mid = (lo + hi) // 2
  406. ofs = a[mid]._b
  407. if ofs <= offset:
  408. if a[mid].includes_ofs(offset):
  409. r = a[mid]
  410. break
  411. # END have region
  412. lo = mid + 1
  413. else:
  414. hi = mid
  415. # END handle position
  416. # END while bisecting
  417. if r is None:
  418. window_size = self._window_size
  419. left = self.MapWindowCls(0, 0)
  420. mid = self.MapWindowCls(offset, size)
  421. right = self.MapWindowCls(a.file_size(), 0)
  422. # we want to honor the max memory size, and assure we have anough
  423. # memory available
  424. # Save calls !
  425. if self._memory_size + window_size > self._max_memory_size:
  426. self._collect_lru_region(window_size)
  427. # END handle collection
  428. # we assume the list remains sorted by offset
  429. insert_pos = 0
  430. len_regions = len(a)
  431. if len_regions == 1:
  432. if a[0]._b <= offset:
  433. insert_pos = 1
  434. # END maintain sort
  435. else:
  436. # find insert position
  437. insert_pos = len_regions
  438. for i, region in enumerate(a):
  439. if region._b > offset:
  440. insert_pos = i
  441. break
  442. # END if insert position is correct
  443. # END for each region
  444. # END obtain insert pos
  445. # adjust the actual offset and size values to create the largest
  446. # possible mapping
  447. if insert_pos == 0:
  448. if len_regions:
  449. right = self.MapWindowCls.from_region(a[insert_pos])
  450. # END adjust right side
  451. else:
  452. if insert_pos != len_regions:
  453. right = self.MapWindowCls.from_region(a[insert_pos])
  454. # END adjust right window
  455. left = self.MapWindowCls.from_region(a[insert_pos - 1])
  456. # END adjust surrounding windows
  457. mid.extend_left_to(left, window_size)
  458. mid.extend_right_to(right, window_size)
  459. mid.align()
  460. # it can happen that we align beyond the end of the file
  461. if mid.ofs_end() > right.ofs:
  462. mid.size = right.ofs - mid.ofs
  463. # END readjust size
  464. # insert new region at the right offset to keep the order
  465. try:
  466. if self._handle_count >= self._max_handle_count:
  467. raise Exception
  468. # END assert own imposed max file handles
  469. r = self.MapRegionCls(a.path_or_fd(), mid.ofs, mid.size, flags)
  470. except Exception:
  471. # apparently we are out of system resources or hit a limit
  472. # As many more operations are likely to fail in that condition (
  473. # like reading a file from disk, etc) we free up as much as possible
  474. # As this invalidates our insert position, we have to recurse here
  475. if is_recursive:
  476. # we already tried this, and still have no success in obtaining
  477. # a mapping. This is an exception, so we propagate it
  478. raise
  479. # END handle existing recursion
  480. self._collect_lru_region(0)
  481. return self._obtain_region(a, offset, size, flags, True)
  482. # END handle exceptions
  483. self._handle_count += 1
  484. self._memory_size += r.size()
  485. a.insert(insert_pos, r)
  486. # END create new region
  487. return r