您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

317 行
9.3KB

  1. # -*- coding: utf-8 -*-
  2. # config.py
  3. # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
  4. #
  5. # This module is part of GitPython and is released under
  6. # the BSD License: http://www.opensource.org/licenses/bsd-license.php
  7. """utilities to help provide compatibility with python 3"""
  8. # flake8: noqa
  9. import locale
  10. import os
  11. import sys
  12. import codecs
  13. from gitdb.utils.compat import (
  14. xrange,
  15. MAXSIZE, # @UnusedImport
  16. izip, # @UnusedImport
  17. )
  18. from gitdb.utils.encoding import (
  19. string_types, # @UnusedImport
  20. text_type, # @UnusedImport
  21. force_bytes, # @UnusedImport
  22. force_text # @UnusedImport
  23. )
  24. PY3 = sys.version_info[0] >= 3
  25. is_win = (os.name == 'nt')
  26. is_posix = (os.name == 'posix')
  27. is_darwin = (os.name == 'darwin')
  28. if hasattr(sys, 'getfilesystemencoding'):
  29. defenc = sys.getfilesystemencoding()
  30. if defenc is None:
  31. defenc = sys.getdefaultencoding()
  32. if PY3:
  33. import io
  34. FileType = io.IOBase
  35. def byte_ord(b):
  36. return b
  37. def bchr(n):
  38. return bytes([n])
  39. def mviter(d):
  40. return d.values()
  41. range = xrange # @ReservedAssignment
  42. unicode = str
  43. binary_type = bytes
  44. else:
  45. FileType = file # @UndefinedVariable on PY3
  46. # usually, this is just ascii, which might not enough for our encoding needs
  47. # Unless it's set specifically, we override it to be utf-8
  48. if defenc == 'ascii':
  49. defenc = 'utf-8'
  50. byte_ord = ord
  51. bchr = chr
  52. unicode = unicode
  53. binary_type = str
  54. range = xrange # @ReservedAssignment
  55. def mviter(d):
  56. return d.itervalues()
  57. def safe_decode(s):
  58. """Safely decodes a binary string to unicode"""
  59. if isinstance(s, unicode):
  60. return s
  61. elif isinstance(s, bytes):
  62. return s.decode(defenc, 'surrogateescape')
  63. elif s is not None:
  64. raise TypeError('Expected bytes or text, but got %r' % (s,))
  65. def safe_encode(s):
  66. """Safely decodes a binary string to unicode"""
  67. if isinstance(s, unicode):
  68. return s.encode(defenc)
  69. elif isinstance(s, bytes):
  70. return s
  71. elif s is not None:
  72. raise TypeError('Expected bytes or text, but got %r' % (s,))
  73. def win_encode(s):
  74. """Encode unicodes for process arguments on Windows."""
  75. if isinstance(s, unicode):
  76. return s.encode(locale.getpreferredencoding(False))
  77. elif isinstance(s, bytes):
  78. return s
  79. elif s is not None:
  80. raise TypeError('Expected bytes or text, but got %r' % (s,))
  81. def with_metaclass(meta, *bases):
  82. """copied from https://github.com/Byron/bcore/blob/master/src/python/butility/future.py#L15"""
  83. class metaclass(meta):
  84. __call__ = type.__call__
  85. __init__ = type.__init__
  86. def __new__(cls, name, nbases, d):
  87. if nbases is None:
  88. return type.__new__(cls, name, (), d)
  89. # There may be clients who rely on this attribute to be set to a reasonable value, which is why
  90. # we set the __metaclass__ attribute explicitly
  91. if not PY3 and '___metaclass__' not in d:
  92. d['__metaclass__'] = meta
  93. return meta(name, bases, d)
  94. return metaclass(meta.__name__ + 'Helper', None, {})
  95. ## From https://docs.python.org/3.3/howto/pyporting.html
  96. class UnicodeMixin(object):
  97. """Mixin class to handle defining the proper __str__/__unicode__
  98. methods in Python 2 or 3."""
  99. if PY3:
  100. def __str__(self):
  101. return self.__unicode__()
  102. else: # Python 2
  103. def __str__(self):
  104. return self.__unicode__().encode(defenc)
"""
This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
handler of Python 3.
Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
"""
# This code is released under the Python license and the BSD 2-clause license

# Name of the codecs error handler used by the filename helpers below and
# registered by register_surrogateescape().
FS_ERRORS = 'surrogateescape'
# # -- Python 2/3 compatibility -------------------------------------
# FS_ERRORS = 'my_surrogateescape'
  114. def u(text):
  115. if PY3:
  116. return text
  117. else:
  118. return text.decode('unicode_escape')
  119. def b(data):
  120. if PY3:
  121. return data.encode('latin1')
  122. else:
  123. return data
  124. if PY3:
  125. _unichr = chr
  126. bytes_chr = lambda code: bytes((code,))
  127. else:
  128. _unichr = unichr
  129. bytes_chr = chr
  130. def surrogateescape_handler(exc):
  131. """
  132. Pure Python implementation of the PEP 383: the "surrogateescape" error
  133. handler of Python 3. Undecodable bytes will be replaced by a Unicode
  134. character U+DCxx on decoding, and these are translated into the
  135. original bytes on encoding.
  136. """
  137. mystring = exc.object[exc.start:exc.end]
  138. try:
  139. if isinstance(exc, UnicodeDecodeError):
  140. # mystring is a byte-string in this case
  141. decoded = replace_surrogate_decode(mystring)
  142. elif isinstance(exc, UnicodeEncodeError):
  143. # In the case of u'\udcc3'.encode('ascii',
  144. # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
  145. # exception anyway after this function is called, even though I think
  146. # it's doing what it should. It seems that the strict encoder is called
  147. # to encode the unicode string that this function returns ...
  148. decoded = replace_surrogate_encode(mystring, exc)
  149. else:
  150. raise exc
  151. except NotASurrogateError:
  152. raise exc
  153. return (decoded, exc.end)
  154. class NotASurrogateError(Exception):
  155. pass
  156. def replace_surrogate_encode(mystring, exc):
  157. """
  158. Returns a (unicode) string, not the more logical bytes, because the codecs
  159. register_error functionality expects this.
  160. """
  161. decoded = []
  162. for ch in mystring:
  163. # if PY3:
  164. # code = ch
  165. # else:
  166. code = ord(ch)
  167. # The following magic comes from Py3.3's Python/codecs.c file:
  168. if not 0xD800 <= code <= 0xDCFF:
  169. # Not a surrogate. Fail with the original exception.
  170. raise exc
  171. # mybytes = [0xe0 | (code >> 12),
  172. # 0x80 | ((code >> 6) & 0x3f),
  173. # 0x80 | (code & 0x3f)]
  174. # Is this a good idea?
  175. if 0xDC00 <= code <= 0xDC7F:
  176. decoded.append(_unichr(code - 0xDC00))
  177. elif code <= 0xDCFF:
  178. decoded.append(_unichr(code - 0xDC00))
  179. else:
  180. raise NotASurrogateError
  181. return str().join(decoded)
  182. def replace_surrogate_decode(mybytes):
  183. """
  184. Returns a (unicode) string
  185. """
  186. decoded = []
  187. for ch in mybytes:
  188. # We may be parsing newbytes (in which case ch is an int) or a native
  189. # str on Py2
  190. if isinstance(ch, int):
  191. code = ch
  192. else:
  193. code = ord(ch)
  194. if 0x80 <= code <= 0xFF:
  195. decoded.append(_unichr(0xDC00 + code))
  196. elif code <= 0x7F:
  197. decoded.append(_unichr(code))
  198. else:
  199. # # It may be a bad byte
  200. # # Try swallowing it.
  201. # continue
  202. # print("RAISE!")
  203. raise NotASurrogateError
  204. return str().join(decoded)
  205. def encodefilename(fn):
  206. if FS_ENCODING == 'ascii':
  207. # ASCII encoder of Python 2 expects that the error handler returns a
  208. # Unicode string encodable to ASCII, whereas our surrogateescape error
  209. # handler has to return bytes in 0x80-0xFF range.
  210. encoded = []
  211. for index, ch in enumerate(fn):
  212. code = ord(ch)
  213. if code < 128:
  214. ch = bytes_chr(code)
  215. elif 0xDC80 <= code <= 0xDCFF:
  216. ch = bytes_chr(code - 0xDC00)
  217. else:
  218. raise UnicodeEncodeError(FS_ENCODING,
  219. fn, index, index+1,
  220. 'ordinal not in range(128)')
  221. encoded.append(ch)
  222. return bytes().join(encoded)
  223. elif FS_ENCODING == 'utf-8':
  224. # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
  225. # doesn't go through our error handler
  226. encoded = []
  227. for index, ch in enumerate(fn):
  228. code = ord(ch)
  229. if 0xD800 <= code <= 0xDFFF:
  230. if 0xDC80 <= code <= 0xDCFF:
  231. ch = bytes_chr(code - 0xDC00)
  232. encoded.append(ch)
  233. else:
  234. raise UnicodeEncodeError(
  235. FS_ENCODING,
  236. fn, index, index+1, 'surrogates not allowed')
  237. else:
  238. ch_utf8 = ch.encode('utf-8')
  239. encoded.append(ch_utf8)
  240. return bytes().join(encoded)
  241. else:
  242. return fn.encode(FS_ENCODING, FS_ERRORS)
  243. def decodefilename(fn):
  244. return fn.decode(FS_ENCODING, FS_ERRORS)
  245. FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
  246. # FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
  247. # FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
  248. # normalize the filesystem encoding name.
  249. # For example, we expect "utf-8", not "UTF8".
  250. FS_ENCODING = codecs.lookup(FS_ENCODING).name
  251. def register_surrogateescape():
  252. """
  253. Registers the surrogateescape error handler on Python 2 (only)
  254. """
  255. if PY3:
  256. return
  257. try:
  258. codecs.lookup_error(FS_ERRORS)
  259. except LookupError:
  260. codecs.register_error(FS_ERRORS, surrogateescape_handler)
# Probe at import time whether the "surrogateescape" error handler can be used
# with defenc by decoding a byte string that contains \x9f. If that fails for
# any reason, fall back to registering the pure-Python handler.
try:
    b"100644 \x9f\0aaa".decode(defenc, "surrogateescape")
except Exception:
    register_surrogateescape()