|
- # -*- coding: utf-8 -*-
- # config.py
- # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
- #
- # This module is part of GitPython and is released under
- # the BSD License: http://www.opensource.org/licenses/bsd-license.php
- """utilities to help provide compatibility with python 3"""
- # flake8: noqa
-
- import locale
- import os
- import sys
- import codecs
-
-
- from gitdb.utils.compat import (
- xrange,
- MAXSIZE, # @UnusedImport
- izip, # @UnusedImport
- )
- from gitdb.utils.encoding import (
- string_types, # @UnusedImport
- text_type, # @UnusedImport
- force_bytes, # @UnusedImport
- force_text # @UnusedImport
- )
-
-
# Interpreter / platform flags shared by the rest of the module.
PY3 = sys.version_info[0] >= 3
is_win = (os.name == 'nt')
is_posix = (os.name == 'posix')
# os.name is never 'darwin' (macOS reports 'posix'), so the platform has to
# be identified through sys.platform instead.
is_darwin = (sys.platform == 'darwin')

# Default encoding used to convert between bytes and text. Start from None so
# the fallback below also works if sys lacks getfilesystemencoding().
defenc = None
if hasattr(sys, 'getfilesystemencoding'):
    defenc = sys.getfilesystemencoding()
if defenc is None:
    defenc = sys.getdefaultencoding()
-
# Version-dependent aliases and tiny helpers. Both branches define the same
# set of names: FileType, byte_ord, bchr, mviter, range, unicode, binary_type.
if not PY3:
    FileType = file  # @UndefinedVariable on PY3
    # The default encoding is usually plain ascii, which might not be enough
    # for our encoding needs. Unless it was set specifically, override it
    # to be utf-8.
    if defenc == 'ascii':
        defenc = 'utf-8'
    range = xrange  # @ReservedAssignment
    unicode = unicode
    binary_type = str
    byte_ord = ord
    bchr = chr

    def mviter(d):
        """Return an iterator over the values of the mapping ``d``."""
        return d.itervalues()
else:
    import io
    FileType = io.IOBase
    range = xrange  # @ReservedAssignment
    unicode = str
    binary_type = bytes

    def byte_ord(b):
        """Return ``b`` unchanged (the Python 2 branch uses ``ord`` here)."""
        return b

    def bchr(n):
        """Return a bytes object of length one holding the integer ``n``."""
        return bytes([n])

    def mviter(d):
        """Return the values view of the mapping ``d``."""
        return d.values()
-
-
def safe_decode(s):
    """Return ``s`` as text.

    Text is handed back untouched, bytes are decoded with ``defenc`` using
    the 'surrogateescape' error handler, and ``None`` yields ``None``.

    :raise TypeError: if ``s`` is neither text, bytes nor ``None``
    """
    if s is None:
        return None
    if isinstance(s, unicode):
        return s
    if isinstance(s, bytes):
        return s.decode(defenc, 'surrogateescape')
    raise TypeError('Expected bytes or text, but got %r' % (s,))
-
-
def safe_encode(s):
    """Safely encodes a unicode string to a binary string.

    Text is encoded with ``defenc``; bytes are returned unchanged and
    ``None`` yields ``None``.

    :raise TypeError: if ``s`` is neither text, bytes nor ``None``
    """
    if isinstance(s, unicode):
        return s.encode(defenc)
    elif isinstance(s, bytes):
        return s
    elif s is not None:
        raise TypeError('Expected bytes or text, but got %r' % (s,))
-
-
def win_encode(s):
    """Encode text for use as a process argument on Windows.

    Text is encoded with ``locale.getpreferredencoding(False)``; bytes are
    returned unchanged and ``None`` yields ``None``.

    :raise TypeError: if ``s`` is neither text, bytes nor ``None``
    """
    if s is None:
        return None
    if isinstance(s, unicode):
        preferred = locale.getpreferredencoding(False)
        return s.encode(preferred)
    if isinstance(s, bytes):
        return s
    raise TypeError('Expected bytes or text, but got %r' % (s,))
-
-
def with_metaclass(meta, *bases):
    """Return a temporary base class that applies metaclass ``meta``.

    Intended for ``class Foo(with_metaclass(Meta, Base)): ...``. When the
    class statement creates ``Foo`` from the returned helper, the helper's
    metaclass builds the final class as ``meta(name, bases, d)``, so the
    helper itself does not appear among the bases of ``Foo``.

    copied from https://github.com/Byron/bcore/blob/master/src/python/butility/future.py#L15

    :param meta: the metaclass to apply
    :param bases: base classes of the class being created
    """
    class metaclass(meta):
        __call__ = type.__call__
        __init__ = type.__init__

        def __new__(cls, name, nbases, d):
            if nbases is None:
                # Creation of the helper class itself (see the return below).
                return type.__new__(cls, name, (), d)
            # There may be clients who rely on this attribute to be set to a reasonable value, which is why
            # we set the __metaclass__ attribute explicitly - but only if the class body did not
            # already provide one.
            if not PY3 and '__metaclass__' not in d:
                d['__metaclass__'] = meta
            return meta(name, bases, d)
    return metaclass(meta.__name__ + 'Helper', None, {})
-
-
- ## From https://docs.python.org/3.3/howto/pyporting.html
class UnicodeMixin(object):

    """Mixin that derives ``__str__`` from a subclass-provided ``__unicode__``
    method, for both Python 2 and Python 3."""

    if not PY3:
        def __str__(self):
            # Python 2: str is a byte string, so encode the text with defenc.
            return self.__unicode__().encode(defenc)
    else:
        def __str__(self):
            # Python 3: str is already text.
            return self.__unicode__()
-
-
- """
- This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
- handler of Python 3.
- Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
- """
-
- # This code is released under the Python license and the BSD 2-clause license
-
-
# Name of the codec error handler used by the helpers below and registered by
# register_surrogateescape().
FS_ERRORS = 'surrogateescape'

# # -- Python 2/3 compatibility -------------------------------------
# FS_ERRORS = 'my_surrogateescape'
-
def u(text):
    """Return ``text`` as a unicode string.

    On Python 3 the argument is returned as is; on Python 2 it is decoded
    with the 'unicode_escape' codec.
    """
    return text if PY3 else text.decode('unicode_escape')
-
def b(data):
    """Return ``data`` as a byte string.

    On Python 3 the argument is encoded as latin1; on Python 2 it is
    returned as is.
    """
    return data.encode('latin1') if PY3 else data
-
# _unichr: integer code point -> one-character unicode string.
# bytes_chr: integer -> byte string of length one.
if not PY3:
    _unichr = unichr
    bytes_chr = chr
else:
    _unichr = chr

    def bytes_chr(code):
        """Return a bytes object of length one holding ``code``."""
        return bytes((code,))
-
def surrogateescape_handler(exc):
    """
    Pure Python implementation of the PEP 383 "surrogateescape" error
    handler of Python 3. On decoding, undecodable bytes are replaced by a
    Unicode character U+DCxx; on encoding, those characters are translated
    back into the original bytes.

    :param exc: the UnicodeDecodeError or UnicodeEncodeError being handled
    :return: tuple of (replacement string, position at which to resume)
    :raise: ``exc`` itself if it is of another type or cannot be handled
    """
    # The slice of the input that the codec could not process.
    segment = exc.object[exc.start:exc.end]

    if not isinstance(exc, (UnicodeDecodeError, UnicodeEncodeError)):
        raise exc

    try:
        if isinstance(exc, UnicodeDecodeError):
            # segment is a byte-string in this case
            replacement = replace_surrogate_decode(segment)
        else:
            # In the case of u'\udcc3'.encode('ascii',
            # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
            # exception anyway after this function is called, even though I think
            # it's doing what it should. It seems that the strict encoder is called
            # to encode the unicode string that this function returns ...
            replacement = replace_surrogate_encode(segment, exc)
    except NotASurrogateError:
        raise exc
    return (replacement, exc.end)
-
-
class NotASurrogateError(Exception):
    """Raised by the surrogate replacement helpers for input they cannot map."""
-
-
def replace_surrogate_encode(mystring, exc):
    """
    Returns a (unicode) string, not the more logical bytes, because the codecs
    register_error functionality expects this.

    Every character of ``mystring`` in the range U+DC00..U+DCFF is replaced
    by the character whose code point is the original one minus 0xDC00.

    :param mystring: the unicode characters that could not be encoded
    :param exc: the original encoding error; it is raised as is when a
        character outside of U+DC00..U+DCFF is encountered
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)

        # Only U+DC00..U+DCFF can be mapped back. Anything else - including
        # the high surrogates U+D800..U+DBFF, which would make the subtraction
        # below negative - fails with the original exception.
        if not 0xDC00 <= code <= 0xDCFF:
            raise exc
        decoded.append(_unichr(code - 0xDC00))
    return str().join(decoded)
-
-
def replace_surrogate_decode(mybytes):
    """
    Returns a (unicode) string in which every byte value up to 0x7F is kept
    as the character with that code point and every byte value in
    0x80..0xFF becomes the character U+DC00 + value.

    :raise NotASurrogateError: for a value above 0xFF
    """
    pieces = []
    for item in mybytes:
        # Iterating yields ints for bytes-like input and one-character
        # strings for a native str on Py2.
        code = item if isinstance(item, int) else ord(item)
        if code <= 0x7F:
            pieces.append(_unichr(code))
        elif code <= 0xFF:
            pieces.append(_unichr(0xDC00 + code))
        else:
            raise NotASurrogateError
    return str().join(pieces)
-
-
def encodefilename(fn):
    """Encode the unicode filename ``fn`` to bytes using ``FS_ENCODING``.

    Characters in U+DC80..U+DCFF are turned back into the single bytes
    0x80..0xFF. The 'ascii' and 'utf-8' encodings are handled by hand;
    every other encoding is delegated to ``fn.encode`` with ``FS_ERRORS``.

    :raise UnicodeEncodeError: for a character that cannot be encoded
    """
    if FS_ENCODING == 'ascii':
        # ASCII encoder of Python 2 expects that the error handler returns a
        # Unicode string encodable to ASCII, whereas our surrogateescape error
        # handler has to return bytes in 0x80-0xFF range.
        pieces = []
        for pos, char in enumerate(fn):
            point = ord(char)
            if point < 128:
                pieces.append(bytes_chr(point))
            elif 0xDC80 <= point <= 0xDCFF:
                pieces.append(bytes_chr(point - 0xDC00))
            else:
                raise UnicodeEncodeError(FS_ENCODING,
                                         fn, pos, pos + 1,
                                         'ordinal not in range(128)')
        return bytes().join(pieces)

    if FS_ENCODING == 'utf-8':
        # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
        # doesn't go through our error handler
        pieces = []
        for pos, char in enumerate(fn):
            point = ord(char)
            if not 0xD800 <= point <= 0xDFFF:
                pieces.append(char.encode('utf-8'))
            elif 0xDC80 <= point <= 0xDCFF:
                pieces.append(bytes_chr(point - 0xDC00))
            else:
                raise UnicodeEncodeError(
                    FS_ENCODING,
                    fn, pos, pos + 1, 'surrogates not allowed')
        return bytes().join(pieces)

    return fn.encode(FS_ENCODING, FS_ERRORS)
-
def decodefilename(fn):
    """Decode the byte filename ``fn`` using ``FS_ENCODING`` and ``FS_ERRORS``."""
    decoded = fn.decode(FS_ENCODING, FS_ERRORS)
    return decoded
-
# Encoding used by encodefilename()/decodefilename(), plus a sample pair of
# byte and unicode filenames.
# NOTE(review): `fn` and `encoded` look like leftover self-test fixtures -
# confirm nothing imports them before removing.
FS_ENCODING = 'ascii'
fn = b('[abc\xff]')
encoded = u('[abc\udcff]')
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')


# normalize the filesystem encoding name.
# For example, we expect "utf-8", not "UTF8".
FS_ENCODING = codecs.lookup(FS_ENCODING).name
-
-
def register_surrogateescape():
    """
    Registers the surrogateescape error handler on Python 2 (only).

    Nothing happens on Python 3, or if a handler is already registered
    under the name ``FS_ERRORS``.
    """
    if not PY3:
        try:
            codecs.lookup_error(FS_ERRORS)
        except LookupError:
            codecs.register_error(FS_ERRORS, surrogateescape_handler)
-
-
# Import-time probe: decode a byte string containing \x9f with the
# "surrogateescape" error handler. If that fails for any reason, fall back
# to registering the pure-Python handler defined in this module.
try:
    b"100644 \x9f\0aaa".decode(defenc, "surrogateescape")
except Exception:
    register_surrogateescape()
|