Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

386 строк
14KB

  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2015 Ian Stapleton Cordasco
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  12. # implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. """Module containing the urlparse compatibility logic."""
  16. from collections import namedtuple
  17. from . import compat
  18. from . import exceptions
  19. from . import misc
  20. from . import normalizers
  21. from . import uri
  # Names exported by a star-import of this module.
  __all__ = (
      'ParseResult',
      'ParseResultBytes',
  )

  # Field names, in positional order, used for the namedtuple bases of the
  # parse-result classes and for pairing keyword values in ``copy_with``.
  PARSED_COMPONENTS = (
      'scheme',
      'userinfo',
      'host',
      'port',
      'path',
      'query',
      'fragment',
  )
  class ParseResultMixin(object):
      """Helpers shared by the text and bytes parse-result classes.

      The methods here read ``userinfo``, ``host``, ``port``, ``query``,
      ``authority`` and ``encoding`` from ``self`` and call
      ``self.unsplit()``; the concrete class has to supply all of them.
      """

      def _generate_authority(self, attributes):
          """Return the authority that matches a set of replacement parts.

          :param dict attributes: Mapping that contains at least the keys
              ``'userinfo'``, ``'host'`` and ``'port'``.
          :returns: ``self.authority`` when all three values equal the ones
              already stored on the instance; otherwise the result of
              ``normalizers.normalize_authority`` for the new triple.
          """
          # I swear I did not align the comparisons below. That's just how
          # they happened to align based on pep8 and attribute lengths.
          userinfo, host, port = (attributes[p]
                                  for p in ('userinfo', 'host', 'port'))
          if (self.userinfo != userinfo or
                  self.host != host or
                  self.port != port):
              if port:
                  # A bytes port (the bytes subclass encodes text values
                  # before calling this) must be decoded first: on Python 3
                  # formatting bytes directly yields the "b'8080'" repr.
                  if isinstance(port, bytes):
                      port = port.decode(self.encoding)
                  port = '{0}'.format(port)
              return normalizers.normalize_authority(
                  (compat.to_str(userinfo, self.encoding),
                   compat.to_str(host, self.encoding),
                   port)
              )
          return self.authority

      def geturl(self):
          """Shim to match the standard library method.

          :returns: Whatever ``self.unsplit()`` returns.
          """
          return self.unsplit()

      @property
      def hostname(self):
          """Shim to match the standard library; alias of ``host``."""
          return self.host

      @property
      def netloc(self):
          """Shim to match the standard library; alias of ``authority``."""
          return self.authority

      @property
      def params(self):
          """Shim to match the standard library; alias of ``query``."""
          return self.query
  class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS),
                    ParseResultMixin):
      """Text flavour of the urlparse-compatible result tuple.

      The tuple fields are the names in ``PARSED_COMPONENTS``. Each instance
      additionally carries the ``URIReference`` it was built from
      (``reference``) and the ``encoding`` it was created with.
      """

      # NOTE(review): this is an ordinary class attribute named ``slots``,
      # not ``__slots__`` -- confirm whether that is intentional.
      slots = ()

      def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
                  uri_ref, encoding='utf-8'):
          """Build the tuple and attach ``encoding`` and ``reference``.

          Falsy ``scheme``, ``userinfo``, ``port`` and ``path`` are stored
          as ``None``; ``host``, ``query`` and ``fragment`` are stored as
          given.
          """
          fields = (
              scheme or None,
              userinfo or None,
              host,
              port or None,
              path or None,
              query,
              fragment,
          )
          instance = super(ParseResult, cls).__new__(cls, *fields)
          instance.encoding = encoding
          instance.reference = uri_ref
          return instance

      @classmethod
      def from_parts(cls, scheme=None, userinfo=None, host=None, port=None,
                     path=None, query=None, fragment=None, encoding='utf-8'):
          """Assemble a result from individual components.

          The authority is composed as ``userinfo@host:port`` from whichever
          of the three pieces are not ``None``. The resulting reference is
          normalized before its components are read back.

          :returns: An instance of ``cls``.
          """
          pieces = []
          if userinfo is not None:
              pieces.append(userinfo + '@')
          if host is not None:
              pieces.append(host)
          if port is not None:
              pieces.append(':{0}'.format(port))
          authority = ''.join(pieces)

          raw_reference = uri.URIReference(
              scheme=scheme,
              authority=authority,
              path=path,
              query=query,
              fragment=fragment,
              encoding=encoding,
          )
          uri_ref = raw_reference.normalize()
          userinfo, host, port = authority_from(uri_ref, strict=True)
          return cls(
              scheme=uri_ref.scheme,
              userinfo=userinfo,
              host=host,
              port=port,
              path=uri_ref.path,
              query=uri_ref.query,
              fragment=uri_ref.fragment,
              uri_ref=uri_ref,
              encoding=encoding,
          )

      @classmethod
      def from_string(cls, uri_string, encoding='utf-8', strict=True,
                      lazy_normalize=True):
          """Parse a URI from the given unicode URI string.

          :param str uri_string: Unicode URI to be parsed into a reference.
          :param str encoding: The encoding of the string provided
          :param bool strict: Parse strictly according to :rfc:`3986` if
              True. If False, parse similarly to the standard library's
              urlparse function.
          :param bool lazy_normalize: When False, the parsed reference is
              normalized before its components are extracted.
          :returns: :class:`ParseResult` or subclass thereof
          """
          parsed = uri.URIReference.from_string(uri_string, encoding)
          if not lazy_normalize:
              parsed = parsed.normalize()
          userinfo, host, port = authority_from(parsed, strict)
          return cls(
              scheme=parsed.scheme,
              userinfo=userinfo,
              host=host,
              port=port,
              path=parsed.path,
              query=parsed.query,
              fragment=parsed.fragment,
              uri_ref=parsed,
              encoding=encoding,
          )

      @property
      def authority(self):
          """Return the authority of the underlying reference."""
          return self.reference.authority

      def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting,
                    host=misc.UseExisting, port=misc.UseExisting,
                    path=misc.UseExisting, query=misc.UseExisting,
                    fragment=misc.UseExisting):
          """Return a new :class:`ParseResult` with some parts replaced.

          Every argument left at ``misc.UseExisting`` keeps the value
          currently stored on this instance.
          """
          supplied = (scheme, userinfo, host, port, path, query, fragment)
          attrs_dict = dict(
              (name, getattr(self, name) if value is misc.UseExisting
               else value)
              for name, value in zip(PARSED_COMPONENTS, supplied)
          )
          new_authority = self._generate_authority(attrs_dict)
          new_reference = self.reference.copy_with(
              scheme=attrs_dict['scheme'],
              authority=new_authority,
              path=attrs_dict['path'],
              query=attrs_dict['query'],
              fragment=attrs_dict['fragment'],
          )
          return ParseResult(uri_ref=new_reference, encoding=self.encoding,
                             **attrs_dict)

      def encode(self, encoding=None):
          """Convert to an instance of ParseResultBytes.

          :param str encoding: Encoding to use; falls back to
              ``self.encoding`` when falsy.
          """
          encoding = encoding or self.encoding
          encoded = {}
          for name, attr in zip(PARSED_COMPONENTS, self):
              # Fields without an ``encode`` method (e.g. ``None`` or an
              # integer port) are passed through untouched.
              if hasattr(attr, 'encode'):
                  attr = attr.encode(encoding)
              encoded[name] = attr
          return ParseResultBytes(uri_ref=self.reference, encoding=encoding,
                                  **encoded)

      def unsplit(self, use_idna=False):
          """Create a URI string from the components.

          :param bool use_idna: When true and a host is present, the host
              is re-encoded with the ``idna`` codec before unsplitting.
          :returns: The parsed URI reconstituted as a string.
          :rtype: str
          """
          if not (use_idna and self.host):
              return self.reference.unsplit()
          idna_host = self.host.encode('idna').decode(self.encoding)
          return self.copy_with(host=idna_host).reference.unsplit()
  class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS),
                         ParseResultMixin):
      """Compatibility shim for the urlparse.ParseResultBytes object.

      The tuple fields are the names in ``PARSED_COMPONENTS``. Each instance
      additionally carries ``reference`` (the ``URIReference`` it was built
      from), ``encoding`` and ``lazy_normalize``.
      """

      def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
                  uri_ref, encoding='utf-8', lazy_normalize=True):
          """Create a new ParseResultBytes instance.

          Every falsy field except ``host`` is stored as ``None``.
          """
          parse_result = super(ParseResultBytes, cls).__new__(
              cls,
              scheme or None,
              userinfo or None,
              host,
              port or None,
              path or None,
              query or None,
              fragment or None)
          parse_result.encoding = encoding
          parse_result.reference = uri_ref
          parse_result.lazy_normalize = lazy_normalize
          return parse_result

      @classmethod
      def from_parts(cls, scheme=None, userinfo=None, host=None, port=None,
                     path=None, query=None, fragment=None, encoding='utf-8',
                     lazy_normalize=True):
          """Create a ParseResultBytes instance from its parts.

          The authority is composed as ``userinfo@host:port`` from whichever
          of the three pieces are not ``None``; ``port`` is passed through
          ``int()`` first.

          :param bool lazy_normalize: When False, the reference built from
              the parts is normalized before the authority is read back.
          :returns: An instance of ``cls``.
          """
          authority = ''
          if userinfo is not None:
              authority += userinfo + '@'
          if host is not None:
              authority += host
          if port is not None:
              authority += ':{0}'.format(int(port))
          uri_ref = uri.URIReference(scheme=scheme,
                                     authority=authority,
                                     path=path,
                                     query=query,
                                     fragment=fragment,
                                     encoding=encoding)
          if not lazy_normalize:
              uri_ref = uri_ref.normalize()
          to_bytes = compat.to_bytes
          userinfo, host, port = authority_from(uri_ref, strict=True)
          return cls(scheme=to_bytes(scheme, encoding),
                     userinfo=to_bytes(userinfo, encoding),
                     host=to_bytes(host, encoding),
                     port=port,
                     path=to_bytes(path, encoding),
                     query=to_bytes(query, encoding),
                     fragment=to_bytes(fragment, encoding),
                     uri_ref=uri_ref,
                     encoding=encoding,
                     lazy_normalize=lazy_normalize)

      @classmethod
      def from_string(cls, uri_string, encoding='utf-8', strict=True,
                      lazy_normalize=True):
          """Parse a URI from the given unicode URI string.

          :param str uri_string: Unicode URI to be parsed into a reference.
          :param str encoding: The encoding of the string provided
          :param bool strict: Parse strictly according to :rfc:`3986` if
              True. If False, parse similarly to the standard library's
              urlparse function.
          :param bool lazy_normalize: When False, the parsed reference is
              normalized before its components are extracted.
          :returns: :class:`ParseResultBytes` or subclass thereof
          """
          reference = uri.URIReference.from_string(uri_string, encoding)
          if not lazy_normalize:
              reference = reference.normalize()
          userinfo, host, port = authority_from(reference, strict)
          to_bytes = compat.to_bytes
          return cls(scheme=to_bytes(reference.scheme, encoding),
                     userinfo=to_bytes(userinfo, encoding),
                     host=to_bytes(host, encoding),
                     port=port,
                     path=to_bytes(reference.path, encoding),
                     query=to_bytes(reference.query, encoding),
                     fragment=to_bytes(reference.fragment, encoding),
                     uri_ref=reference,
                     encoding=encoding,
                     lazy_normalize=lazy_normalize)

      @property
      def authority(self):
          """Return the reference's authority encoded with ``self.encoding``.

          :returns: The encoded authority, or ``None`` when the underlying
              reference has no authority at all.
          """
          authority = self.reference.authority
          if authority is None:
              # Without this guard ``None.encode`` raises AttributeError,
              # which also breaks ``netloc``, ``copy_with`` and ``unsplit``
              # for references that carry no authority.
              return None
          return authority.encode(self.encoding)

      def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting,
                    host=misc.UseExisting, port=misc.UseExisting,
                    path=misc.UseExisting, query=misc.UseExisting,
                    fragment=misc.UseExisting, lazy_normalize=True):
          """Create a copy of this instance replacing with specified parts.

          Arguments left at ``misc.UseExisting`` keep the value stored on
          this instance. Non-bytes values that have an ``encode`` method are
          encoded with ``self.encoding``.

          :param bool lazy_normalize: When False, the new reference is
              normalized before it is attached to the copy.
          :returns: A new :class:`ParseResultBytes`.
          """
          attributes = zip(PARSED_COMPONENTS,
                           (scheme, userinfo, host, port, path, query,
                            fragment))
          attrs_dict = {}
          for name, value in attributes:
              if value is misc.UseExisting:
                  value = getattr(self, name)
              if not isinstance(value, bytes) and hasattr(value, 'encode'):
                  value = value.encode(self.encoding)
              attrs_dict[name] = value
          authority = self._generate_authority(attrs_dict)
          to_str = compat.to_str
          # The reference is copied with text values, so decode each part.
          ref = self.reference.copy_with(
              scheme=to_str(attrs_dict['scheme'], self.encoding),
              authority=to_str(authority, self.encoding),
              path=to_str(attrs_dict['path'], self.encoding),
              query=to_str(attrs_dict['query'], self.encoding),
              fragment=to_str(attrs_dict['fragment'], self.encoding)
          )
          if not lazy_normalize:
              ref = ref.normalize()
          return ParseResultBytes(
              uri_ref=ref,
              encoding=self.encoding,
              lazy_normalize=lazy_normalize,
              **attrs_dict
          )

      def unsplit(self, use_idna=False):
          """Create a URI bytes object from the components.

          :param bool use_idna: When true and a host is present, the host
              is re-encoded with the ``idna`` codec before unsplitting.
          :returns: The parsed URI reconstituted as bytes.
          :rtype: bytes
          """
          parse_result = self
          if use_idna and self.host:
              # self.host is bytes, to encode to idna, we need to decode it
              # first
              host = self.host.decode(self.encoding)
              hostbytes = host.encode('idna')
              parse_result = self.copy_with(host=hostbytes)
          if self.lazy_normalize:
              # Normalization was deferred at construction time; perform it
              # now on a copy before serialising.
              parse_result = parse_result.copy_with(lazy_normalize=False)
          # Local deliberately not called ``uri`` so that it does not shadow
          # the imported ``uri`` module.
          uri_text = parse_result.reference.unsplit()
          return uri_text.encode(self.encoding)
  def split_authority(authority):
      """Leniently split an authority string into its three pieces.

      :param str authority: Text of the form ``[userinfo@]host[:port]``;
          the host may be a bracketed IPv6 literal.
      :returns: ``(userinfo, host, port)``; a piece that is absent is
          ``None``. The port, when present, is returned as text.
      """
      userinfo = host = port = None
      # Candidate host found in front of a ':' separator, if any.
      host_before_colon = None
      # Without an '@' the whole input is host[:port].
      remainder = authority

      if '@' in authority:
          # Split on the last '@' so that userinfo may itself contain '@'.
          userinfo, _, remainder = authority.rpartition('@')

      # A bracketed IPv6 literal: the host runs up to and includes ']'.
      if remainder.startswith('['):
          host, remainder = remainder.split(']', 1)
          host += ']'

      head, colon, tail = remainder.partition(':')
      if colon:
          host_before_colon, port = head, tail
      elif not host and remainder:
          host = remainder

      if host_before_colon and not host:
          host = host_before_colon

      return userinfo, host, port
  def authority_from(reference, strict):
      """Extract ``(userinfo, host, port)`` from a reference.

      :param reference: Object providing ``authority_info()`` and an
          ``authority`` attribute.
      :param bool strict: When true, ``exceptions.InvalidAuthority`` raised
          by ``authority_info()`` is propagated; otherwise the authority is
          split with :func:`split_authority` instead.
      :returns: ``(userinfo, host, port)``; a truthy port is converted to
          ``int``.
      :raises exceptions.InvalidPort: If a truthy port is not an integer.
      """
      try:
          parsed = reference.authority_info()
      except exceptions.InvalidAuthority:
          if strict:
              raise
          userinfo, host, port = split_authority(reference.authority)
      else:
          # Thanks to Richard Barrell for this idea:
          # https://twitter.com/0x2ba22e11/status/617338811975139328
          userinfo = parsed.get('userinfo')
          host = parsed.get('host')
          port = parsed.get('port')

      if not port:
          # Nothing to convert; hand back the falsy port unchanged.
          return userinfo, host, port

      try:
          return userinfo, host, int(port)
      except ValueError:
          raise exceptions.InvalidPort(port)