You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

148 lines
5.3KB

  1. """Module containing the implementation of the IRIReference class."""
  2. # -*- coding: utf-8 -*-
  3. # Copyright (c) 2014 Rackspace
  4. # Copyright (c) 2015 Ian Stapleton Cordasco
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  14. # implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. from collections import namedtuple
  18. from . import compat
  19. from . import exceptions
  20. from . import misc
  21. from . import normalizers
  22. from . import uri
  23. try:
  24. import idna
  25. except ImportError: # pragma: no cover
  26. idna = None
  27. class IRIReference(namedtuple('IRIReference', misc.URI_COMPONENTS),
  28. uri.URIMixin):
  29. """Immutable object representing a parsed IRI Reference.
  30. Can be encoded into an URIReference object via the procedure
  31. specified in RFC 3987 Section 3.1
  32. .. note::
  33. The IRI submodule is a new interface and may possibly change in
  34. the future. Check for changes to the interface when upgrading.
  35. """
  36. slots = ()
  37. def __new__(cls, scheme, authority, path, query, fragment,
  38. encoding='utf-8'):
  39. """Create a new IRIReference."""
  40. ref = super(IRIReference, cls).__new__(
  41. cls,
  42. scheme or None,
  43. authority or None,
  44. path or None,
  45. query,
  46. fragment)
  47. ref.encoding = encoding
  48. return ref
  49. def __eq__(self, other):
  50. """Compare this reference to another."""
  51. other_ref = other
  52. if isinstance(other, tuple):
  53. other_ref = self.__class__(*other)
  54. elif not isinstance(other, IRIReference):
  55. try:
  56. other_ref = self.__class__.from_string(other)
  57. except TypeError:
  58. raise TypeError(
  59. 'Unable to compare {0}() to {1}()'.format(
  60. type(self).__name__, type(other).__name__))
  61. # See http://tools.ietf.org/html/rfc3986#section-6.2
  62. return tuple(self) == tuple(other_ref)
  63. def _match_subauthority(self):
  64. return misc.ISUBAUTHORITY_MATCHER.match(self.authority)
  65. @classmethod
  66. def from_string(cls, iri_string, encoding='utf-8'):
  67. """Parse a IRI reference from the given unicode IRI string.
  68. :param str iri_string: Unicode IRI to be parsed into a reference.
  69. :param str encoding: The encoding of the string provided
  70. :returns: :class:`IRIReference` or subclass thereof
  71. """
  72. iri_string = compat.to_str(iri_string, encoding)
  73. split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
  74. return cls(
  75. split_iri['scheme'], split_iri['authority'],
  76. normalizers.encode_component(split_iri['path'], encoding),
  77. normalizers.encode_component(split_iri['query'], encoding),
  78. normalizers.encode_component(split_iri['fragment'], encoding),
  79. encoding,
  80. )
  81. def encode(self, idna_encoder=None): # noqa: C901
  82. """Encode an IRIReference into a URIReference instance.
  83. If the ``idna`` module is installed or the ``rfc3986[idna]``
  84. extra is used then unicode characters in the IRI host
  85. component will be encoded with IDNA2008.
  86. :param idna_encoder:
  87. Function that encodes each part of the host component
  88. If not given will raise an exception if the IRI
  89. contains a host component.
  90. :rtype: uri.URIReference
  91. :returns: A URI reference
  92. """
  93. authority = self.authority
  94. if authority:
  95. if idna_encoder is None:
  96. if idna is None: # pragma: no cover
  97. raise exceptions.MissingDependencyError(
  98. "Could not import the 'idna' module "
  99. "and the IRI hostname requires encoding"
  100. )
  101. def idna_encoder(name):
  102. if any(ord(c) > 128 for c in name):
  103. try:
  104. return idna.encode(name.lower(),
  105. strict=True,
  106. std3_rules=True)
  107. except idna.IDNAError:
  108. raise exceptions.InvalidAuthority(self.authority)
  109. return name
  110. authority = ""
  111. if self.host:
  112. authority = ".".join([compat.to_str(idna_encoder(part))
  113. for part in self.host.split(".")])
  114. if self.userinfo is not None:
  115. authority = (normalizers.encode_component(
  116. self.userinfo, self.encoding) + '@' + authority)
  117. if self.port is not None:
  118. authority += ":" + str(self.port)
  119. return uri.URIReference(self.scheme,
  120. authority,
  121. path=self.path,
  122. query=self.query,
  123. fragment=self.fragment,
  124. encoding=self.encoding)