You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

125 line
4.0KB

  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2014 Rackspace
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  12. # implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. """
  16. Module containing compiled regular expressions and constants.
  17. This module contains important constants, patterns, and compiled regular
  18. expressions for parsing and validating URIs and their components.
  19. """
  20. import re
  21. from . import abnf_regexp
  22. # These are enumerated for the named tuple used as a superclass of
  23. # URIReference
  24. URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment']
  25. important_characters = {
  26. 'generic_delimiters': abnf_regexp.GENERIC_DELIMITERS,
  27. 'sub_delimiters': abnf_regexp.SUB_DELIMITERS,
  28. # We need to escape the '*' in this case
  29. 're_sub_delimiters': abnf_regexp.SUB_DELIMITERS_RE,
  30. 'unreserved_chars': abnf_regexp.UNRESERVED_CHARS,
  31. # We need to escape the '-' in this case:
  32. 're_unreserved': abnf_regexp.UNRESERVED_RE,
  33. }
  34. # For details about delimiters and reserved characters, see:
  35. # http://tools.ietf.org/html/rfc3986#section-2.2
  36. GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET
  37. SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET
  38. RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET
  39. # For details about unreserved characters, see:
  40. # http://tools.ietf.org/html/rfc3986#section-2.3
  41. UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET
  42. NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET
  43. URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)
  44. SUBAUTHORITY_MATCHER = re.compile((
  45. '^(?:(?P<userinfo>{0})@)?' # userinfo
  46. '(?P<host>{1})' # host
  47. ':?(?P<port>{2})?$' # port
  48. ).format(abnf_regexp.USERINFO_RE,
  49. abnf_regexp.HOST_PATTERN,
  50. abnf_regexp.PORT_RE))
  51. HOST_MATCHER = re.compile('^' + abnf_regexp.HOST_RE + '$')
  52. IPv4_MATCHER = re.compile('^' + abnf_regexp.IPv4_RE + '$')
  53. IPv6_MATCHER = re.compile(r'^\[' + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r'\]$')
  54. # Used by host validator
  55. IPv6_NO_RFC4007_MATCHER = re.compile(r'^\[%s\]$' % (
  56. abnf_regexp.IPv6_ADDRZ_RE
  57. ))
  58. # Matcher used to validate path components
  59. PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)
  60. # ##################################
  61. # Query and Fragment Matcher Section
  62. # ##################################
  63. QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE)
  64. FRAGMENT_MATCHER = QUERY_MATCHER
  65. # Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
  66. SCHEME_MATCHER = re.compile('^{0}$'.format(abnf_regexp.SCHEME_RE))
  67. RELATIVE_REF_MATCHER = re.compile(r'^%s(\?%s)?(#%s)?$' % (
  68. abnf_regexp.RELATIVE_PART_RE,
  69. abnf_regexp.QUERY_RE,
  70. abnf_regexp.FRAGMENT_RE,
  71. ))
  72. # See http://tools.ietf.org/html/rfc3986#section-4.3
  73. ABSOLUTE_URI_MATCHER = re.compile(r'^%s:%s(\?%s)?$' % (
  74. abnf_regexp.COMPONENT_PATTERN_DICT['scheme'],
  75. abnf_regexp.HIER_PART_RE,
  76. abnf_regexp.QUERY_RE[1:-1],
  77. ))
  78. # ###############
  79. # IRIs / RFC 3987
  80. # ###############
  81. IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE)
  82. ISUBAUTHORITY_MATCHER = re.compile((
  83. u'^(?:(?P<userinfo>{0})@)?' # iuserinfo
  84. u'(?P<host>{1})' # ihost
  85. u':?(?P<port>{2})?$' # port
  86. ).format(abnf_regexp.IUSERINFO_RE,
  87. abnf_regexp.IHOST_RE,
  88. abnf_regexp.PORT_RE), re.UNICODE)
  89. # Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
  90. def merge_paths(base_uri, relative_path):
  91. """Merge a base URI's path with a relative URI's path."""
  92. if base_uri.path is None and base_uri.authority is not None:
  93. return '/' + relative_path
  94. else:
  95. path = base_uri.path or ''
  96. index = path.rfind('/')
  97. return path[:index] + '/' + relative_path
# A unique marker object that can only be matched by identity.
# NOTE(review): presumably used as a sentinel default meaning "keep the
# existing value" -- confirm against the callers that reference it.
UseExisting = object()