accept.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. from __future__ import annotations
  2. import codecs
  3. import collections.abc as cabc
  4. import re
  5. import typing as t
  6. from .structures import ImmutableList
  class Accept(ImmutableList[tuple[str, float]]):
      """Immutable list of ``(value, quality)`` tuples for an accept-style
      header.

      When built from a plain iterable the entries are ordered by
      specificity first and quality second, both descending. Entries that
      compare equal keep the order in which they were supplied.

      Besides the regular list behaviour, lookups by string are normalized
      to the matching rules of the header the (sub)class represents:

      >>> a = CharsetAccept([('ISO-8859-1', 1), ('utf-8', 0.7)])
      >>> a.best
      'ISO-8859-1'
      >>> 'iso-8859-1' in a
      True
      >>> 'UTF8' in a
      True
      >>> 'utf7' in a
      False

      Item lookup with a string returns the quality of that value, or
      ``0`` when nothing matches:

      >>> a['utf-8']
      0.7
      >>> a['utf7']
      0

      The ``provided`` attribute is ``False`` only when the object was
      created from ``None``; a copy of another :class:`Accept` inherits
      the flag of its source.

      .. versionchanged:: 0.5
          :class:`Accept` objects are forced immutable now.

      .. versionchanged:: 1.0.0
          :class:`Accept` internal values are no longer ordered
          alphabetically for equal quality tags. Instead the initial
          order is preserved.
      """

      def __init__(
          self, values: Accept | cabc.Iterable[tuple[str, float]] | None = ()
      ) -> None:
          if values is None:
              # No header at all: empty list, flagged as not provided.
              super().__init__()
              self.provided = False
              return

          if isinstance(values, Accept):
              # Copying an existing object: it is already ordered.
              self.provided = values.provided
              super().__init__(values)
              return

          self.provided = True

          def sort_key(entry: tuple[str, float]) -> tuple[tuple[bool, ...], float]:
              return self._specificity(entry[0]), entry[1]

          # A reversed list sort is still stable, so ties keep input order.
          ordered = list(values)
          ordered.sort(key=sort_key, reverse=True)
          super().__init__(ordered)

      def _specificity(self, value: str) -> tuple[bool, ...]:
          """Return a tuple ranking how specific ``value`` is.

          Anything other than the ``*`` wildcard ranks higher.
          """
          is_concrete = value != "*"
          return (is_concrete,)

      def _value_matches(self, value: str, item: str) -> bool:
          """Tell whether ``value`` is covered by the accept entry ``item``.

          The ``*`` entry covers everything; otherwise the comparison is
          case-insensitive equality.
          """
          if item == "*":
              return True
          return item.lower() == value.lower()

      @t.overload
      def __getitem__(self, key: str) -> float: ...
      @t.overload
      def __getitem__(self, key: t.SupportsIndex) -> tuple[str, float]: ...
      @t.overload
      def __getitem__(self, key: slice) -> list[tuple[str, float]]: ...
      def __getitem__(
          self, key: str | t.SupportsIndex | slice
      ) -> float | tuple[str, float] | list[tuple[str, float]]:
          """Look up by position, slice, or value.

          A string key returns the quality of the matching entry (``0``
          if there is none). Any other key is handled like a normal list
          index or slice.
          """
          if not isinstance(key, str):
              return list.__getitem__(self, key)
          return self.quality(key)

      def quality(self, key: str) -> float:
          """Return the quality of the first entry matching ``key``, or
          ``0`` when no entry matches.

          .. versionadded:: 0.6
              In previous versions you had to use the item-lookup syntax
              (eg: ``obj[key]`` instead of ``obj.quality(key)``)
          """
          matching_qualities = (
              entry_quality
              for entry_value, entry_quality in self
              if self._value_matches(key, entry_value)
          )
          return next(matching_qualities, 0)

      def __contains__(self, value: str) -> bool:  # type: ignore[override]
          return any(
              self._value_matches(value, entry_value) for entry_value, _ in self
          )

      def __repr__(self) -> str:
          rendered = [f"({value!r}, {quality})" for value, quality in self]
          return f"{type(self).__name__}([{', '.join(rendered)}])"

      def index(self, key: str | tuple[str, float]) -> int:  # type: ignore[override]
          """Return the position of an entry or raise :exc:`ValueError`.

          :param key: The key to be looked up. A string is compared with
              the header's matching rules; anything else is looked up
              like a regular list element.

          .. versionchanged:: 0.5
              This used to raise :exc:`IndexError`, which was inconsistent
              with the list API.
          """
          if not isinstance(key, str):
              return list.index(self, key)

          for position, (entry_value, _quality) in enumerate(self):
              if self._value_matches(key, entry_value):
                  return position

          raise ValueError(key)

      def find(self, key: str | tuple[str, float]) -> int:
          """Return the position of an entry, or ``-1`` if it is absent.

          :param key: The key to be looked up.
          """
          try:
              position = self.index(key)
          except ValueError:
              position = -1
          return position

      def values(self) -> cabc.Iterator[str]:
          """Iterate over the values, without their qualities."""
          yield from (entry[0] for entry in self)

      def to_header(self) -> str:
          """Serialize the entries back into an HTTP header string.

          The ``;q=`` parameter is omitted for entries of quality ``1``.
          """
          parts = [
              f"{value};q={quality}" if quality != 1 else value
              for value, quality in self
          ]
          return ",".join(parts)

      def __str__(self) -> str:
          return self.to_header()

      def _best_single_match(self, match: str) -> tuple[str, float] | None:
          """Return the first ``(item, quality)`` entry that covers
          ``match``, or ``None``.
          """
          # Entries are ordered most specific first, so the first hit wins.
          hits = (
              (client_item, quality)
              for client_item, quality in self
              if self._value_matches(match, client_item)
          )
          return next(hits, None)

      @t.overload
      def best_match(self, matches: cabc.Iterable[str]) -> str | None: ...
      @t.overload
      def best_match(self, matches: cabc.Iterable[str], default: str = ...) -> str: ...
      def best_match(
          self, matches: cabc.Iterable[str], default: str | None = None
      ) -> str | None:
          """Pick the server value the client prefers most.

          Candidates are ranked by the quality of the client entry that
          covers them, then by the specificity of that entry. On a full
          tie the candidate that comes first in ``matches`` is kept.
          Entries with a quality of ``0`` or less never match.

          :param matches: a list of matches to check for
          :param default: the value that is returned if none match
          """
          chosen = default
          best_quality: float = -1
          best_specificity: tuple[float, ...] = (-1,)

          for candidate in matches:
              found = self._best_single_match(candidate)

              if found is None:
                  continue

              client_item, quality = found
              specificity = self._specificity(client_item)

              if quality <= 0 or quality < best_quality:
                  continue

              # better quality or same quality but more specific => better match
              if quality > best_quality or specificity > best_specificity:
                  chosen, best_quality, best_specificity = (
                      candidate,
                      quality,
                      specificity,
                  )

          return chosen

      @property
      def best(self) -> str | None:
          """The value of the first (highest ranked) entry, or ``None``
          when the list is empty.
          """
          return self[0][0] if self else None
  # Splits a mimetype on the type/subtype slash and on each ";" that
  # introduces a parameter, swallowing whitespace around the semicolon.
  _mime_split_re = re.compile(r"/|(?:\s*;\s*)")


  def _normalize_mime(value: str) -> list[str]:
      """Lowercase ``value`` and split it into its mimetype components.

      For ``"Text/HTML; Level=1"`` this gives ``["text", "html", "level=1"]``.
      """
      lowered = value.lower()
      return _mime_split_re.split(lowered)
  class MIMEAccept(Accept):
      """:class:`Accept` specialised for mimetypes: matching understands
      ``type/subtype`` wildcards and parameters, and a few convenience
      properties answer common "does the client accept X" questions.
      """

      def _specificity(self, value: str) -> tuple[bool, ...]:
          # One flag per component (type, subtype, each parameter); a
          # component is specific when it is not the "*" wildcard.
          components = _mime_split_re.split(value)
          return tuple(component != "*" for component in components)

      def _value_matches(self, value: str, item: str) -> bool:
          """Tell whether the application mimetype ``value`` is covered by
          the client accept entry ``item``.

          :raises ValueError: if ``value`` has no ``/`` or is of the form
              ``*/subtype``. An invalid ``item`` never raises, it simply
              does not match.
          """
          # item comes from the client, can't match if it's invalid.
          if "/" not in item:
              return False

          # value comes from the application, tell the developer when it
          # doesn't look valid.
          if "/" not in value:
              raise ValueError(f"invalid mimetype {value!r}")

          # Type, subtype, then whatever parameters follow (order-insensitive).
          value_type, value_subtype, *value_params = _normalize_mime(value)
          value_params.sort()

          # "*/*" is the only valid value that can start with "*".
          if value_type == "*" and value_subtype != "*":
              raise ValueError(f"invalid mimetype {value!r}")

          item_type, item_subtype, *item_params = _normalize_mime(item)
          item_params.sort()

          # "*/not-*" from the client is invalid, can't match.
          if item_type == "*" and item_subtype != "*":
              return False

          item_is_any = item_type == "*" and item_subtype == "*"
          value_is_any = value_type == "*" and value_subtype == "*"

          # A full wildcard on either side covers everything.
          if item_is_any or value_is_any:
              return True

          if item_type != value_type:
              return False

          # Same type: a subtype wildcard on either side is enough.
          if "*" in (item_subtype, value_subtype):
              return True

          # Otherwise subtype and parameters must agree exactly.
          return item_subtype == value_subtype and item_params == value_params

      @property
      def accept_html(self) -> bool:
          """True if this object accepts HTML."""
          if "text/html" in self:  # type: ignore[comparison-overlap]
              return True
          return self.accept_xhtml

      @property
      def accept_xhtml(self) -> bool:
          """True if this object accepts XHTML."""
          xhtml_types = ("application/xhtml+xml", "application/xml")
          return any(
              mimetype in self  # type: ignore[comparison-overlap]
              for mimetype in xhtml_types
          )

      @property
      def accept_json(self) -> bool:
          """True if this object accepts JSON."""
          json_type = "application/json"
          return json_type in self  # type: ignore[comparison-overlap]
  # Language subtags may be separated by either "_" or "-".
  _locale_delim_re = re.compile(r"[_-]")


  def _normalize_lang(value: str) -> list[str]:
      """Lowercase a language tag and split it into its subtags so that
      tags differing only in case or delimiter compare equal.
      """
      lowered = value.lower()
      return _locale_delim_re.split(lowered)
  class LanguageAccept(Accept):
      """Like :class:`Accept` but with normalization for language tags."""

      def _value_matches(self, value: str, item: str) -> bool:
          # "*" covers any language; otherwise compare the tags subtag by
          # subtag, ignoring case and the "_" / "-" delimiter choice.
          return item == "*" or _normalize_lang(value) == _normalize_lang(item)

      @t.overload
      def best_match(self, matches: cabc.Iterable[str]) -> str | None: ...
      @t.overload
      def best_match(self, matches: cabc.Iterable[str], default: str = ...) -> str: ...
      def best_match(
          self, matches: cabc.Iterable[str], default: str | None = None
      ) -> str | None:
          """Given a list of supported values, finds the best match from
          the list of accepted values.

          Language tags are normalized for the purpose of matching, but
          are returned unchanged.

          If no exact match is found, this will fall back to matching
          the first subtag (primary language only), first with the
          accepted values then with the match values. This partial is not
          applied to any other language subtags.

          The default is returned if no exact or fallback match is found.

          :param matches: Supported languages to find a match in. Any
              iterable is accepted, including one-shot iterators.
          :param default: The value that is returned if none match.
          :return: One of the original ``matches`` values, or ``default``.
          """
          # ``matches`` is scanned several times below. Materialize it once
          # so that a generator is not exhausted by the first pass.
          matches = list(matches)

          # Look for an exact match first. If a client accepts "en-US",
          # "en-US" is a valid match at this point.
          result = super().best_match(matches)

          if result is not None:
              return result

          # Fall back to accepting primary tags. If a client accepts
          # "en-US", "en" is a valid match at this point. Need to use
          # re.split to account for 2 or 3 letter codes.
          fallback = Accept(
              [(_locale_delim_re.split(item[0], 1)[0], item[1]) for item in self]
          )
          result = fallback.best_match(matches)

          if result is not None:
              return result

          # Fall back to matching primary tags. If the client accepts
          # "en", "en-US" is a valid match at this point.
          primary_tags = [_locale_delim_re.split(item, 1)[0] for item in matches]
          result = super().best_match(primary_tags)

          if result is not None:
              # Return a value from the original match list. The parallel
              # list maps the primary tag back by equality; a prefix test
              # would wrongly let "de" select "deu-DE" ahead of "de-AT".
              return matches[primary_tags.index(result)]

          return default
  class CharsetAccept(Accept):
      """:class:`Accept` specialised for charsets: names are compared by
      their canonical codec name, so aliases of one encoding match.
      """

      def _value_matches(self, value: str, item: str) -> bool:
          # The wildcard entry covers every charset.
          if item == "*":
              return True

          def canonical(name: str) -> str:
              # Use the codec registry's name when the charset is known,
              # otherwise fall back to a plain lowercase comparison.
              try:
                  return codecs.lookup(name).name
              except LookupError:
                  return name.lower()

          return canonical(value) == canonical(item)
  279. return item == "*" or _normalize(value) == _normalize(item)