diff --git a/.gitignore b/.gitignore index d5d763e18..cad90aeff 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,4 @@ yarl/_quoting.html .install-cython .install-deps .pytest_cache +pip-wheel-metadata \ No newline at end of file diff --git a/CHANGES/476.feature b/CHANGES/476.feature new file mode 100644 index 000000000..2f822dbec --- /dev/null +++ b/CHANGES/476.feature @@ -0,0 +1 @@ +Cache slow IDNA encode/decode calls. \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst index 313e045a9..fa65dcced 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -793,6 +793,39 @@ Default port substitution False +Cache control +------------- + +IDNA conversion used for host encoding is quite an expensive operation, which is why the +``yarl`` library caches IDNA encoding/decoding calls by storing the last ``256`` encodes +and the last ``256`` decodes in the global LRU cache. + +.. function:: cache_clear() + + Clear IDNA caches. + + +.. function:: cache_info() + + Return a dictionary with ``"idna_encode"`` and ``"idna_decode"`` keys, each value + points to the corresponding ``CacheInfo`` structure (see :func:`functools.lru_cache` for + details): + + .. doctest:: + :options: +SKIP + + >>> yarl.cache_info() + {'idna_encode': CacheInfo(hits=5, misses=5, maxsize=256, currsize=5), + 'idna_decode': CacheInfo(hits=24, misses=15, maxsize=256, currsize=15)} + + +.. function:: cache_configure(*, idna_encode_size=256, idna_decode_size=256) + + Set IDNA encode and decode cache sizes (``256`` for each by default). + + Pass ``None`` to make the corresponding cache unbounded (may speed up the IDNA + encoding/decoding operation a little but the memory footprint can be very high, + please use with caution). References ---------- diff --git a/requirements/test.txt b/requirements/test.txt index 0a2a3e520..9b941803c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -2,4 +2,5 @@ pytest-cov>=2.3.1 pytest==5.4.3 multidict==4.7.6 idna==2.10 +typing_extensions==3.7.4.2 -e . 
diff --git a/setup.py b/setup.py index 8b8700c9d..e56e3f293 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ except IndexError: raise RuntimeError("Unable to determine version.") -install_requires = ["multidict>=4.0", "idna>=2.0"] +install_requires = ["multidict>=4.0", "idna>=2.0", "typing_extensions>=3.7.4"] def read(name): diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 000000000..22141dd08 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,28 @@ +import yarl + +# Don't check the actual behavior but make sure that calls are allowed + + +def teardown_module(): + yarl.cache_configure() + + +def test_cache_clear() -> None: + yarl.cache_clear() + + +def test_cache_info() -> None: + info = yarl.cache_info() + assert info.keys() == {"idna_encode", "idna_decode"} + + +def test_cache_configure_default() -> None: + yarl.cache_configure() + + +def test_cache_configure_None() -> None: + yarl.cache_configure(idna_encode_size=None, idna_decode_size=None) + + +def test_cache_configure_explicit() -> None: + yarl.cache_configure(idna_encode_size=128, idna_decode_size=128) diff --git a/yarl/__init__.py b/yarl/__init__.py index 0b358ccc7..72e32b1cd 100644 --- a/yarl/__init__.py +++ b/yarl/__init__.py @@ -1,3 +1,4 @@ +import functools import sys import warnings from collections.abc import Mapping, Sequence @@ -453,10 +454,7 @@ def host(self): # fe80::2%Проверка # presence of '%' sign means only IPv6 address, so idna is useless. return raw - try: - return idna.decode(raw.encode("ascii")) - except UnicodeError: # e.g. 
'::1' - return raw.encode("ascii").decode("idna") + return _idna_decode(raw) @property def port(self): @@ -671,12 +669,11 @@ def _encode_host(cls, host): except ValueError: # IDNA encoding is slow, # skip it for ASCII-only strings + # Don't move the check into _idna_encode() helper + # to reduce the cache size if host.isascii(): return host - try: - host = idna.encode(host, uts46=True).decode("ascii") - except UnicodeError: - host = host.encode("idna").decode("ascii") + return _idna_encode(host) else: host = ip.compressed if sep: @@ -1029,3 +1026,41 @@ def human_repr(self): self.fragment, ) ) + + +_MAXCACHE = 256 + + +@functools.lru_cache(_MAXCACHE) +def _idna_decode(raw): + try: + return idna.decode(raw.encode("ascii")) + except UnicodeError: # e.g. '::1' + return raw.encode("ascii").decode("idna") + + +@functools.lru_cache(_MAXCACHE) +def _idna_encode(host): + try: + return idna.encode(host, uts46=True).decode("ascii") + except UnicodeError: + return host.encode("idna").decode("ascii") + + +def cache_clear(): + _idna_decode.cache_clear() + _idna_encode.cache_clear() + + +def cache_info(): + return { + "idna_encode": _idna_encode.cache_info(), + "idna_decode": _idna_decode.cache_info(), + } + + +def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE): + global _idna_decode, _idna_encode + + _idna_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__) + _idna_decode = functools.lru_cache(idna_decode_size)(_idna_decode.__wrapped__) diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi index d04f38bd2..5ca62eb91 100644 --- a/yarl/__init__.pyi +++ b/yarl/__init__.pyi @@ -1,5 +1,7 @@ from typing import overload, Any, Tuple, Optional, Mapping, Union, Sequence, Type +from typing_extensions import TypedDict import multidict +from functools import _CacheInfo _QueryVariable = Union[str, int] _Query = Union[ @@ -87,3 +89,13 @@ class cached_property: def __init__(self, wrapped: Any) -> None: ... 
def __get__(self, inst: URL, owner: Type[URL]) -> Any: ... def __set__(self, inst: URL, value: Any) -> None: ... + +class CacheInfo(TypedDict): + idna_encode: _CacheInfo + idna_decode: _CacheInfo + +def cache_clear() -> None: ... +def cache_info() -> CacheInfo: ... +def cache_configure( + *, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ... +) -> None: ...