From bcf754c5b910ac76200dc8d9b656d49ae5fbf1bd Mon Sep 17 00:00:00 2001 From: Andrew Svetlov <andrew.svetlov@gmail.com> Date: Sun, 19 Jul 2020 14:05:45 +0300 Subject: [PATCH 1/3] Implement IDNA encode/decode caching --- docs/api.rst | 33 ++++++++++++++++++++++++++++ requirements/test.txt | 1 + setup.py | 2 +- yarl/__init__.py | 51 ++++++++++++++++++++++++++++++++++++------- yarl/__init__.pyi | 12 ++++++++++ 5 files changed, 90 insertions(+), 9 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 313e045a9..201e7d7c4 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -793,6 +793,39 @@ Default port substitution False +Cache control +------------- + +IDNA conversion used for host encoding is quite an expensive operation, that's why the +``yarl`` library caches IDNA encoding/decoding calls by storing last ``256`` encodes +and last ``256`` decodes in the global LRU cache. + +.. function:: clear_cache() + + Clear IDNA caches. + + +.. function:: cache_info() + + Return a dictionary with ``"idna_encode"`` and ``"idna_decode"`` keys, each value + points to the corresponding ``CacheInfo`` structure (see :func:`functools.lru_cache` for + details): + + .. doctest:: + :options: +SKIP + + >>> yarl.cache_info() + {'idna_encode': CacheInfo(hits=5, misses=5, maxsize=256, currsize=5), + 'idna_decode': CacheInfo(hits=24, misses=15, maxsize=256, currsize=15)} + + +.. function:: set_cache_sizes(*, idna_encode_size=256, idna_decode_size=256) + + Set IDNA encode and decode cache sizes (``256`` for each by default). + + Pass ``None`` to make the corresponding cache unbounded (may speed up the IDNA + encoding/decoding operation a little but the memory footprint can be very high, + please use with caution). References ---------- diff --git a/requirements/test.txt b/requirements/test.txt index 0a2a3e520..9b941803c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -2,4 +2,5 @@ pytest-cov>=2.3.1 pytest==5.4.3 multidict==4.7.6 idna==2.10 +typing_extensions==3.7.4.2 -e . 
diff --git a/setup.py b/setup.py index 8b8700c9d..e56e3f293 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ except IndexError: raise RuntimeError("Unable to determine version.") -install_requires = ["multidict>=4.0", "idna>=2.0"] +install_requires = ["multidict>=4.0", "idna>=2.0", "typing_extensions>=3.7.4"] def read(name): diff --git a/yarl/__init__.py b/yarl/__init__.py index 0b358ccc7..619327436 100644 --- a/yarl/__init__.py +++ b/yarl/__init__.py @@ -1,3 +1,4 @@ +import functools import sys import warnings from collections.abc import Mapping, Sequence @@ -453,10 +454,7 @@ def host(self): # fe80::2%Проверка # presence of '%' sign means only IPv6 address, so idna is useless. return raw - try: - return idna.decode(raw.encode("ascii")) - except UnicodeError: # e.g. '::1' - return raw.encode("ascii").decode("idna") + return _idna_decode(raw) @property def port(self): @@ -671,12 +669,11 @@ def _encode_host(cls, host): except ValueError: # IDNA encoding is slow, # skip it for ASCII-only strings + # Don't move the check into _idna_encode() helper + # to reduce the cache size if host.isascii(): return host - try: - host = idna.encode(host, uts46=True).decode("ascii") - except UnicodeError: - host = host.encode("idna").decode("ascii") + return _idna_encode(host) else: host = ip.compressed if sep: @@ -1029,3 +1026,41 @@ def human_repr(self): self.fragment, ) ) + + +_MAXCACHE = 256 + + +@functools.lru_cache(_MAXCACHE) +def _idna_decode(raw): + try: + return idna.decode(raw.encode("ascii")) + except UnicodeError: # e.g. 
'::1' + return raw.encode("ascii").decode("idna") + + +@functools.lru_cache(_MAXCACHE) +def _idna_encode(host): + try: + return idna.encode(host, uts46=True).decode("ascii") + except UnicodeError: + return host.encode("idna").decode("ascii") + + +def clear_cache(): + _idna_decode.clear_cache() + _idna_encode.clear_cache() + + +def cache_info(): + return { + "idna_encode": _idna_encode.cache_info(), + "idna_decode": _idna_decode.cache_info(), + } + + +def set_cache_sizes(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE): + global _idna_decode, _idna_encode + + _idna_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__) + _idna_decode = functools.lru_cache(idna_decode_size)(_idna_decode.__wrapped__) diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi index d04f38bd2..10491ee52 100644 --- a/yarl/__init__.pyi +++ b/yarl/__init__.pyi @@ -1,5 +1,7 @@ from typing import overload, Any, Tuple, Optional, Mapping, Union, Sequence, Type +from typing_extensions import TypedDict import multidict +from functools import _CacheInfo _QueryVariable = Union[str, int] _Query = Union[ @@ -87,3 +89,13 @@ class cached_property: def __init__(self, wrapped: Any) -> None: ... def __get__(self, inst: URL, owner: Type[URL]) -> Any: ... def __set__(self, inst: URL, value: Any) -> None: ... + +class CacheInfo(TypedDict): + idna_encode: _CacheInfo + idna_decode: _CacheInfo + +def clear_cache() -> None: ... +def cache_info() -> CacheInfo: ... +def set_cache_sizes( + *, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ... +) -> None: ... 
From b14f26a4c6a49240d1ffe8df71602446e2b4f3f0 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov <andrew.svetlov@gmail.com> Date: Sun, 19 Jul 2020 14:20:49 +0300 Subject: [PATCH 2/3] Add tests, rename API --- .gitignore | 1 + docs/api.rst | 4 ++-- tests/test_cache.py | 28 ++++++++++++++++++++++++++++ yarl/__init__.py | 8 ++++---- yarl/__init__.pyi | 4 ++-- 5 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 tests/test_cache.py diff --git a/.gitignore b/.gitignore index d5d763e18..cad90aeff 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,4 @@ yarl/_quoting.html .install-cython .install-deps .pytest_cache +pip-wheel-metadata \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst index 201e7d7c4..fa65dcced 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -800,7 +800,7 @@ IDNA conversion used for host encoding is quite expensive operation, that's why ``yarl`` library caches IDNA encoding/decoding calls by storing last ``256`` encodes and last ``256`` decodes in the global LRU cache. -.. function:: clear_cache() +.. function:: cache_clear() Clear IDNA caches. @@ -819,7 +819,7 @@ and last ``256`` decodes in the global LRU cache. 'idna_decode': CacheInfo(hits=24, misses=15, maxsize=256, currsize=15)} -.. function:: set_cache_sizes(*, idna_encode_size=256, idna_decode_size=256) +.. function:: cache_configure(*, idna_encode_size=256, idna_decode_size=256) Set IDNA encode and decode cache sizes (``256`` for each by default). 
diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 000000000..22141dd08 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,28 @@ +import yarl + +# Don't check the actual behavior but make sure that calls are allowed + + +def teardown_module(): + yarl.cache_configure() + + +def test_cache_clear() -> None: + yarl.cache_clear() + + +def test_cache_info() -> None: + info = yarl.cache_info() + assert info.keys() == {"idna_encode", "idna_decode"} + + +def test_cache_configure_default() -> None: + yarl.cache_configure() + + +def test_cache_configure_None() -> None: + yarl.cache_configure(idna_encode_size=None, idna_decode_size=None) + + +def test_cache_configure_explicit() -> None: + yarl.cache_configure(idna_encode_size=128, idna_decode_size=128) diff --git a/yarl/__init__.py b/yarl/__init__.py index 619327436..72e32b1cd 100644 --- a/yarl/__init__.py +++ b/yarl/__init__.py @@ -1047,9 +1047,9 @@ def _idna_encode(host): return host.encode("idna").decode("ascii") -def clear_cache(): - _idna_decode.clear_cache() - _idna_encode.clear_cache() +def cache_clear(): + _idna_decode.cache_clear() + _idna_encode.cache_clear() def cache_info(): @@ -1059,7 +1059,7 @@ def cache_info(): } -def set_cache_sizes(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE): +def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE): global _idna_decode, _idna_encode _idna_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__) diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi index 10491ee52..5ca62eb91 100644 --- a/yarl/__init__.pyi +++ b/yarl/__init__.pyi @@ -94,8 +94,8 @@ class CacheInfo(TypedDict): idna_encode: _CacheInfo idna_decode: _CacheInfo -def clear_cache() -> None: ... +def cache_clear() -> None: ... def cache_info() -> CacheInfo: ... -def set_cache_sizes( +def cache_configure( *, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ... ) -> None: ... 
From 63bf8024b18f1d6d5412bb2de0abbcd90b1138a9 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov <andrew.svetlov@gmail.com> Date: Sun, 19 Jul 2020 14:21:56 +0300 Subject: [PATCH 3/3] Add CHANGES --- CHANGES/476.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/476.feature diff --git a/CHANGES/476.feature b/CHANGES/476.feature new file mode 100644 index 000000000..2f822dbec --- /dev/null +++ b/CHANGES/476.feature @@ -0,0 +1 @@ +Cache slow IDNA encode/decode calls. \ No newline at end of file