Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement IDNA encode/decode caching #476

Merged
merged 3 commits into from
Jul 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,4 @@ yarl/_quoting.html
.install-cython
.install-deps
.pytest_cache
pip-wheel-metadata
1 change: 1 addition & 0 deletions CHANGES/476.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Cache slow IDNA encode/decode calls.
33 changes: 33 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,39 @@ Default port substitution
False


Cache control
-------------

IDNA conversion used for host encoding is quite an expensive operation, which is why
the ``yarl`` library caches IDNA encoding/decoding calls by storing the last ``256``
encodes and the last ``256`` decodes in global LRU caches.

.. function:: cache_clear()

Clear IDNA caches.


.. function:: cache_info()

Return a dictionary with ``"idna_encode"`` and ``"idna_decode"`` keys; each value is
the corresponding ``CacheInfo`` structure (see :func:`functools.lru_cache` for
details):

.. doctest::
:options: +SKIP

>>> yarl.cache_info()
{'idna_encode': CacheInfo(hits=5, misses=5, maxsize=256, currsize=5),
'idna_decode': CacheInfo(hits=24, misses=15, maxsize=256, currsize=15)}


.. function:: cache_configure(*, idna_encode_size=256, idna_decode_size=256)

Set IDNA encode and decode cache sizes (``256`` for each by default).

Pass ``None`` to make the corresponding cache unbounded (this may speed up IDNA
encoding/decoding a little, but the memory footprint can become very high; please use
with caution).

References
----------
Expand Down
1 change: 1 addition & 0 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ pytest-cov>=2.3.1
pytest==5.4.3
multidict==4.7.6
idna==2.10
typing_extensions==3.7.4.2
-e .
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
except IndexError:
raise RuntimeError("Unable to determine version.")

install_requires = ["multidict>=4.0", "idna>=2.0"]
install_requires = ["multidict>=4.0", "idna>=2.0", "typing_extensions>=3.7.4"]


def read(name):
Expand Down
28 changes: 28 additions & 0 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import yarl

# Don't check the actual behavior but make sure that calls are allowed


def teardown_module():
    """Re-apply the default cache configuration after this module's tests."""
    # Calling without arguments uses the default sizes, undoing any size
    # changes made by the cache_configure tests below.
    yarl.cache_configure()


def test_cache_clear() -> None:
    """Smoke test: ``yarl.cache_clear()`` can be called without raising."""
    yarl.cache_clear()


def test_cache_info() -> None:
    """``yarl.cache_info()`` exposes exactly the encode and decode entries."""
    expected_keys = {"idna_encode", "idna_decode"}
    assert yarl.cache_info().keys() == expected_keys


def test_cache_configure_default() -> None:
    """Smoke test: ``cache_configure()`` accepts being called with no arguments."""
    yarl.cache_configure()


def test_cache_configure_None() -> None:
    """Smoke test: ``None`` is accepted as the size of both caches."""
    yarl.cache_configure(
        idna_encode_size=None,
        idna_decode_size=None,
    )


def test_cache_configure_explicit() -> None:
    """Smoke test: explicit integer sizes are accepted for both caches."""
    yarl.cache_configure(
        idna_encode_size=128,
        idna_decode_size=128,
    )
51 changes: 43 additions & 8 deletions yarl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import functools
import sys
import warnings
from collections.abc import Mapping, Sequence
Expand Down Expand Up @@ -453,10 +454,7 @@ def host(self):
# fe80::2%Проверка
# presence of '%' sign means only IPv6 address, so idna is useless.
return raw
try:
return idna.decode(raw.encode("ascii"))
except UnicodeError: # e.g. '::1'
return raw.encode("ascii").decode("idna")
return _idna_decode(raw)

@property
def port(self):
Expand Down Expand Up @@ -671,12 +669,11 @@ def _encode_host(cls, host):
except ValueError:
# IDNA encoding is slow,
# skip it for ASCII-only strings
# Don't move the check into _idna_encode() helper
# to reduce the cache size
if host.isascii():
return host
try:
host = idna.encode(host, uts46=True).decode("ascii")
except UnicodeError:
host = host.encode("idna").decode("ascii")
return _idna_encode(host)
else:
host = ip.compressed
if sep:
Expand Down Expand Up @@ -1029,3 +1026,41 @@ def human_repr(self):
self.fragment,
)
)


# Default maximum number of entries kept by each IDNA LRU cache
# (used for both the encode and the decode cache).
_MAXCACHE = 256


@functools.lru_cache(maxsize=_MAXCACHE)
def _idna_decode(raw):
    """Decode the ASCII host string *raw* through IDNA, memoizing the result.

    The ``idna`` package is tried first; if it raises ``UnicodeError``
    the standard library ``"idna"`` codec is used instead.
    """
    try:
        decoded = idna.decode(raw.encode("ascii"))
    except UnicodeError:
        # The idna package rejects some inputs, e.g. '::1'.
        decoded = raw.encode("ascii").decode("idna")
    return decoded


@functools.lru_cache(maxsize=_MAXCACHE)
def _idna_encode(host):
    """Encode *host* to its ASCII IDNA form, memoizing the result.

    The ``idna`` package (with ``uts46=True``) is tried first; if it raises
    ``UnicodeError`` the standard library ``"idna"`` codec is used instead.
    """
    try:
        encoded = idna.encode(host, uts46=True)
    except UnicodeError:
        encoded = host.encode("idna")
    return encoded.decode("ascii")


def cache_clear():
    """Clear IDNA caches (decode first, then encode)."""
    for cached_func in (_idna_decode, _idna_encode):
        cached_func.cache_clear()


def cache_info():
    """Return the statistics of both IDNA caches.

    The result is a dict with ``"idna_encode"`` and ``"idna_decode"`` keys,
    each holding the ``cache_info()`` value of the corresponding cache.
    """
    encode_stats = _idna_encode.cache_info()
    decode_stats = _idna_decode.cache_info()
    return {"idna_encode": encode_stats, "idna_decode": decode_stats}


def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE):
    """Set IDNA encode and decode cache sizes.

    Each size is passed to ``functools.lru_cache`` as its maximum size.
    Both caches are rebuilt around the undecorated functions, so the
    replacements start out empty.
    """
    global _idna_decode, _idna_encode

    # ``__wrapped__`` is the original function stored by ``lru_cache``;
    # re-wrapping it swaps in a cache with the requested size.
    encode_wrapper = functools.lru_cache(maxsize=idna_encode_size)
    _idna_encode = encode_wrapper(_idna_encode.__wrapped__)
    decode_wrapper = functools.lru_cache(maxsize=idna_decode_size)
    _idna_decode = decode_wrapper(_idna_decode.__wrapped__)
12 changes: 12 additions & 0 deletions yarl/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import overload, Any, Tuple, Optional, Mapping, Union, Sequence, Type
from typing_extensions import TypedDict
import multidict
from functools import _CacheInfo

_QueryVariable = Union[str, int]
_Query = Union[
Expand Down Expand Up @@ -87,3 +89,13 @@ class cached_property:
def __init__(self, wrapped: Any) -> None: ...
def __get__(self, inst: URL, owner: Type[URL]) -> Any: ...
def __set__(self, inst: URL, value: Any) -> None: ...

class CacheInfo(TypedDict):
    # Shape of the mapping returned by cache_info(): one lru_cache
    # statistics record per IDNA cache.

    # Statistics of the IDNA encode cache.
    idna_encode: _CacheInfo
    # Statistics of the IDNA decode cache.
    idna_decode: _CacheInfo

# Clear IDNA caches.
def cache_clear() -> None: ...
# Return the statistics of the IDNA encode and decode caches.
def cache_info() -> CacheInfo: ...
# Set the IDNA encode and decode cache sizes (keyword-only arguments);
# the parameters are Optional[int], so None is accepted.
def cache_configure(
*, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ...
) -> None: ...