From bcf754c5b910ac76200dc8d9b656d49ae5fbf1bd Mon Sep 17 00:00:00 2001
From: Andrew Svetlov <andrew.svetlov@gmail.com>
Date: Sun, 19 Jul 2020 14:05:45 +0300
Subject: [PATCH 1/3] Implement IDNA encode/decode caching

---
 docs/api.rst          | 33 ++++++++++++++++++++++++++++
 requirements/test.txt |  1 +
 setup.py              |  2 +-
 yarl/__init__.py      | 51 ++++++++++++++++++++++++++++++++++++-------
 yarl/__init__.pyi     | 12 ++++++++++
 5 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 313e045a9..201e7d7c4 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -793,6 +793,39 @@ Default port substitution
       False
 
 
+Cache control
+-------------
+
+IDNA conversion used for host encoding is quite an expensive operation; that's why the
+``yarl`` library caches IDNA encoding/decoding calls by storing last ``256`` encodes
+and last ``256`` decodes in the global LRU cache.
+
+.. function:: clear_cache()
+
+   Clear IDNA caches.
+
+
+.. function:: cache_info()
+
+   Return a dictionary with ``"idna_encode"`` and ``"idna_decode"`` keys; each value
+   points to the corresponding ``CacheInfo`` structure (see :func:`functools.lru_cache`
+   for details):
+
+   .. doctest::
+      :options: +SKIP
+
+      >>> yarl.cache_info()
+      {'idna_encode': CacheInfo(hits=5, misses=5, maxsize=256, currsize=5),
+       'idna_decode': CacheInfo(hits=24, misses=15, maxsize=256, currsize=15)}
+
+
+.. function:: set_cache_sizes(*, idna_encode_size=256, idna_decode_size=256)
+
+   Set IDNA encode and decode cache sizes (``256`` for each by default).
+
+   Pass ``None`` to make the corresponding cache unbounded. This may speed up IDNA
+   encoding/decoding a little, but the memory footprint can become very high, so
+   use it with caution.
 
 References
 ----------
diff --git a/requirements/test.txt b/requirements/test.txt
index 0a2a3e520..9b941803c 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -2,4 +2,5 @@ pytest-cov>=2.3.1
 pytest==5.4.3
 multidict==4.7.6
 idna==2.10
+typing_extensions==3.7.4.2
 -e .
diff --git a/setup.py b/setup.py
index 8b8700c9d..e56e3f293 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@
     except IndexError:
         raise RuntimeError("Unable to determine version.")
 
-install_requires = ["multidict>=4.0", "idna>=2.0"]
+install_requires = ["multidict>=4.0", "idna>=2.0", "typing_extensions>=3.7.4"]
 
 
 def read(name):
diff --git a/yarl/__init__.py b/yarl/__init__.py
index 0b358ccc7..619327436 100644
--- a/yarl/__init__.py
+++ b/yarl/__init__.py
@@ -1,3 +1,4 @@
+import functools
 import sys
 import warnings
 from collections.abc import Mapping, Sequence
@@ -453,10 +454,7 @@ def host(self):
             # fe80::2%Проверка
             # presence of '%' sign means only IPv6 address, so idna is useless.
             return raw
-        try:
-            return idna.decode(raw.encode("ascii"))
-        except UnicodeError:  # e.g. '::1'
-            return raw.encode("ascii").decode("idna")
+        return _idna_decode(raw)
 
     @property
     def port(self):
@@ -671,12 +669,11 @@ def _encode_host(cls, host):
             except ValueError:
                 # IDNA encoding is slow,
                 # skip it for ASCII-only strings
+                # Keep this check outside the _idna_encode() helper so that
+                # ASCII-only hosts never take up slots in the LRU cache
                 if host.isascii():
                     return host
-                try:
-                    host = idna.encode(host, uts46=True).decode("ascii")
-                except UnicodeError:
-                    host = host.encode("idna").decode("ascii")
+                return _idna_encode(host)
             else:
                 host = ip.compressed
                 if sep:
@@ -1029,3 +1026,41 @@ def human_repr(self):
                 self.fragment,
             )
         )
+
+
+_MAXCACHE = 256
+
+
+@functools.lru_cache(_MAXCACHE)
+def _idna_decode(raw):
+    try:
+        return idna.decode(raw.encode("ascii"))
+    except UnicodeError:  # e.g. '::1'
+        return raw.encode("ascii").decode("idna")
+
+
+@functools.lru_cache(_MAXCACHE)
+def _idna_encode(host):
+    try:
+        return idna.encode(host, uts46=True).decode("ascii")
+    except UnicodeError:
+        return host.encode("idna").decode("ascii")
+
+
+def clear_cache():
+    _idna_decode.clear_cache()
+    _idna_encode.clear_cache()
+
+
+def cache_info():
+    return {
+        "idna_encode": _idna_encode.cache_info(),
+        "idna_decode": _idna_decode.cache_info(),
+    }
+
+
+def set_cache_sizes(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE):
+    global _idna_decode, _idna_encode
+
+    _idna_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__)
+    _idna_decode = functools.lru_cache(idna_decode_size)(_idna_decode.__wrapped__)
diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi
index d04f38bd2..10491ee52 100644
--- a/yarl/__init__.pyi
+++ b/yarl/__init__.pyi
@@ -1,5 +1,7 @@
 from typing import overload, Any, Tuple, Optional, Mapping, Union, Sequence, Type
+from typing_extensions import TypedDict
 import multidict
+from functools import _CacheInfo
 
 _QueryVariable = Union[str, int]
 _Query = Union[
@@ -87,3 +89,13 @@ class cached_property:
     def __init__(self, wrapped: Any) -> None: ...
     def __get__(self, inst: URL, owner: Type[URL]) -> Any: ...
     def __set__(self, inst: URL, value: Any) -> None: ...
+
+class CacheInfo(TypedDict):
+    idna_encode: _CacheInfo
+    idna_decode: _CacheInfo
+
+def clear_cache() -> None: ...
+def cache_info() -> CacheInfo: ...
+def set_cache_sizes(
+    *, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ...
+) -> None: ...

From b14f26a4c6a49240d1ffe8df71602446e2b4f3f0 Mon Sep 17 00:00:00 2001
From: Andrew Svetlov <andrew.svetlov@gmail.com>
Date: Sun, 19 Jul 2020 14:20:49 +0300
Subject: [PATCH 2/3] Add tests, rename API

---
 .gitignore          |  1 +
 docs/api.rst        |  4 ++--
 tests/test_cache.py | 28 ++++++++++++++++++++++++++++
 yarl/__init__.py    |  8 ++++----
 yarl/__init__.pyi   |  4 ++--
 5 files changed, 37 insertions(+), 8 deletions(-)
 create mode 100644 tests/test_cache.py

diff --git a/.gitignore b/.gitignore
index d5d763e18..cad90aeff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -104,3 +104,4 @@ yarl/_quoting.html
 .install-cython
 .install-deps
 .pytest_cache
+pip-wheel-metadata
\ No newline at end of file
diff --git a/docs/api.rst b/docs/api.rst
index 201e7d7c4..fa65dcced 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -800,7 +800,7 @@ IDNA conversion used for host encoding is quite expensive operation, that's why
 ``yarl`` library caches IDNA encoding/decoding calls by storing last ``256`` encodes
 and last ``256`` decodes in the global LRU cache.
 
-.. function:: clear_cache()
+.. function:: cache_clear()
 
    Clear IDNA caches.
 
@@ -819,7 +819,7 @@ and last ``256`` decodes in the global LRU cache.
        'idna_decode': CacheInfo(hits=24, misses=15, maxsize=256, currsize=15)}
 
 
-.. function:: set_cache_sizes(*, idna_encode_size=256, idna_decode_size=256)
+.. function:: cache_configure(*, idna_encode_size=256, idna_decode_size=256)
 
    Set IDNA encode and decode cache sizes (``256`` for each by default).
 
diff --git a/tests/test_cache.py b/tests/test_cache.py
new file mode 100644
index 000000000..22141dd08
--- /dev/null
+++ b/tests/test_cache.py
@@ -0,0 +1,28 @@
+import yarl
+
+# Smoke tests: these don't verify caching behavior, only that the calls succeed
+
+
+def teardown_module():
+    yarl.cache_configure()
+
+
+def test_cache_clear() -> None:
+    yarl.cache_clear()
+
+
+def test_cache_info() -> None:
+    info = yarl.cache_info()
+    assert info.keys() == {"idna_encode", "idna_decode"}
+
+
+def test_cache_configure_default() -> None:
+    yarl.cache_configure()
+
+
+def test_cache_configure_None() -> None:
+    yarl.cache_configure(idna_encode_size=None, idna_decode_size=None)
+
+
+def test_cache_configure_explicit() -> None:
+    yarl.cache_configure(idna_encode_size=128, idna_decode_size=128)
diff --git a/yarl/__init__.py b/yarl/__init__.py
index 619327436..72e32b1cd 100644
--- a/yarl/__init__.py
+++ b/yarl/__init__.py
@@ -1047,9 +1047,9 @@ def _idna_encode(host):
         return host.encode("idna").decode("ascii")
 
 
-def clear_cache():
-    _idna_decode.clear_cache()
-    _idna_encode.clear_cache()
+def cache_clear():
+    _idna_decode.cache_clear()
+    _idna_encode.cache_clear()
 
 
 def cache_info():
@@ -1059,7 +1059,7 @@ def cache_info():
     }
 
 
-def set_cache_sizes(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE):
+def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE):
     global _idna_decode, _idna_encode
 
     _idna_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__)
diff --git a/yarl/__init__.pyi b/yarl/__init__.pyi
index 10491ee52..5ca62eb91 100644
--- a/yarl/__init__.pyi
+++ b/yarl/__init__.pyi
@@ -94,8 +94,8 @@ class CacheInfo(TypedDict):
     idna_encode: _CacheInfo
     idna_decode: _CacheInfo
 
-def clear_cache() -> None: ...
+def cache_clear() -> None: ...
 def cache_info() -> CacheInfo: ...
-def set_cache_sizes(
+def cache_configure(
     *, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ...
 ) -> None: ...

From 63bf8024b18f1d6d5412bb2de0abbcd90b1138a9 Mon Sep 17 00:00:00 2001
From: Andrew Svetlov <andrew.svetlov@gmail.com>
Date: Sun, 19 Jul 2020 14:21:56 +0300
Subject: [PATCH 3/3] Add CHANGES

---
 CHANGES/476.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 CHANGES/476.feature

diff --git a/CHANGES/476.feature b/CHANGES/476.feature
new file mode 100644
index 000000000..2f822dbec
--- /dev/null
+++ b/CHANGES/476.feature
@@ -0,0 +1 @@
+Cache slow IDNA encode/decode calls.
\ No newline at end of file