diff --git a/docs/source/reference-io.rst b/docs/source/reference-io.rst index c43313af77..d4ff3b6bf1 100644 --- a/docs/source/reference-io.rst +++ b/docs/source/reference-io.rst @@ -191,12 +191,17 @@ convert a standard library socket into a trio socket: .. autofunction:: from_stdlib_socket +For name lookup, Trio provides the standard :func:`getaddrinfo` and +:func:`getnameinfo`, but with small changes: + +.. autofunction:: getaddrinfo + +.. autofunction:: getnameinfo + The following functions have identical interfaces to their standard library version, but are now ``async`` functions, so you need to use ``await`` to call them: -* :func:`~socket.getaddrinfo` -* :func:`~socket.getnameinfo` * :func:`~socket.getfqdn` Trio intentionally DOES NOT include some obsolete, redundant, or diff --git a/setup.py b/setup.py index 4bcda3db46..7bd32beb8f 100644 --- a/setup.py +++ b/setup.py @@ -74,6 +74,7 @@ "attrs", "sortedcontainers", "async_generator >= 1.6", + "idna", # PEP 508 style, but: # https://bitbucket.org/pypa/wheel/issues/181/bdist_wheel-silently-discards-pep-508 #"cffi; os_name == 'nt'", # "cffi is required on windows" diff --git a/trio/socket.py b/trio/socket.py index 5d08b901ca..4f4a3093e4 100644 --- a/trio/socket.py +++ b/trio/socket.py @@ -5,6 +5,8 @@ from contextlib import contextmanager as _contextmanager import errno as _errno +import idna + from . import _core from ._threads import run_in_worker_thread as _run_in_worker_thread @@ -94,6 +96,19 @@ async def __aexit__(self, etype, value, tb): _NUMERIC_ONLY = _stdlib_socket.AI_NUMERICHOST | _stdlib_socket.AI_NUMERICSERV async def getaddrinfo(host, port, family=0, type=0, proto=0, flags=0): + """Look up a numeric address given a name. + + Arguments and return values are identical to :func:`socket.getaddrinfo`, + except that this version is async. + + Also, :func:`trio.socket.getaddrinfo` correctly uses IDNA 2008 to process + non-ASCII domain names. 
(:func:`socket.getaddrinfo` uses IDNA 2003, which + can give the wrong result in some cases and cause you to connect to a + different host than the one you intended; see `bpo-17305 + <https://bugs.python.org/issue17305>`__.) + + """ + # If host and port are numeric, then getaddrinfo doesn't block and we can # skip the whole thread thing, which seems worthwhile. So we try first # with the _NUMERIC_ONLY flags set, and then only spawn a thread if that @@ -103,13 +118,71 @@ def numeric_only_failure(exc): async with _try_sync(numeric_only_failure): return _stdlib_socket.getaddrinfo( host, port, family, type, proto, flags | _NUMERIC_ONLY) - # That failed, try a thread instead + # That failed; it's a real hostname. We'd better use a thread. + # + # Also, it might be a unicode hostname, in which case we want to do our + # own encoding using the idna module, rather than letting Python do + # it. (Python will use the old IDNA 2003 standard, and possibly get the + # wrong answer - see bpo-17305). However, the idna module is picky, and + # will refuse to process some valid hostname strings, like "::1". So if + # it's already ascii, we pass it through; otherwise, we encode it with idna. + if isinstance(host, str): + try: + host = host.encode("ascii") + except UnicodeEncodeError: + # UTS-46 defines various normalizations; in particular, by default + # idna.encode will error out if the hostname has Capital Letters + # in it; with uts46=True it will lowercase them instead. + host = idna.encode(host, uts46=True) return await _run_in_worker_thread( _stdlib_socket.getaddrinfo, host, port, family, type, proto, flags, cancellable=True) __all__.append("getaddrinfo") + +async def getnameinfo(sockaddr, flags): + """Look up a name given a numeric address. + + Arguments and return values are identical to :func:`socket.getnameinfo`, + except: + + * This version is async. + + * This version does *not* perform implicit name resolution. 
For example, + this will raise an error:: + + await trio.socket.getnameinfo(("localhost", 80), 0) # error! + + Instead, use :func:`getaddrinfo` or similar to get a numeric address, + and then use that:: + + await trio.socket.getnameinfo(("127.0.0.1", 80), 0) # correct! + + """ + # stdlib version accepts hostnames; we want to restrict to only numeric + # addresses, to avoid complications with IDNA etc. and for consistency + # with analogous socket methods. + if not isinstance(sockaddr, tuple) or not 2 <= len(sockaddr) <= 4: + await _core.yield_briefly() + raise ValueError( + "expected a (host, port) tuple, not {}".format(sockaddr)) + host, port, *_ = sockaddr + try: + _stdlib_socket.getaddrinfo(host, port, flags=_NUMERIC_ONLY) + except gaierror as exc: + await _core.yield_briefly() + if exc.errno == EAI_NONAME: + raise ValueError( + "expected an already-resolved numeric address, not {}" + .format(sockaddr)) + raise + return await _run_in_worker_thread( + _stdlib_socket.getnameinfo, sockaddr, flags, cancellable=True) + +__all__.append("getnameinfo") + + def _worker_thread_reexport(name): fn = getattr(_stdlib_socket, name) @_wraps(fn, assigned=("__name__", "__doc__")) @@ -122,7 +195,6 @@ async def wrapper(*args, **kwargs): __all__.append(name) _worker_thread_reexport("getfqdn") -_worker_thread_reexport("getnameinfo") # obsolete gethostbyname etc. 
intentionally omitted diff --git a/trio/tests/test_socket.py b/trio/tests/test_socket.py index e043ee6ab1..c22d19c957 100644 --- a/trio/tests/test_socket.py +++ b/trio/tests/test_socket.py @@ -120,11 +120,11 @@ def without_proto(gai_tup): ("::1", 12345, 0, 0)), ]) - monkeygai.set("x", "host", "port", family=0, type=0, proto=0, flags=0) + monkeygai.set("x", b"host", "port", family=0, type=0, proto=0, flags=0) with assert_yields(): res = await tsocket.getaddrinfo("host", "port") assert res == "x" - assert monkeygai.record[-1] == ("host", "port", 0, 0, 0, 0) + assert monkeygai.record[-1] == (b"host", "port", 0, 0, 0, 0) # check raising an error from a non-blocking getaddrinfo with assert_yields(): @@ -684,3 +684,49 @@ async def receiver(): assert await b.recv(10) == b"e" a.shutdown(tsocket.SHUT_WR) assert await b.recv(10) == b"" + + +async def test_idna(monkeygai): + # This is the encoding for "faß.de", which uses one of the characters that + # IDNA 2003 handles incorrectly: + monkeygai.set("ok faß.de", b"xn--fa-hia.de", 80) + monkeygai.set("ok ::1", "::1", 80, flags=tsocket._NUMERIC_ONLY) + monkeygai.set("ok ::1", b"::1", 80, flags=tsocket._NUMERIC_ONLY) + # Some things that should not reach the underlying socket.getaddrinfo: + monkeygai.set("bad", "fass.de", 80) + # We always call socket.getaddrinfo with bytes objects: + monkeygai.set("bad", "xn--fa-hia.de", 80) + + assert "ok ::1" == await tsocket.getaddrinfo("::1", 80) + assert "ok ::1" == await tsocket.getaddrinfo(b"::1", 80) + assert "ok faß.de" == await tsocket.getaddrinfo("faß.de", 80) + assert "ok faß.de" == await tsocket.getaddrinfo("xn--fa-hia.de", 80) + assert "ok faß.de" == await tsocket.getaddrinfo(b"xn--fa-hia.de", 80) + + +async def test_getnameinfo(): + # Only 2-to-4-tuples allowed + with assert_yields(): + with pytest.raises(ValueError): + await tsocket.getnameinfo(("127.0.0.1",), 0) + with assert_yields(): + with pytest.raises(ValueError): + await tsocket.getnameinfo(("127.0.0.1", 0, 0, 0, 0), 0) + 
with assert_yields(): + with pytest.raises(ValueError): + await tsocket.getnameinfo(["127.0.0.1", 80], 0) + + # Must be numeric + with assert_yields(): + with pytest.raises(ValueError): + await tsocket.getnameinfo(("localhost", 80), 0) + + # A working version: + assert (await tsocket.getnameinfo(("127.0.0.1", 80), 0) + == ("localhost", "http")) + + assert (await tsocket.getnameinfo(("127.0.0.1", 80), tsocket.NI_NUMERICHOST) + == ("127.0.0.1", "http")) + + assert (await tsocket.getnameinfo(("127.0.0.1", 80), tsocket.NI_NUMERICSERV) + == ("localhost", "80"))