Skip to content

Commit

Permalink
IDNA 2008 support for trio.socket
Browse files Browse the repository at this point in the history
Specifically:

- Use IDNA 2008 in getaddrinfo

- Require pre-resolved names in getnameinfo (b/c otherwise it
  makes an implicit call to getaddrinfo using IDNA 2003; also, this
  makes it more consistent with the rest of trio.socket).

This fixes part, but not all, of python-trio issue gh-11.
  • Loading branch information
njsmith committed Jun 12, 2017
1 parent 3c00381 commit 5dc1b5a
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 6 deletions.
9 changes: 7 additions & 2 deletions docs/source/reference-io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,17 @@ convert a standard library socket into a trio socket:

.. autofunction:: from_stdlib_socket

For name lookup, Trio provides the standard :func:`getaddrinfo` and
:func:`getnameinfo`, but with small changes:

.. autofunction:: getaddrinfo

.. autofunction:: getnameinfo

The following functions have identical interfaces to their standard
library version, but are now ``async`` functions, so you need to use
``await`` to call them:

* :func:`~socket.getaddrinfo`
* :func:`~socket.getnameinfo`
* :func:`~socket.getfqdn`

Trio intentionally DOES NOT include some obsolete, redundant, or
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
"attrs",
"sortedcontainers",
"async_generator >= 1.6",
"idna",
# PEP 508 style, but:
# https://bitbucket.org/pypa/wheel/issues/181/bdist_wheel-silently-discards-pep-508
#"cffi; os_name == 'nt'", # "cffi is required on windows"
Expand Down
76 changes: 74 additions & 2 deletions trio/socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from contextlib import contextmanager as _contextmanager
import errno as _errno

import idna

from . import _core
from ._threads import run_in_worker_thread as _run_in_worker_thread

Expand Down Expand Up @@ -94,6 +96,19 @@ async def __aexit__(self, etype, value, tb):
_NUMERIC_ONLY = _stdlib_socket.AI_NUMERICHOST | _stdlib_socket.AI_NUMERICSERV

async def getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
"""Look up a numeric address given a name.
Arguments and return values are identical to :func:`socket.getaddrinfo`,
except that this version is async.
Also, :func:`trio.socket.getaddrinfo` correctly uses IDNA 2008 to process
non-ASCII domain names. (:func:`socket.getaddrinfo` uses IDNA 2003, which
can give the wrong result in some cases and cause you to connect to a
different host than the one you intended; see `bpo-17305
<https://bugs.python.org/issue17305>`__.)
"""

# If host and port are numeric, then getaddrinfo doesn't block and we can
# skip the whole thread thing, which seems worthwhile. So we try first
# with the _NUMERIC_ONLY flags set, and then only spawn a thread if that
Expand All @@ -103,13 +118,71 @@ def numeric_only_failure(exc):
async with _try_sync(numeric_only_failure):
return _stdlib_socket.getaddrinfo(
host, port, family, type, proto, flags | _NUMERIC_ONLY)
# That failed, try a thread instead
# That failed; it's a real hostname. We better use a thread.
#
# Also, it might be a unicode hostname, in which case we want to do our
# own encoding using the idna module, rather than letting Python do
# it. (Python will use the old IDNA 2003 standard, and possibly get the
# wrong answer - see bpo-17305). However, the idna module is picky, and
# will refuse to process some valid hostname strings, like "::1". So if
# it's already ascii, we pass it through; otherwise, we encode it with idna.
if isinstance(host, str):
try:
host = host.encode("ascii")
except UnicodeEncodeError:
# UTS-46 defines various normalizations; in particular, by default
# idna.encode will error out if the hostname has Capital Letters
# in it; with uts46=True it will lowercase them instead.
host = idna.encode(host, uts46=True)
return await _run_in_worker_thread(
_stdlib_socket.getaddrinfo, host, port, family, type, proto, flags,
cancellable=True)

__all__.append("getaddrinfo")


async def getnameinfo(sockaddr, flags):
    """Reverse-resolve an already-numeric socket address into names.

    Takes the same arguments and returns the same values as
    :func:`socket.getnameinfo`, with two differences:

    * It is an async function.

    * It does *not* perform implicit name resolution on ``sockaddr``. For
      example, this raises an error::

         await trio.socket.getnameinfo(("localhost", 80), 0)  # error!

      Resolve the name first (for instance with :func:`getaddrinfo`) and
      pass the resulting numeric address instead::

         await trio.socket.getnameinfo(("127.0.0.1", 80), 0)  # correct!

    Raises:
      ValueError: if ``sockaddr`` is not a tuple holding 2 to 4 items, or
          if the numeric-only pre-check fails with ``EAI_NONAME``.

    """
    # The stdlib function would happily accept a hostname here. We insist on
    # numeric addresses instead: it sidesteps IDNA complications and matches
    # how the analogous socket methods behave.
    well_formed = isinstance(sockaddr, tuple) and 2 <= len(sockaddr) <= 4
    if not well_formed:
        await _core.yield_briefly()
        raise ValueError(
            "expected a (host, port) tuple, not {}".format(sockaddr))

    host, port = sockaddr[:2]
    try:
        # Result is discarded; this call only checks that host/port parse
        # under the numeric-only flags.
        _stdlib_socket.getaddrinfo(host, port, flags=_NUMERIC_ONLY)
    except gaierror as lookup_error:
        await _core.yield_briefly()
        if lookup_error.errno != EAI_NONAME:
            # Some other resolver failure: pass it through untouched.
            raise
        raise ValueError(
            "expected an already-resolved numeric address, not {}"
            .format(sockaddr))

    return await _run_in_worker_thread(
        _stdlib_socket.getnameinfo, sockaddr, flags, cancellable=True)

__all__.append("getnameinfo")


def _worker_thread_reexport(name):
fn = getattr(_stdlib_socket, name)
@_wraps(fn, assigned=("__name__", "__doc__"))
Expand All @@ -122,7 +195,6 @@ async def wrapper(*args, **kwargs):
__all__.append(name)

_worker_thread_reexport("getfqdn")
_worker_thread_reexport("getnameinfo")

# obsolete gethostbyname etc. intentionally omitted

Expand Down
50 changes: 48 additions & 2 deletions trio/tests/test_socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ def without_proto(gai_tup):
("::1", 12345, 0, 0)),
])

monkeygai.set("x", "host", "port", family=0, type=0, proto=0, flags=0)
monkeygai.set("x", b"host", "port", family=0, type=0, proto=0, flags=0)
with assert_yields():
res = await tsocket.getaddrinfo("host", "port")
assert res == "x"
assert monkeygai.record[-1] == ("host", "port", 0, 0, 0, 0)
assert monkeygai.record[-1] == (b"host", "port", 0, 0, 0, 0)

# check raising an error from a non-blocking getaddrinfo
with assert_yields():
Expand Down Expand Up @@ -684,3 +684,49 @@ async def receiver():
assert await b.recv(10) == b"e"
a.shutdown(tsocket.SHUT_WR)
assert await b.recv(10) == b""


async def test_idna(monkeygai):
    """Check which host values getaddrinfo hands to socket.getaddrinfo."""
    # The bytes below are the encoded form of "faß.de", a name containing
    # one of the characters that IDNA 2003 handles incorrectly.
    monkeygai.set("ok faß.de", b"xn--fa-hia.de", 80)
    # The numeric host is registered in both its str and bytes spellings.
    for numeric_host in ("::1", b"::1"):
        monkeygai.set(
            "ok ::1", numeric_host, 80, flags=tsocket._NUMERIC_ONLY)
    # Things that should not reach the underlying socket.getaddrinfo. The
    # second entry is the str form of the encoded name: we always call
    # socket.getaddrinfo with bytes objects.
    for forbidden_host in ("fass.de", "xn--fa-hia.de"):
        monkeygai.set("bad", forbidden_host, 80)

    for numeric_host in ("::1", b"::1"):
        assert await tsocket.getaddrinfo(numeric_host, 80) == "ok ::1"
    for spelling in ("faß.de", "xn--fa-hia.de", b"xn--fa-hia.de"):
        assert await tsocket.getaddrinfo(spelling, 80) == "ok faß.de"


async def test_getnameinfo():
    """Exercise argument validation and basic lookups of getnameinfo."""
    # Every rejected input must raise ValueError; each call is wrapped in
    # assert_yields().
    rejected = [
        ("127.0.0.1",),             # fewer than 2 items
        ("127.0.0.1", 0, 0, 0, 0),  # more than 4 items
        ["127.0.0.1", 80],          # a list rather than a tuple
        ("localhost", 80),          # host must be numeric
    ]
    for bad_sockaddr in rejected:
        with assert_yields():
            with pytest.raises(ValueError):
                await tsocket.getnameinfo(bad_sockaddr, 0)

    # Working versions, with and without the numeric-output flags.
    loopback = ("127.0.0.1", 80)

    names = await tsocket.getnameinfo(loopback, 0)
    assert names == ("localhost", "http")

    names = await tsocket.getnameinfo(loopback, tsocket.NI_NUMERICHOST)
    assert names == ("127.0.0.1", "http")

    names = await tsocket.getnameinfo(loopback, tsocket.NI_NUMERICSERV)
    assert names == ("localhost", "80")

0 comments on commit 5dc1b5a

Please sign in to comment.