From b71278f481f56269a1de67372eb40d6ed3f25923 Mon Sep 17 00:00:00 2001 From: Max Marrone Date: Thu, 12 May 2022 15:24:05 -0400 Subject: [PATCH] fix(update-server): Avoid bricking when given a bad name (#10219) Closes #9960. Closes #10198. --- update-server/otupdate/buildroot/__init__.py | 2 +- update-server/otupdate/buildroot/__main__.py | 21 +- .../otupdate/common/name_management.py | 246 ++++++++++++------ .../otupdate/openembedded/__init__.py | 2 +- .../tests/buildroot/test_name_endpoints.py | 19 +- .../tests/common/test_name_management.py | 18 +- 6 files changed, 212 insertions(+), 96 deletions(-) diff --git a/update-server/otupdate/buildroot/__init__.py b/update-server/otupdate/buildroot/__init__.py index d9b6d0fde4c..95a84c611ff 100644 --- a/update-server/otupdate/buildroot/__init__.py +++ b/update-server/otupdate/buildroot/__init__.py @@ -52,7 +52,7 @@ def get_app( system_version_file = BR_BUILTIN_VERSION_FILE version = get_version(system_version_file) - name = name_override or name_management.get_name() + name = name_override or name_management.get_pretty_hostname() boot_id = boot_id_override or control.get_boot_id() config_obj = config.load(config_file_override) diff --git a/update-server/otupdate/buildroot/__main__.py b/update-server/otupdate/buildroot/__main__.py index 13a500e0c5d..fdd9a9c1c17 100644 --- a/update-server/otupdate/buildroot/__main__.py +++ b/update-server/otupdate/buildroot/__main__.py @@ -12,22 +12,29 @@ LOG = logging.getLogger(__name__) -def main(): +def main() -> None: parser = cli.build_root_parser() args = parser.parse_args() loop = asyncio.get_event_loop() systemd.configure_logging(getattr(logging, args.log_level.upper())) - LOG.info("Setting hostname") - hostname = loop.run_until_complete(name_management.setup_hostname()) - LOG.info(f"Set hostname to {hostname}") + LOG.info("Setting static hostname") + hostname = loop.run_until_complete(name_management.set_up_static_hostname()) + LOG.info(f"Set static hostname to {hostname}") LOG.info("Building buildroot update server") app = get_app(args.version_file, args.config_file) - name = app[constants.DEVICE_NAME_VARNAME] - LOG.info(f"Setting advertised name to {name}") - loop.run_until_complete(name_management.set_name(name)) + name = app[constants.DEVICE_NAME_VARNAME] # Set by get_app(). + + LOG.info(f"Setting Avahi service name to {name}") + try: + loop.run_until_complete(name_management.set_avahi_service_name(name)) + except Exception: + LOG.exception( + "Error setting the Avahi service name." + " mDNS + DNS-SD advertisement may not work." + ) LOG.info(f"Notifying {systemd.SOURCE} that service is up") systemd.notify_up() diff --git a/update-server/otupdate/common/name_management.py b/update-server/otupdate/common/name_management.py index 131205730a2..2d55f1879f3 100644 --- a/update-server/otupdate/common/name_management.py +++ b/update-server/otupdate/common/name_management.py @@ -1,22 +1,48 @@ """ otupdate.common.name_management: functions for managing machine names -The robot has two names associated with it: - -- The hostname is the name by which the machine is advertised on mdns. This can - be used to ping, issue HTTP requests, ssh in, etc. It should be set when the - update server starts by :py:func:`setup_hostname`. During this call, avahi - will be restarted to pick up the new hostname. -- The name (unqualified) is the name of the robot. The name is stored in - /etc/machine-info as the PRETTY_HOSTNAME. `opentrons-${name}` is used as the - name value in the health endpoints and as the avahi service name. This name - can be set with POST /server/name, but does not change the machine hostname. - This can be set with :py:func:`set_name`, which should be called at boot - and whenever the name changes; this function will write /etc/machine-info - and change the avahi service advertisement (without restarting avahi). +The robot has several names associated with it, some of which we tie together. + + +- The static hostname: + + This is the traditional computer networking hostname, + which has limits on length and allowed characters. + + Avahi automatically advertises this over mDNS, + so it can be used to ping, issue HTTP requests, ssh in, etc., + via .local. + + +- The Avahi service name: + + This is a human-readable Unicode string. + It affects how the system is advertised over mDNS + DNS-SD. + Network exploration tools may use it as a user-facing label. + + The DNS-SD spec calls this the "instance name". + (This is not to be confused with what the DNS-SD spec calls the "service name", + which is a totally separate thing.) + + +- The pretty hostname: + + A human-readable Unicode string. + This is a systemd thing, stored in /etc/machine-info as the PRETTY_HOSTNAME + and accessible via tools like hostnamectl. + + +- "The name" (unqualified): + + Over HTTP, we let clients get and set the robot's "name," a human-readable + Unicode string. + + Behind the scenes, this is implemented in terms of setting other names. + See `set_name_endpoint()`. """ import asyncio +import json import logging import os from typing import Optional @@ -57,11 +83,18 @@ def __init__( _BUS_STATE: Optional[DBusState] = None - def _set_name_future(name: str): - """ - The implementation of the name setting, to be run in a concurrent - executor since the dbus module doesn't work with asyncio + def _set_avahi_service_name_sync(new_service_name: str): + """The synchronous implementation of setting the Avahi service name. + + The dbus module doesn't natively support async/await. """ + # For semantics of the methods we're calling, see Avahi's API docs. + # For example: https://www.avahi.org/doxygen/html/index.html#good_publish + # It's mostly in terms of the C API, but the semantics should be the same. + # + # For exact method names and argument types, see Avahi's D-Bus bindings, + # which they specify across several machine-readable files. For example: + # https://github.com/lathiat/avahi/blob/v0.7/avahi-daemon/org.freedesktop.Avahi.EntryGroup.xml global _BUS_STATE if not _BUS_STATE: bus = dbus.SystemBus() @@ -75,13 +108,19 @@ def _set_name_future(name: str): _BUS_STATE = DBusState(bus, server_if, entrygroup_if) _BUS_STATE.entrygroup.Reset() + hostname = _BUS_STATE.server.GetHostName() domainname = _BUS_STATE.server.GetDomainName() + + # TODO(mm, 2022-05-06): This isn't exception-safe. + # Since we've already reset the entrygroup, if this fails + # (for example because Avahi doesn't like the new name), + # we'll be left with no entrygroup and Avahi will stop advertising the machine. _BUS_STATE.entrygroup.AddService( dbus.Int32(-1), # avahi.IF_UNSPEC dbus.Int32(-1), # avahi.PROTO_UNSPEC dbus.UInt32(0), # flags - name, # sname + new_service_name, # sname "_http._tcp", # stype domainname, # sdomain (.local) f"{hostname}.{domainname}", # shost (hostname.local) @@ -90,28 +129,53 @@ def _set_name_future(name: str): ) _BUS_STATE.entrygroup.Commit() + # TODO(mm, 2022-05-04): Recover from AVAHI_ENTRY_GROUP_COLLISION in case + # this name collides with another device on the network. + # https://github.com/Opentrons/opentrons/issues/10126 + except ImportError: LOG.exception("Couldn't import dbus, name setting will be nonfunctional") - def _set_name_future(name: str): + def _set_avahi_service_name_sync(new_service_name: str): LOG.warning("Not setting name, dbus could not be imported") _BUS_LOCK = asyncio.Lock() -def _get_hostname() -> str: - """Get a good value for the system hostname. +async def set_avahi_service_name(new_service_name: str) -> None: + """Set the Avahi service name. + + The new service name will only apply to the current boot. + + Avahi requires a service name. + It will not advertise the system over mDNS + DNS-SD until one is set. + + Since the Avahi service name corresponds to the DNS-SD instance name, + it's a human-readable string of mostly arbitrary Unicode, + at most 63 octets (not 63 code points or 63 characters!) long. + (See: https://datatracker.ietf.org/doc/html/rfc6763#section-4.1.1) + Avahi will raise an error if it thinks the new service name is invalid. + """ + async with _BUS_LOCK: + await asyncio.get_event_loop().run_in_executor( + None, _set_avahi_service_name_sync, new_service_name + ) + + +def _choose_static_hostname() -> str: + """Get a good value for the system's static hostname. - The hostname is loaded from, in order of preference, + The static hostname is loaded from, in order of preference: - - url-encoding the contents of /var/serial-number, if it is present, - not empty, and not the default - - the systemd-generated machine-id, which changes at every boot. + 1. url-encoding the contents of /var/serial-number, if it is present and not empty. + 2. the systemd-generated machine-id. """ if os.path.exists("/var/serial"): serial = open("/var/serial").read().strip() if serial: + # TODO(mm, 2022-04-27): This uses the serial number even if it hasn't + # been configured and is still the default, like "opentrons." LOG.info("Using serial for hostname") hn = "".join([c for c in urllib.parse.quote(serial, safe="") if c != "%"]) if hn != serial: @@ -123,22 +187,25 @@ def _get_hostname() -> str: else: LOG.info("Using machine-id for hostname: no /var/serial") - return open("/etc/machine-id").read().strip()[:6] + with open("/etc/machine-id") as f: + return f.read().strip()[:6] -async def setup_hostname() -> str: - """ - Intended to be run when the server starts. Sets the machine hostname. +async def set_up_static_hostname() -> str: + """Automatically configure the machine's static hostname. - Once the hostname is set, we restart avahi. + Intended to be run once when the server starts. - This is a separate task from establishing and changing the opentrons - machine name, which is UTF-8 and stored in /etc/machine-info as the - PRETTY_HOSTNAME and used in the avahi service name. + This function: - :returns: the hostname + 1. Picks a good value for the static hostname. + 2. Persists the new value for future boots. + 3. Updates the "live" value for the current boot. + 4. Restarts Avahi so that it advertises using the new live value. + 5. Returns the new static hostname. """ - hostname = _get_hostname() + hostname = _choose_static_hostname() + with open("/etc/hostname", "w") as ehn: ehn.write(f"{hostname}\n") @@ -178,10 +245,11 @@ async def setup_hostname() -> str: ) raise RuntimeError("Error restarting avahi") LOG.debug("Updated hostname and restarted avahi OK") + return hostname -def _update_pretty_hostname(new_val: str): +def _rewrite_machine_info(new_pretty_hostname: str): """Write a new value for the pretty hostname. :raises OSError: If the new value could not be written. @@ -192,12 +260,14 @@ def _update_pretty_hostname(new_val: str): except OSError: LOG.exception("Couldn't read /etc/machine-info") contents = "" - new_contents = _rewrite_machine_info(contents, new_val) + new_contents = _rewrite_machine_info_str( + current_machine_info_contents=contents, new_pretty_hostname=new_pretty_hostname + ) with open("/etc/machine-info", "w") as emi: emi.write(new_contents) -def _rewrite_machine_info( +def _rewrite_machine_info_str( current_machine_info_contents: str, new_pretty_hostname: str ) -> str: """ @@ -209,13 +279,16 @@ def _rewrite_machine_info( preserved_lines = [ ln for ln in current_lines if not ln.startswith("PRETTY_HOSTNAME") ] + # FIXME(mm, 2022-04-27): This will not correctly store the pretty hostname + # if it contains newlines or certain other special characters. + # https://github.com/Opentrons/opentrons/issues/9960 new_lines = preserved_lines + [f"PRETTY_HOSTNAME={new_pretty_hostname}"] new_contents = "\n".join(new_lines) + "\n" return new_contents -def get_name(default: str = "no name set"): - """Get the currently-configured name of the machine""" +def get_pretty_hostname(default: str = "no name set"): + """Get the currently-configured pretty hostname""" try: with open("/etc/machine-info") as emi: contents = emi.read() @@ -224,69 +297,90 @@ def get_name(default: str = "no name set"): contents = "" for line in contents.split("\n"): if line.startswith("PRETTY_HOSTNAME="): + # FIXME(mm, 2022-04-27): This will not correctly read the pretty hostname + # if it's quoted or contains escaped characters. + # https://github.com/Opentrons/opentrons/issues/10197 + # Perhaps we should query the pretty hostname from hostnamectl instead of + # implementing our own parsing. return "=".join(line.split("=")[1:]) LOG.warning(f"No PRETTY_HOSTNAME in {contents}, defaulting to {default}") - try: - _update_pretty_hostname(default) - except OSError: - LOG.exception("Could not write new pretty hostname!") return default -async def set_name(name: str = None) -> str: - """ - Change the name by writing /etc/machine-info and then calling setup_name +async def persist_pretty_hostname(name: str) -> str: + """Change the robot's pretty hostname. - :param name: The name to set. If ``None``, read it from /etc/machine-info - :returns: The name that was set. This may be different from ``name``, if - ``name`` was none or if the pretty hostname could not be written - """ + Writes the new name to /etc/machine-info so it persists across reboots. - if name: + :param name: The name to set. + :returns: The name that was set. This may be different from ``name``, + if the pretty hostname could not be written. + """ + try: + # We can't run `hostnamectl --pretty ` to write this for us + # because it fails with a read-only filesystem error, for unknown reasons. + _rewrite_machine_info(new_pretty_hostname=name) checked_name = name - try: - _update_pretty_hostname(checked_name) - except OSError: - LOG.exception("Could not set pretty hostname") - checked_name = get_name() - else: - checked_name = get_name() - - async with _BUS_LOCK: - await asyncio.get_event_loop().run_in_executor( - None, _set_name_future, checked_name - ) + except OSError: + LOG.exception("Could not set pretty hostname") + checked_name = get_pretty_hostname() return checked_name +async def set_name(app: web.Application, new_name: str) -> str: + """See `set_name_endpoint()`.""" + await set_avahi_service_name(new_name) + # Setting the Avahi service name can fail if Avahi doesn't like the new name. + # Persist only after it succeeds, so we don't persist something invalid. + persisted_pretty_hostname = await persist_pretty_hostname(new_name) + return persisted_pretty_hostname + + async def set_name_endpoint(request: web.Request) -> web.Response: - """Set the name of the robot. + """Set the robot's name. + + This comprises a few things: + + * The name returned over HTTP + * The pretty hostname + * The Avahi service name + + It does not include the static hostname. Request with POST /server/name {"name": new_name} - Responds with 200 OK {"hostname": new_name, "prettyname": pretty_name} + Responds with 200 OK {"name": "set_name"} or 400 Bad Request - In general, the new pretty name will be the specified name. The true - hostname will be capped to 53 letters, and have any characters other than - ascii letters or dashes replaced with dashes to fit the requirements here - https://www.freedesktop.org/software/systemd/man/hostname.html#. + In general, the name that is set will be the same name that was requested. + It may be different if it had to be truncated, sanitized, etc. """ def build_400(msg: str) -> web.Response: return web.json_response(data={"message": msg}, status=400) - body = await request.json() - if "name" not in body or not isinstance(body["name"], str): - return build_400('Body has no "name" key with a string') + try: + body = await request.json() + except json.JSONDecodeError as exception: + # stringifying a JSONDecodeError will include an error summary and location, + # e.g. "Expecting value: line 1 column 1 (char 0)" + return build_400(str(exception)) - new_name = await set_name(body["name"]) - request.app[DEVICE_NAME_VARNAME] = new_name + try: + name_to_set = body["name"] + except KeyError: + return build_400('Body has no "name" key') + if not isinstance(name_to_set, str): + return build_400('"name" key is not a string"') + + new_name = await set_name(app=request.app, new_name=name_to_set) + + request.app[DEVICE_NAME_VARNAME] = new_name return web.json_response(data={"name": new_name}, status=200) async def get_name_endpoint(request: web.Request) -> web.Response: - """Get the name of the robot. + """Get the robot's name, as previously set with `set_name_endpoint()`. This information is also accessible in /server/update/health, but this endpoint provides symmetry with POST /server/name. diff --git a/update-server/otupdate/openembedded/__init__.py b/update-server/otupdate/openembedded/__init__.py index 3c8e128a0d8..1e97d5761e3 100644 --- a/update-server/otupdate/openembedded/__init__.py +++ b/update-server/otupdate/openembedded/__init__.py @@ -62,7 +62,7 @@ def get_app( config_obj = config.load(config_file_override) app = web.Application(middlewares=[log_error_middleware]) - name = name_override or name_management.get_name() + name = name_override or name_management.get_pretty_hostname() boot_id = boot_id_override or control.get_boot_id() app[config.CONFIG_VARNAME] = config_obj app[constants.RESTART_LOCK_NAME] = asyncio.Lock() diff --git a/update-server/tests/buildroot/test_name_endpoints.py b/update-server/tests/buildroot/test_name_endpoints.py index c27d0010142..f845ad3cd9a 100644 --- a/update-server/tests/buildroot/test_name_endpoints.py +++ b/update-server/tests/buildroot/test_name_endpoints.py @@ -2,17 +2,17 @@ async def test_name_endpoint(test_cli, monkeypatch): - async def fake_name(name): - return name + name + async def fake_set_name(app, new_name): + return new_name + new_name - monkeypatch.setattr(name_management, "set_name", fake_name) + monkeypatch.setattr(name_management, "set_name", fake_set_name) + # Valid: to_set = "check out this cool name" resp = await test_cli.post("/server/name", json={"name": to_set}) assert resp.status == 200 body = await resp.json() assert body["name"] == to_set + to_set - health = await test_cli.get("/server/update/health") health_body = await health.json() assert health_body["name"] == to_set + to_set @@ -20,6 +20,7 @@ async def fake_name(name): name_body = await get_name.json() assert name_body["name"] == to_set + to_set + # Invalid, field is not a str: resp = await test_cli.post("/server/name", json={"name": 2}) assert resp.status == 400 body = await resp.json() @@ -31,6 +32,7 @@ async def fake_name(name): name_body = await get_name.json() assert name_body["name"] == to_set + to_set + # Invalid, field is missing: resp = await test_cli.post("/server/name", json={}) assert resp.status == 400 body = await resp.json() @@ -38,3 +40,12 @@ async def fake_name(name): health = await test_cli.get("/server/update/health") health_body = await health.json() assert health_body["name"] == to_set + to_set + + # Invalid, body is not JSON: + resp = await test_cli.post("/server/name", data="bada bing bada boom") + assert resp.status == 400 + body = await resp.json() + assert "message" in body + health = await test_cli.get("/server/update/health") + health_body = await health.json() + assert health_body["name"] == to_set + to_set diff --git a/update-server/tests/common/test_name_management.py b/update-server/tests/common/test_name_management.py index 3345d2d8614..f64e7ee19a4 100644 --- a/update-server/tests/common/test_name_management.py +++ b/update-server/tests/common/test_name_management.py @@ -12,8 +12,9 @@ @pytest.mark.parametrize("initial_contents", machine_info_examples) -def test_rewrite_machine_info_updates_pretty_hostname(initial_contents): - rewrite = name_management._rewrite_machine_info( +def test_rewrite_machine_info_updates_pretty_hostname(initial_contents) -> None: + # TODO(mm, 2022-04-27): Rework so we don't have to test a private function. + rewrite = name_management._rewrite_machine_info_str( initial_contents, "new_pretty_hostname" ) assert ( @@ -25,9 +26,10 @@ def test_rewrite_machine_info_updates_pretty_hostname(initial_contents): @pytest.mark.parametrize("initial_contents", machine_info_examples) -def test_rewrite_machine_info_preserves_other_lines(initial_contents): +def test_rewrite_machine_info_preserves_other_lines(initial_contents) -> None: + # TODO(mm, 2022-04-27): Rework so we don't have to test a private function. initial_lines = Counter(initial_contents.splitlines()) - rewrite_string = name_management._rewrite_machine_info( + rewrite_string = name_management._rewrite_machine_info_str( initial_contents, "new_pretty_hostname" ) rewrite_lines = Counter(rewrite_string.splitlines()) @@ -38,12 +40,14 @@ def test_rewrite_machine_info_preserves_other_lines(initial_contents): assert line.startswith("PRETTY_HOSTNAME=") or line == "" +# Covers this bug: https://github.com/Opentrons/opentrons/pull/4671 @pytest.mark.parametrize("initial_contents", machine_info_examples) -def test_rewrite_machine_info_is_idempotent(initial_contents): - first_rewrite = name_management._rewrite_machine_info( +def test_rewrite_machine_info_is_idempotent(initial_contents) -> None: + # TODO(mm, 2022-04-27): Rework so we don't have to test a private function. + first_rewrite = name_management._rewrite_machine_info_str( initial_contents, "new_pretty_hostname" ) - second_rewrite = name_management._rewrite_machine_info( + second_rewrite = name_management._rewrite_machine_info_str( first_rewrite, "new_pretty_hostname" ) assert second_rewrite == first_rewrite