diff --git a/changelog.d/20250122_094142_PL-133360-fix-ipv6-autoconfig-again_scriv.md b/changelog.d/20250122_094142_PL-133360-fix-ipv6-autoconfig-again_scriv.md new file mode 100644 index 000000000..49ce4ead2 --- /dev/null +++ b/changelog.d/20250122_094142_PL-133360-fix-ipv6-autoconfig-again_scriv.md @@ -0,0 +1,21 @@ + + +### Impact + + + +### NixOS XX.XX platform + +- platform: ensure that IPv6 autoconfiguration is correctly disabled + on both physical and virtual hosts. (PL-133360) diff --git a/nixos/infrastructure/flyingcircus-physical.nix b/nixos/infrastructure/flyingcircus-physical.nix index 835144654..0759ee30b 100644 --- a/nixos/infrastructure/flyingcircus-physical.nix +++ b/nixos/infrastructure/flyingcircus-physical.nix @@ -12,6 +12,7 @@ mkIf (cfg.infrastructureModule == "flyingcircus-physical") (lib.mkMerge [ hardware.cpu.amd.updateMicrocode = true; hardware.cpu.intel.updateMicrocode = true; flyingcircus.raid.enable = true; + flyingcircus.networking.physicalHostNetworking = true; boot = { initrd.availableKernelModules = [ diff --git a/nixos/lib/network.nix b/nixos/lib/network.nix index 828270af8..41bdd580b 100644 --- a/nixos/lib/network.nix +++ b/nixos/lib/network.nix @@ -20,7 +20,7 @@ let vxlanInterfaces = lib.filterAttrs (name: value: value.policy or null == "vxlan") encInterfaces; vxlanCount = length (attrNames vxlanInterfaces); in - if config.flyingcircus.infrastructureModule != "flyingcircus-physical" + if !config.flyingcircus.networking.physicalHostNetworking then foldConds encInterfaces [ { diff --git a/nixos/platform/network.nix b/nixos/platform/network.nix index 1b171bbcc..e799674aa 100644 --- a/nixos/platform/network.nix +++ b/nixos/platform/network.nix @@ -97,6 +97,11 @@ in description = "Names of ethernet devices to monitor."; default = []; }; + flyingcircus.networking.physicalHostNetworking = lib.mkOption { + type = lib.types.bool; + description = "Use a network configuration profile suitable for physical hosts"; + default = false; + }; }; 
config = lib.mkMerge [ @@ -224,7 +229,7 @@ in wireguard.enable = true; firewall.trustedInterfaces = - lib.optionals (!isNull fclib.underlay && cfg.infrastructureModule == "flyingcircus-physical") + lib.optionals (!isNull fclib.underlay && cfg.networking.physicalHostNetworking) (map (l: l.link) fclib.underlay.links or []); firewall.extraCommands = '' @@ -265,7 +270,7 @@ in }; - flyingcircus.services.telegraf.inputs = lib.optionalAttrs (cfg.infrastructureModule == "flyingcircus-physical") { + flyingcircus.services.telegraf.inputs = lib.optionalAttrs (cfg.networking.physicalHostNetworking) { exec = [{ commands = [ "${pkgs.fc.telegraf-routes-summary}/bin/telegraf-routes-summary" ]; timeout = "10s"; @@ -448,7 +453,7 @@ in # TODO: it'd be preferrable to manage this on a by-interface base # and distinguish whether an interface is physical. # Can this be done based on `config.flyingcircus.enc.parameters.interfaces.fe.policy`? - ${lib.optionalString (config.flyingcircus.infrastructureModule == "flyingcircus-physical") '' + ${lib.optionalString (config.flyingcircus.networking.physicalHostNetworking) '' echo "Disabling flow control" ethtool -A ${iface.link} autoneg off rx off tx off || true ''} @@ -509,14 +514,14 @@ in }; })) ethernetLinks) ++ - (let unitName = link: "network-disable-ipv6-autoconfig-${link}"; - unitTemplate = link: rec { + unitTemplate = link: fixAddrGen: rec { description = "Disable IPv6 autoconfig for link ${link}"; wantedBy = [ "network-addresses-${link}.service" ]; before = wantedBy; requires = [ "${link}-netdev.service" ]; + bindsTo = requires; after = requires; path = [ pkgs.procps fclib.relaxedIp ]; stopIfChanged = false; @@ -527,12 +532,14 @@ in sysctl net.ipv6.conf.${link}.temp_valid_lft=0 sysctl net.ipv6.conf.${link}.temp_prefered_lft=0 - # If an interface has previously been managed by dhcpcd this sysctl might be - # set to a non-zero value, which disables automatic generation of link-local - # addresses. 
This can leave the interface without a link-local address when - # dhcpcd deletes addresses from the interface when it exits. Resetting this - # to 0 restores the default kernel behaviour. - sysctl net.ipv6.conf.${link}.addr_gen_mode=0 + ${lib.optionalString fixAddrGen '' + # If an interface has previously been managed by dhcpcd this sysctl might be + # set to a non-zero value, which disables automatic generation of link-local + # addresses. This can leave the interface without a link-local address when + # dhcpcd deletes addresses from the interface when it exits. Resetting this + # to 0 restores the default kernel behaviour. + sysctl net.ipv6.conf.${link}.addr_gen_mode=0 + ''} for oldtmp in $(ip -6 address show dev ${link} dynamic scope global | grep inet6 | cut -d ' ' -f6); do ip addr del $oldtmp dev ${link} @@ -543,15 +550,13 @@ in RemainAfterExit = true; }; }; - - virtualLinkUnit = link: ((unitTemplate link) // { - bindsTo = [ "${link}-netdev.service" ]; - after = [ "${link}-netdev.service" ]; - }); in (map (link: - lib.nameValuePair (unitName link) (virtualLinkUnit link)) - virtualLinks) + lib.nameValuePair (unitName link) (unitTemplate link false)) + virtualLinks) ++ + (map (link: + lib.nameValuePair (unitName link.link) (unitTemplate link.link true)) + ethernetLinks) ) ++ (lib.optionals (!isNull fclib.underlay) @@ -900,10 +905,10 @@ in # as a reasonable size and I'd suggest generalizing this number to all machines. 
"net.netfilter.nf_conntrack_max" = 262144; } - (lib.mkIf (cfg.infrastructureModule != "flyingcircus-physical") { + (lib.mkIf (!cfg.networking.physicalHostNetworking) { "net.core.rmem_max" = 8388608; }) - (lib.mkIf (cfg.infrastructureModule == "flyingcircus-physical") { + (lib.mkIf (cfg.networking.physicalHostNetworking) { "vm.min_free_kbytes" = "513690"; "net.core.netdev_max_backlog" = 300000; @@ -944,7 +949,7 @@ in }; } - (lib.mkIf (config.flyingcircus.infrastructureModule == "flyingcircus") { + (lib.mkIf (cfg.infrastructureModule == "flyingcircus") { # This check is here to identify abysmal but otherwise subtle network speed # issues *in VMs* as we have seen in PL-132971. If downloading a 1MiB test # file takes longer than 5-10 seconds, something is very much off. diff --git a/tests/default.nix b/tests/default.nix index a3a0a3d21..03f4404bf 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -46,6 +46,7 @@ in { collect-garbage = callTest ./collect-garbage.nix {}; gitlab = callTest ./gitlab.nix {}; haproxy = callTest ./haproxy.nix {}; + ipv6-autoconfig = callSubTests ./ipv6-autoconfig.nix {}; java = callTest ./java.nix {}; journal = callTest ./journal.nix {}; journalbeat = callTest ./journalbeat.nix {}; diff --git a/tests/ipv6-autoconfig.nix b/tests/ipv6-autoconfig.nix new file mode 100644 index 000000000..99b9ab525 --- /dev/null +++ b/tests/ipv6-autoconfig.nix @@ -0,0 +1,114 @@ +import ./make-test-python.nix ({ pkgs, testlib, ... }: +let + assertZeroSysctl = pkgs.writeScriptBin "assert_zero_sysctl" '' + set -eu + sysctl="$1" + + value="$(${pkgs.procps}/bin/sysctl -n -b "$sysctl")" + + [ "$value" == "0" ] + ''; + + makePhysicalHost = { id, links }: { lib, config, ... 
}: + let + testNodeId = config.virtualisation.test.nodeNumber; + in { + imports = [ + (testlib.fcConfig { + inherit id; + net.mgm = true; + net.ul = true; + extraEncParameters = { + inherit id; + interfaces.fe.policy = "vxlan"; + interfaces.srv.policy = "vxlan"; + interfaces.ul = { + policy = "underlay"; + nics = map (link: { + mac = "52:54:00:12:${lib.toLower (lib.toHexString link)}:0${toString testNodeId}"; + external_label = "phys/${toString link}"; + }) links; + }; + }; + }) + ]; + + # use the hardware networking config profile + flyingcircus.networking.physicalHostNetworking = true; + # extra underlay network links + virtualisation.vlans = links; + + services.fail2ban.enable = false; + + environment.systemPackages = [ assertZeroSysctl ]; + }; +in { + name = "ipv6-autoconfig"; + testCases = { + virtual = { + name = "virtual"; + nodes.machine = { ... }: { + imports = [ + (testlib.fcConfig {}) + ]; + + environment.systemPackages = [ assertZeroSysctl ]; + }; + testScript = '' + sysctls = [ + "accept_ra", + "autoconf", + "temp_valid_lft", + "temp_prefered_lft", + "addr_gen_mode", + ] + + machine.wait_for_unit("multi-user.target") + + with subtest("testing ipv6 autoconf configuration on ethsrv"): + for sysctl in sysctls: + machine.succeed(f"assert_zero_sysctl net.ipv6.conf.ethsrv.{sysctl}") + with subtest("testing ipv6 autoconf configuration on ethfe"): + for sysctl in sysctls: + machine.succeed(f"assert_zero_sysctl net.ipv6.conf.ethfe.{sysctl}") + ''; + }; + hardware = { + name = "hardware"; + nodes = { + machine = makePhysicalHost { id = 1; links = [ 253 254 ]; }; + switch1 = testlib.mockVxlanSwitch { id = 2; links = [ 253 ]; }; + switch2 = testlib.mockVxlanSwitch { id = 3; links = [ 254 ]; }; + }; + testScript = '' + sysctls = [ + "accept_ra", + "autoconf", + "temp_valid_lft", + "temp_prefered_lft", + ] + hw_sysctls = sysctls.copy() + hw_sysctls.append("addr_gen_mode") + + start_all() + for vm in [machine, switch1, switch2]: + 
vm.wait_for_unit("multi-user.target") + + virt_links = ["brsrv", "brfe", "vxsrv", "vxfe"] + phys_links = ["ethmgm", "ul-phys-253", "ul-phys-254"] + + with subtest("testing physical links"): + for link in phys_links: + with subtest(f"testing ipv6 autoconf configuration on {link}"): + for sysctl in hw_sysctls: + machine.succeed(f"assert_zero_sysctl net.ipv6.conf.{link}.{sysctl}") + + with subtest("testing virtual links"): + for link in virt_links: + with subtest(f"testing ipv6 autoconf configuration on {link}"): + for sysctl in sysctls: + machine.succeed(f"assert_zero_sysctl net.ipv6.conf.{link}.{sysctl}") + ''; + }; + }; +}) diff --git a/tests/testlib.nix b/tests/testlib.nix index 44a30b261..98e736548 100644 --- a/tests/testlib.nix +++ b/tests/testlib.nix @@ -95,6 +95,12 @@ rec { chosen_networks = (network_options // { srv = true; fe = true; } // net); active_vlan_attrs = filterAttrs (name: vid: chosen_networks.${name}) vlans; + + # the nixos test driver internally assigns each test vm an id + # which is used for generating the mac addresses on each + # vlan. however, this might be a different id from the one we use + # for generating ip addresses. + test_node_id = config.virtualisation.test.nodeNumber; in { imports = [ @@ -117,7 +123,7 @@ rec { inherit resource_group location secrets; interfaces = mapAttrs (name: vid: { - mac = "52:54:00:12:0${toString vid}:0${toString id}"; + mac = "52:54:00:12:0${toString vid}:0${toString test_node_id}"; bridged = false; networks = { "192.168.${toString vid}.0/24" = [ "192.168.${toString vid}.${toString id}" ]; @@ -126,7 +132,7 @@ rec { gateways = {}; nics = [ { - "mac" = "52:54:00:12:0${toString vid}:0${toString id}"; + "mac" = "52:54:00:12:0${toString vid}:0${toString test_node_id}"; "external_label" = "${name}nic${toString id}"; } ]; @@ -135,6 +141,120 @@ rec { }; }; + /* + Generate a machine configuration which mocks a datacentre VXLAN switch + */ + mockVxlanSwitch = { + id, + links ? [], + }: { pkgs, lib, config, ... 
}: let + vlanInterfaces = listToAttrs (map (v: lib.nameValuePair "eth${toString v}" v) links); + underlayAddress = "192.168.${toString vlans.ul}.${toString id}"; + in { + imports = [ ../nixos ../nixos/roles ]; + services.telegraf.enable = false; + networking = { + useDHCP = lib.mkForce false; + firewall.allowPing = lib.mkForce true; + firewall.checkReversePath = lib.mkForce false; + }; + boot.kernel.sysctl."net.ipv4.conf.all.ip_forward" = 1; + boot.initrd.availableKernelModules = [ "dummy" ]; + networking.firewall.enable = false; + + virtualisation.vlans = lib.mkForce []; + virtualisation.interfaces = mapAttrs (_: vlan: { inherit vlan; }) vlanInterfaces; + + networking.firewall.trustedInterfaces = attrNames vlanInterfaces; + networking.interfaces = { + underlay.ipv4.addresses = [{ + address = underlayAddress; prefixLength = 32; + }]; + } // (listToAttrs + (map (name: lib.nameValuePair name { + ipv4.addresses = lib.mkForce []; + ipv6.addresses = lib.mkForce []; + }) (attrNames vlanInterfaces)) + ); + + systemd.services = { + underlay-netdev = rec { + description = "Set up underlay loopback device"; + wantedBy = [ "network-setup.service" "multi-user.target" ]; + before = wantedBy; + after = [ "network-pre.service" ]; + requires = [ "network-setup.service" ]; + path = [ pkgs.iproute2 ]; + script = "ip link add underlay type dummy"; + preStop = "ip link delete underlay"; + serviceConfig.Type = "oneshot"; + serviceConfig.RemainAfterExit = true; + }; + } // (listToAttrs + (map (name: lib.nameValuePair "${name}-netdev" { + wantedBy = [ "network-setup.service" "multi-user.target" ]; + requires = [ "network-setup.service" ]; + script = ":"; + serviceConfig.Type = "oneshot"; + serviceConfig.RemainAfterExit = true; + }) (attrNames vlanInterfaces)) + + ); + + # udev in the test vm initrd sometimes runs before hardware + # enumeration completes. 
+ services.udev.extraRules = config.boot.initrd.services.udev.rules; + + services.frr = { + bfdd.enable = true; + bgpd.enable = true; + config = '' + frr version 8.5.1 + frr defaults datacenter + ! + router bgp ${toString (65000 + id)} + bgp router-id ${underlayAddress} + bgp bestpath as-path multipath-relax + no bgp ebgp-requires-policy + neighbor remotes peer-group + neighbor remotes remote-as external + neighbor remotes capability extended-nexthop + neighbor remotes passive + neighbor remotes bfd + ${lib.concatMapStringsSep "\n " + (name: "neighbor ${name} interface peer-group remotes") + (attrNames vlanInterfaces) + } + ! + address-family ipv4 unicast + redistribute connected + neighbor remotes route-map accept-all-routes in + neighbor remotes route-map accept-all-routes out + exit-address-family + ! + address-family l2vpn evpn + neighbor remotes activate + neighbor remotes route-map accept-all-routes in + neighbor remotes route-map accept-all-routes out + advertise-all-vni + advertise-svi-ip + exit-address-family + ! + exit + ! + route-map accept-all-routes permit 1 + exit + ! + route-map set-source-address permit 1 + set src ${underlayAddress} + exit + ! + ip protocol bgp route-map set-source-address + ! + ''; + }; + }; + fcVlanIfaces = mapAttrs' (vlan: vid: { name = "eth${vlan}"; value = { vlan = vid; assignIP = true; };