From b6d9c7d062e2dcdfd2040ebb4202e9e7bbc5a97e Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Thu, 4 Jan 2024 14:09:37 -0500 Subject: [PATCH] Netavark: nftables support Adds an nftables firewall backend and tests for said backend. Implements basic forwarding, port forwarding, and teardown for all relevant rules. Heavily based on our existing iptables driver but with a number of improvements (we live in a dedicated table, so this should play much more nicely with other tools using the firewall; IPv4 and IPv6 share a table and almost all code; and rule structure is a bit simpler because we do have our own table and don't have to worry about cluttering up the FORWARD chain; we'll be the only ones using it). This implementation presently does not support isolation; that will be added in a follow-on. Fixes #816 Signed-off-by: Matthew Heon --- Cargo.lock | 44 ++ Cargo.toml | 1 + src/error/mod.rs | 26 + src/firewall/firewalld.rs | 69 ++- src/firewall/iptables.rs | 67 +- src/firewall/mod.rs | 5 +- src/firewall/nft.rs | 1078 +++++++++++++++++++++++++++++++++ test/250-bridge-nftables.bats | 949 +++++++++++++++++++++++++++++ 8 files changed, 2170 insertions(+), 69 deletions(-) create mode 100644 src/firewall/nft.rs create mode 100644 test/250-bridge-nftables.bats diff --git a/Cargo.lock b/Cargo.lock index 2dc9ad7d6..aad960ca2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1355,6 +1355,7 @@ dependencies = [ "netlink-packet-route", "netlink-packet-utils", "netlink-sys", + "nftables", "nispor", "nix 0.27.1", "once_cell", @@ -1452,6 +1453,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "nftables" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10abe0d631f93f30c3600ecb4ddd21561bcc6bac1837db11cda9c1c922207e7" +dependencies = [ + "serde", + "serde_json", + "serde_path_to_error", + "strum", + "strum_macros", + "thiserror", +] + [[package]] name = "nispor" version = "1.2.16" @@ -1929,6 +1944,16 @@ dependencies = [ "serde", ] 
+[[package]] +name = "serde_path_to_error" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4beec8bce849d58d06238cb50db2e1c417cfeafa4c63f692b15c82b7c80f8335" +dependencies = [ + "itoa", + "serde", +] + [[package]] name = "serde_repr" version = "0.1.17" @@ -2018,6 +2043,25 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index cb8a228a1..a4380844d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ sha2 = "0.10.8" netlink-packet-utils = "0.5.2" netlink-packet-route = "0.18.1" netlink-packet-core = "0.7.0" +nftables = "0.2.4" fs2 = "0.4.3" netlink-sys = "0.8.5" tokio = { version = "1.35", features = ["rt", "rt-multi-thread", "signal", "fs"] } diff --git a/src/error/mod.rs b/src/error/mod.rs index 9ca9d9801..bb6bbb6c8 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -81,6 +81,11 @@ pub enum NetavarkError { DHCPProxy(tonic::Status), List(NetavarkErrorList), + + Nftables(nftables::helper::NftablesError), + + SubnetParse(ipnet::AddrParseError), + AddrParse(std::net::AddrParseError), } /// Internal struct for JSON output @@ -160,6 +165,9 @@ impl fmt::Display for NetavarkError { Ok(()) } } + NetavarkError::Nftables(e) => write!(f, "nftables error: {e}"), + NetavarkError::SubnetParse(e) => write!(f, "parsing IP subnet error: {e}"), + 
NetavarkError::AddrParse(e) => write!(f, "parsing IP address error: {e}"), } } } @@ -213,3 +221,21 @@ impl From for NetavarkError { NetavarkError::DHCPProxy(err) } } + +impl From for NetavarkError { + fn from(err: nftables::helper::NftablesError) -> Self { + NetavarkError::Nftables(err) + } +} + +impl From for NetavarkError { + fn from(err: ipnet::AddrParseError) -> Self { + NetavarkError::SubnetParse(err) + } +} + +impl From for NetavarkError { + fn from(err: std::net::AddrParseError) -> Self { + NetavarkError::AddrParse(err) + } +} diff --git a/src/firewall/firewalld.rs b/src/firewall/firewalld.rs index 8d97793df..635b26286 100644 --- a/src/firewall/firewalld.rs +++ b/src/firewall/firewalld.rs @@ -4,7 +4,7 @@ use crate::network::internal_types; use crate::network::internal_types::{PortForwardConfig, TearDownNetwork, TeardownPortForward}; use crate::network::types::PortMapping; use core::convert::TryFrom; -use log::{debug, info}; +use log::{debug, info, warn}; use std::collections::HashMap; use std::vec::Vec; use zbus::{ @@ -679,3 +679,70 @@ fn make_port_tuple(port: &PortMapping, addr: &str) -> (String, String, String, S to_return } } + +/// Check if firewalld is running. +/// Not used within the firewalld driver, but by other drivers that may need to +/// interact with firewalld. +pub fn is_firewalld_running(conn: &Connection) -> bool { + conn.call_method( + Some("org.freedesktop.DBus"), + "/org/freedesktop/DBus", + Some("org.freedesktop.DBus"), + "GetNameOwner", + &"org.fedoraproject.FirewallD1", + ) + .is_ok() +} + +/// If possible, add a firewalld rule to allow traffic. +/// Ignore all errors, beyond possibly logging them. +/// Not used within the firewalld driver, but by other drivers that may need to +/// interact with firewalld. 
+pub fn add_firewalld_if_possible(net: &ipnet::IpNet) { + let conn = match Connection::system() { + Ok(conn) => conn, + Err(_) => return, + }; + if !is_firewalld_running(&conn) { + return; + } + debug!("Adding firewalld rules for network {}", net.to_string()); + + match add_source_subnets_to_zone(&conn, "trusted", &[*net]) { + Ok(_) => {} + Err(e) => warn!( + "Error adding subnet {} from firewalld trusted zone: {}", + net.to_string(), + e + ), + } +} + +/// If possible, remove a firewalld rule to allow traffic. +/// Ignore all errors, beyond possibly logging them. +/// Not used within the firewalld driver, but by other drivers that may need to +/// interact with firewalld. +pub fn rm_firewalld_if_possible(net: &ipnet::IpNet) { + let conn = match Connection::system() { + Ok(conn) => conn, + Err(_) => return, + }; + if !is_firewalld_running(&conn) { + return; + } + debug!("Removing firewalld rules for IPs {}", net.to_string()); + match conn.call_method( + Some("org.fedoraproject.FirewallD1"), + "/org/fedoraproject/FirewallD1", + Some("org.fedoraproject.FirewallD1.zone"), + "removeSource", + &("trusted", net.to_string()), + ) { + Ok(_) => {} + Err(e) => warn!( + "Error removing subnet {} from firewalld trusted zone: {}", + net.to_string(), + e + ), + }; +} diff --git a/src/firewall/iptables.rs b/src/firewall/iptables.rs index 39221c967..8baaf0112 100644 --- a/src/firewall/iptables.rs +++ b/src/firewall/iptables.rs @@ -10,8 +10,6 @@ use crate::network::internal_types::{ }; use iptables; use iptables::IPTables; -use log::{debug, warn}; -use zbus::blocking::Connection; pub(crate) const MAX_HASH_SIZE: usize = 13; @@ -64,7 +62,7 @@ impl firewall::FirewallDriver for IptablesDriver { create_network_chains(chains)?; - add_firewalld_if_possible(&network); + firewalld::add_firewalld_if_possible(&network); } } Ok(()) @@ -106,7 +104,7 @@ impl firewall::FirewallDriver for IptablesDriver { } if tear.complete_teardown { - rm_firewalld_if_possible(&network) + 
firewalld::rm_firewalld_if_possible(&network) } } } @@ -216,64 +214,3 @@ impl firewall::FirewallDriver for IptablesDriver { Result::Ok(()) } } - -/// Check if firewalld is running -fn is_firewalld_running(conn: &Connection) -> bool { - conn.call_method( - Some("org.freedesktop.DBus"), - "/org/freedesktop/DBus", - Some("org.freedesktop.DBus"), - "GetNameOwner", - &"org.fedoraproject.FirewallD1", - ) - .is_ok() -} - -/// If possible, add a firewalld rule to allow traffic. -/// Ignore all errors, beyond possibly logging them. -fn add_firewalld_if_possible(net: &ipnet::IpNet) { - let conn = match Connection::system() { - Ok(conn) => conn, - Err(_) => return, - }; - if !is_firewalld_running(&conn) { - return; - } - debug!("Adding firewalld rules for network {}", net.to_string()); - - match firewalld::add_source_subnets_to_zone(&conn, "trusted", &[*net]) { - Ok(_) => {} - Err(e) => warn!( - "Error adding subnet {} from firewalld trusted zone: {}", - net.to_string(), - e - ), - } -} - -// If possible, remove a firewalld rule to allow traffic. -// Ignore all errors, beyond possibly logging them. 
-fn rm_firewalld_if_possible(net: &ipnet::IpNet) { - let conn = match Connection::system() { - Ok(conn) => conn, - Err(_) => return, - }; - if !is_firewalld_running(&conn) { - return; - } - debug!("Removing firewalld rules for IPs {}", net.to_string()); - match conn.call_method( - Some("org.fedoraproject.FirewallD1"), - "/org/fedoraproject/FirewallD1", - Some("org.fedoraproject.FirewallD1.zone"), - "removeSource", - &("trusted", net.to_string()), - ) { - Ok(_) => {} - Err(e) => warn!( - "Error removing subnet {} from firewalld trusted zone: {}", - net.to_string(), - e - ), - }; -} diff --git a/src/firewall/mod.rs b/src/firewall/mod.rs index f6b1086ac..c2eda43ac 100644 --- a/src/firewall/mod.rs +++ b/src/firewall/mod.rs @@ -8,6 +8,7 @@ use zbus::blocking::Connection; pub mod firewalld; pub mod fwnone; pub mod iptables; +pub mod nft; pub mod state; mod varktables; @@ -108,9 +109,7 @@ pub fn get_supported_firewall_driver( } FirewallImpl::Nftables => { info!("Using nftables firewall driver"); - Err(NetavarkError::msg( - "nftables support presently not available", - )) + nft::new() } FirewallImpl::Fwnone => { info!("Not using firewall"); diff --git a/src/firewall/nft.rs b/src/firewall/nft.rs new file mode 100644 index 000000000..f9753550f --- /dev/null +++ b/src/firewall/nft.rs @@ -0,0 +1,1078 @@ +use crate::error::{NetavarkError, NetavarkResult}; +use crate::firewall; +use crate::firewall::firewalld; +use crate::network::internal_types; +use crate::network::types::PortMapping; +use ipnet::IpNet; +use nftables::batch::Batch; +use nftables::expr; +use nftables::helper::{self}; +use nftables::schema; +use nftables::stmt; +use nftables::types; +use std::collections::HashSet; +use std::net::IpAddr; + +const TABLENAME: &str = "netavark"; + +const INPUTCHAIN: &str = "INPUT"; +const FORWARDCHAIN: &str = "FORWARD"; +const POSTROUTINGCHAIN: &str = "POSTROUTING"; +const PREROUTINGCHAIN: &str = "PREROUTING"; +const OUTPUTCHAIN: &str = "OUTPUT"; +const DNATCHAIN: &str = 
"NETAVARK-HOSTPORT-DNAT"; +const MASKCHAIN: &str = "NETAVARK-HOSTPORT-SETMARK"; + +const MASK: u32 = 0x2000; + +pub struct Nftables {} + +pub fn new() -> Result, NetavarkError> { + Ok(Box::new(Nftables {})) +} + +impl firewall::FirewallDriver for Nftables { + fn driver_name(&self) -> &str { + firewall::NFTABLES + } + + fn setup_network(&self, network_setup: internal_types::SetupNetwork) -> NetavarkResult<()> { + let mut batch = Batch::new(); + + // Overall table + batch.add(schema::NfListObject::Table(schema::Table::new( + types::NfFamily::INet, + TABLENAME.to_string(), + ))); + + // Five default chains, one for each hook we have to monitor + batch.add(schema::NfListObject::Chain(schema::Chain::new( + types::NfFamily::INet, + TABLENAME.to_string(), + INPUTCHAIN.to_string(), + Some(types::NfChainType::Filter), + Some(types::NfHook::Input), + Some(0), // Prio 0 == filter + None, + Some(types::NfChainPolicy::Accept), + ))); + batch.add(schema::NfListObject::Chain(schema::Chain::new( + types::NfFamily::INet, + TABLENAME.to_string(), + FORWARDCHAIN.to_string(), + Some(types::NfChainType::Filter), + Some(types::NfHook::Forward), + Some(0), // Prio 0 == filter + None, + Some(types::NfChainPolicy::Accept), + ))); + batch.add(schema::NfListObject::Chain(schema::Chain::new( + types::NfFamily::INet, + TABLENAME.to_string(), + POSTROUTINGCHAIN.to_string(), + Some(types::NfChainType::NAT), + Some(types::NfHook::Postrouting), + Some(100), // Prio 100 == srcnat + None, + Some(types::NfChainPolicy::Accept), + ))); + batch.add(schema::NfListObject::Chain(schema::Chain::new( + types::NfFamily::INet, + TABLENAME.to_string(), + PREROUTINGCHAIN.to_string(), + Some(types::NfChainType::NAT), + Some(types::NfHook::Prerouting), + Some(-100), // Prio -100 == dnat + None, + Some(types::NfChainPolicy::Accept), + ))); + batch.add(schema::NfListObject::Chain(schema::Chain::new( + types::NfFamily::INet, + TABLENAME.to_string(), + OUTPUTCHAIN.to_string(), + Some(types::NfChainType::NAT), + 
Some(types::NfHook::Output), + Some(-100), // Prio -100 == dnat + None, + Some(types::NfChainPolicy::Accept), + ))); + + // dnat rules. Not used here, but need to be created first, because they have rules that must be first in their chains. + // A lot of these are thus conditional on if the rule already exists or not. + let existing_rules = helper::get_current_ruleset(None, None)?; + + // Two extra chains, not hooked to anything, for our NAT pf rules + batch.add(make_basic_chain(DNATCHAIN)); + batch.add(make_basic_chain(MASKCHAIN)); + + // Postrouting chain needs a single rule to masquerade if mask is set. + // But only one copy of that rule. So check if such a rule exists. + let match_meta_masq = |r: &schema::Rule| -> bool { + // Match on any rule that matches against 0x2000 + for statement in &r.expr { + match statement { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::Number(n) => { + if *n == MASK { + return true; + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + if get_matching_rules_in_chain(&existing_rules, POSTROUTINGCHAIN, match_meta_masq) + .is_empty() + { + // Postrouting: meta mark & 0x2000 == 0x2000 masquerade + batch.add(make_rule( + POSTROUTINGCHAIN, + vec![ + stmt::Statement::Match(stmt::Match { + left: expr::Expression::BinaryOperation(expr::BinaryOperation::AND( + Box::new(expr::Expression::Named(expr::NamedExpression::Meta( + expr::Meta { + key: expr::MetaKey::Mark, + }, + ))), + Box::new(expr::Expression::Number(MASK)), + )), + right: expr::Expression::Number(MASK), + op: stmt::Operator::EQ, + }), + stmt::Statement::Masquerade(None), + ], + )); + } + + // Mask chain needs a single rule to apply the mask. + // But only one copy of that rule. So check if such a rule exists. + let match_meta_mark = |r: &schema::Rule| -> bool { + // Match on any mangle rule. 
+ for statement in &r.expr { + match statement { + stmt::Statement::Mangle(_) => return true, + _ => continue, + } + } + false + }; + if get_matching_rules_in_chain(&existing_rules, MASKCHAIN, match_meta_mark).is_empty() { + // Mask chain: mark or 0x2000 + batch.add(make_rule( + MASKCHAIN, + vec![stmt::Statement::Mangle(stmt::Mangle { + key: expr::Expression::Named(expr::NamedExpression::Meta(expr::Meta { + key: expr::MetaKey::Mark, + })), + value: expr::Expression::BinaryOperation(expr::BinaryOperation::OR( + Box::new(expr::Expression::Named(expr::NamedExpression::Meta( + expr::Meta { + key: expr::MetaKey::Mark, + }, + ))), + Box::new(expr::Expression::Number(MASK)), + )), + })], + )); + } + + // We need rules in Prerouting and Output pointing to our dnat chain. + // But only if they do not exist. + let match_jump_dnat = get_rule_matcher_jump_to(DNATCHAIN.to_string()); + // Prerouting: fib daddr type local jump + // Output: fib daddr type local jump + let mut rules_hash: HashSet = HashSet::new(); + rules_hash.insert(expr::FibFlag::Daddr); + let base_conditions: Vec = vec![ + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Fib(expr::Fib { + result: expr::FibResult::Type, + flags: rules_hash, + })), + right: expr::Expression::String("local".to_string()), + op: stmt::Operator::EQ, + }), + get_jump_action(DNATCHAIN), + ]; + if get_matching_rules_in_chain(&existing_rules, PREROUTINGCHAIN, &match_jump_dnat) + .is_empty() + { + batch.add(make_rule(PREROUTINGCHAIN, base_conditions.clone())); + } + if get_matching_rules_in_chain(&existing_rules, OUTPUTCHAIN, &match_jump_dnat).is_empty() { + batch.add(make_rule(OUTPUTCHAIN, base_conditions.clone())); + } + + // Forward chain: ct state invalid drop + let match_deny = |r: &schema::Rule| -> bool { + for statement in &r.expr { + match statement { + stmt::Statement::Drop(_) => return true, + _ => continue, + } + } + false + }; + if get_matching_rules_in_chain(&existing_rules, 
FORWARDCHAIN, match_deny).is_empty() { + batch.add(make_rule( + FORWARDCHAIN, + vec![ + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::CT(expr::CT { + key: "state".to_string(), + family: None, + dir: None, + })), + right: expr::Expression::String("invalid".to_string()), + op: stmt::Operator::IN, + }), + stmt::Statement::Drop(None), + ], + )); + } + + // Basic forwarding for all subnets + if let Some(nets) = network_setup.subnets { + for subnet in nets { + let chain = get_subnet_chain_name(subnet, false); + + // Do we already have a chain for the subnet? + if get_chain(&existing_rules, &chain).is_some() { + continue; + } + + // We don't. Make one. + batch.add(make_basic_chain(&chain)); + + log::info!("Creating container chain {chain}"); + + // Subnet chain: ip daddr accept + batch.add(make_rule( + &chain, + vec![ + get_subnet_match(&subnet, "daddr", stmt::Operator::EQ), + stmt::Statement::Accept(None), + ], + )); + + // Subnet chain: ip daddr != 224.0.0.0/4 masquerade + batch.add(make_rule( + &chain, + vec![ + get_subnet_match(&("224.0.0.0/4".parse()?), "daddr", stmt::Operator::NEQ), + stmt::Statement::Masquerade(None), + ], + )); + + // Next, populate basic chains with forwarding rules + // Input chain: ip saddr udp dport 53 accept + batch.add(make_rule( + INPUTCHAIN, + vec![ + get_subnet_match(&subnet, "saddr", stmt::Operator::EQ), + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Payload( + expr::Payload { + protocol: "udp".to_string(), + field: "dport".to_string(), + }, + )), + right: expr::Expression::Number(53), + op: stmt::Operator::EQ, + }), + stmt::Statement::Accept(None), + ], + )); + // Forward chain: ip daddr ct state related,established accept + batch.add(make_rule( + FORWARDCHAIN, + vec![ + get_subnet_match(&subnet, "daddr", stmt::Operator::EQ), + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::CT(expr::CT { + key: 
"state".to_string(), + family: None, + dir: None, + })), + right: expr::Expression::List(vec![ + expr::Expression::String("established".to_string()), + expr::Expression::String("related".to_string()), + ]), + op: stmt::Operator::IN, + }), + stmt::Statement::Accept(None), + ], + )); + // Forward chain: ip saddr accept + batch.add(make_rule( + FORWARDCHAIN, + vec![ + get_subnet_match(&subnet, "saddr", stmt::Operator::EQ), + stmt::Statement::Accept(None), + ], + )); + // Postrouting chain: ip saddr jump + batch.add(make_rule( + POSTROUTINGCHAIN, + vec![ + get_subnet_match(&subnet, "saddr", stmt::Operator::EQ), + get_jump_action(&chain), + ], + )); + + // Finally: Add us to firewalld if necessary. + firewalld::add_firewalld_if_possible(&subnet); + } + } + + let rules = batch.to_nftables(); + + helper::apply_ruleset(&rules, None, None)?; + + Ok(()) + } + + fn teardown_network(&self, tear: internal_types::TearDownNetwork) -> NetavarkResult<()> { + if !tear.complete_teardown { + log::info!("Nothing to tear down, network still in use"); + + return Ok(()); + } + + let mut batch = Batch::new(); + + let existing_rules = helper::get_current_ruleset(None, None)?; + + if let Some(nets) = tear.config.subnets { + for subnet in nets { + // Match subnet, either saddr or daddr. + let match_subnet = |r: &schema::Rule| -> bool { + // Statement matching: We only care about match statements. + // Don't bother with left side. Just check if what they compare to is our subnet. 
+ for statement in &r.expr { + match statement { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::Named(expr::NamedExpression::Prefix(p)) => { + match *p.addr.clone() { + expr::Expression::String(s) => { + if *s == subnet.addr().to_string() { + return true; + } + } + _ => continue, + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + + let mut to_remove: Vec = Vec::new(); + to_remove.append(&mut get_matching_rules_in_chain( + &existing_rules, + INPUTCHAIN, + match_subnet, + )); + to_remove.append(&mut get_matching_rules_in_chain( + &existing_rules, + FORWARDCHAIN, + match_subnet, + )); + to_remove.append(&mut get_matching_rules_in_chain( + &existing_rules, + POSTROUTINGCHAIN, + match_subnet, + )); + + log::debug!("Removing {} rules", to_remove.len()); + + for rule in to_remove { + batch.delete(schema::NfListObject::Rule(rule)); + } + + // Delete the chain last + let chain = get_subnet_chain_name(subnet, false); + if let Some(c) = get_chain(&existing_rules, &chain) { + batch.delete(schema::NfListObject::Chain(c)); + } + + // After all nftables work is done, remove us from firewalld. + firewalld::rm_firewalld_if_possible(&subnet); + } + } + + let rules = batch.to_nftables(); + + helper::apply_ruleset(&rules, None, None)?; + Ok(()) + } + + fn setup_port_forward( + &self, + setup_portfw: internal_types::PortForwardConfig, + ) -> NetavarkResult<()> { + let mut batch = Batch::new(); + + let existing_rules = helper::get_current_ruleset(None, None)?; + + // Need DNAT rules for DNS if Aardvark is not on port 53. + // Only need one per DNS server IP, so check if they already exist first. 
+ if setup_portfw.dns_port != 53 { + for ip in setup_portfw.dns_server_ips { + let match_dns_ip_dnat = |r: &schema::Rule| { + for statement in &r.expr { + match statement { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::String(s) => { + if *s == ip.to_string() { + return true; + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + if !get_matching_rules_in_chain(&existing_rules, DNATCHAIN, match_dns_ip_dnat) + .is_empty() + { + continue; + } + + // We have multiple DNS server IPs. Potentially v4 and v6 both. + // Only add those when the container has an IP address matching the family. + match ip { + IpAddr::V4(_) => { + if setup_portfw.container_ip_v4.is_some() { + batch.add(make_dns_dnat_rule(ip, setup_portfw.dns_port, false)); + } + } + IpAddr::V6(_) => { + if setup_portfw.container_ip_v6.is_some() { + batch.add(make_dns_dnat_rule(ip, setup_portfw.dns_port, true)); + } + } + } + } + } + + if let Some(ip_v4) = setup_portfw.container_ip_v4 { + if let Some(subnet_v4) = setup_portfw.subnet_v4 { + for rule in get_dnat_rules_for_addr_family( + ip_v4, + subnet_v4, + &existing_rules, + &setup_portfw, + )? { + batch.add(rule); + } + } + } + if let Some(ip_v6) = setup_portfw.container_ip_v6 { + if let Some(subnet_v6) = setup_portfw.subnet_v6 { + for rule in get_dnat_rules_for_addr_family( + ip_v6, + subnet_v6, + &existing_rules, + &setup_portfw, + )? { + batch.add(rule); + } + } + } + + let rules = batch.to_nftables(); + + helper::apply_ruleset(&rules, None, None)?; + + Ok(()) + } + + fn teardown_port_forward( + &self, + teardown_pf: internal_types::TeardownPortForward, + ) -> NetavarkResult<()> { + let mut batch = Batch::new(); + + let existing_rules = helper::get_current_ruleset(None, None)?; + + let dnat_chain_v4 = teardown_pf + .config + .subnet_v4 + .map(|s| get_subnet_chain_name(s, true)); + let dnat_chain_v6 = teardown_pf + .config + .subnet_v6 + .map(|s| get_subnet_chain_name(s, true)); + + // We need two matchers for each port. 
+ // One matching both the port and jumping to either the V4 or V6 chain (to clean NETAVARK_DNAT) + // One matching just the port, to clean the v4 and v6 chains for the network. + // As a bonus, the last one needs to match any individual port inside the range. + if let Some(ports) = teardown_pf.config.port_mappings { + for port in ports { + let matcher_port_jump = |r: &schema::Rule| -> bool { + let mut match_jump = false; + let mut match_port = false; + for stmt in &r.expr { + // Basically, check for match and jump statements. + // For match, check that the right side is appropriate + // for our port mapping. Has to handle range vs + // singleton. + // For jump, make sure that it matches either the v4 or + // v6 DNAT chains. + // If we find both, the rule matches. + match stmt { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::Number(n) => { + if port.range <= 1 && port.host_port as u32 == *n { + if match_jump { + return true; + } + match_port = true; + } + } + expr::Expression::Range(r) => { + if port.range > 1 { + if r.range.len() != 2 { + // Malformed range, just return false + return false; + } + match r.range[0] { + expr::Expression::Number(n) => { + if port.host_port as u32 != n { + continue; + } + } + _ => continue, + } + match r.range[1] { + expr::Expression::Number(n) => { + if (port.host_port + port.range - 1) as u32 == n { + if match_jump { + return true; + } + match_port = true; + } + } + _ => continue, + } + } + } + _ => continue, + }, + stmt::Statement::Jump(j) => { + if let Some(v4) = dnat_chain_v4.clone() { + if j.target == v4 { + if match_port { + return true; + } + match_jump = true; + } + } + if let Some(v6) = dnat_chain_v6.clone() { + if j.target == v6 { + if match_port { + return true; + } + match_jump = true + } + } + } + _ => continue, + } + } + match_jump && match_port + }; + + let match_all_ports_in_range = |r: &schema::Rule| -> bool { + for stmt in &r.expr { + match stmt { + stmt::Statement::Match(m) => match &m.right { 
+ expr::Expression::Number(n) => { + if port.range <= 1 && *n == port.host_port as u32 { + return true; + } + if port.range > 1 + && *n >= port.host_port as u32 + && *n <= (port.host_port + port.range - 1) as u32 + { + return true; + } + } + expr::Expression::Range(r) => { + if port.range > 1 { + if r.range.len() != 2 { + // Malformed range, just return false + return false; + } + match r.range[0] { + expr::Expression::Number(n) => { + if port.host_port as u32 != n { + continue; + } + } + _ => continue, + } + match r.range[1] { + expr::Expression::Number(n) => { + if (port.host_port + port.range - 1) as u32 == n { + return true; + } + } + _ => continue, + } + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + + for rule in + get_matching_rules_in_chain(&existing_rules, DNATCHAIN, matcher_port_jump) + { + batch.delete(schema::NfListObject::Rule(rule)); + } + + if let Some(v4) = dnat_chain_v4.clone() { + for rule in + get_matching_rules_in_chain(&existing_rules, &v4, match_all_ports_in_range) + { + batch.delete(schema::NfListObject::Rule(rule)); + } + } + if let Some(v6) = dnat_chain_v6.clone() { + for rule in + get_matching_rules_in_chain(&existing_rules, &v6, match_all_ports_in_range) + { + batch.delete(schema::NfListObject::Rule(rule)); + } + } + } + } + + if teardown_pf.complete_teardown { + let match_dns_dnat = |r: &schema::Rule| -> bool { + for statement in &r.expr { + match statement { + // Match any DNS server IP + stmt::Statement::Match(m) => match &m.right { + expr::Expression::String(s) => { + for ip in teardown_pf.config.dns_server_ips { + if *s == ip.to_string() { + return true; + } + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + for rule in get_matching_rules_in_chain(&existing_rules, DNATCHAIN, match_dns_dnat) { + batch.delete(schema::NfListObject::Rule(rule)); + } + + if let Some(v4) = dnat_chain_v4 { + if let Some(c) = get_chain(&existing_rules, &v4) { + batch.delete(schema::NfListObject::Chain(c)); + } + } + if 
let Some(v6) = dnat_chain_v6 {
                if let Some(c) = get_chain(&existing_rules, &v6) {
                    batch.delete(schema::NfListObject::Chain(c));
                }
            }
        }

        let rules = batch.to_nftables();

        helper::apply_ruleset(&rules, None, None)?;

        Ok(())
    }
}

/// Convert a subnet into a chain name.
///
/// Returns `nv_<subnet>_dnat` when `dnat` is true and `nv_<subnet>` otherwise,
/// where `<subnet>` is the textual subnet with its special characters replaced.
fn get_subnet_chain_name(subnet: IpNet, dnat: bool) -> String {
    // nftables is very lenient around chain name lengths.
    // So let's use the full IP to be unambiguous.
    // Special characters are replaced: '.' becomes '_', ':' becomes '-',
    // and '/' becomes "_nm" (netmask).
    let subnet_clean = subnet
        .to_string()
        .replace('.', "_")
        .replace(':', "-")
        .replace('/', "_nm");

    if dnat {
        format!("nv_{subnet_clean}_dnat")
    } else {
        format!("nv_{subnet_clean}")
    }
}

/// Get a statement to match the given IP address.
/// Field should be either "saddr" or "daddr" for matching source or destination.
/// `op` is the comparison operator placed between the payload field and the address.
fn get_ip_match(ip: &IpAddr, field: &str, op: stmt::Operator) -> stmt::Statement {
    stmt::Statement::Match(stmt::Match {
        left: ip_to_payload(ip, field),
        right: expr::Expression::String(ip.to_string()),
        op,
    })
}

/// Convert a single IP into a Payload field.
/// Basically, pastes in "ip" or "ip6" in the protocol field based on whether
/// this is a v4 or v6 address.
fn ip_to_payload(addr: &IpAddr, field: &str) -> expr::Expression {
    // nftables names the IPv6 header protocol "ip6" (as in `ip6 saddr`).
    let proto = match addr {
        IpAddr::V4(_) => "ip",
        IpAddr::V6(_) => "ip6",
    };

    expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload {
        protocol: proto.to_string(),
        field: field.to_string(),
    }))
}

/// Get a statement to match the given subnet.
/// Field should be either "saddr" or "daddr" for matching source or destination.
+fn get_subnet_match(net: &IpNet, field: &str, op: stmt::Operator) -> stmt::Statement { + stmt::Statement::Match(stmt::Match { + left: subnet_to_payload(net, field), + right: expr::Expression::Named(expr::NamedExpression::Prefix(expr::Prefix { + addr: Box::new(expr::Expression::String(net.addr().to_string())), + len: net.prefix_len() as u32, + })), + op, + }) +} + +/// Convert a subnet into a Payload field. +/// Basically, pastes in "ip" or "ipv6" in protocol field based on whether this +/// is a v4 or v6 subnet. +fn subnet_to_payload(net: &IpNet, field: &str) -> expr::Expression { + let proto = match net { + IpNet::V4(_) => "ip".to_string(), + IpNet::V6(_) => "ipv6".to_string(), + }; + + expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: proto, + field: field.to_string(), + })) +} + +/// Get a condition to match destination port/ports based on a given PortMapping. +/// Properly handles port ranges, protocol, etc. +fn get_dport_cond(port: &PortMapping) -> stmt::Statement { + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: port.protocol.clone(), + field: "dport".to_string(), + })), + right: if port.range > 1 { + // Ranges are a vector with a length of 2. + // First value start, second value end. + let range_vec = vec![ + expr::Expression::Number(port.host_port as u32), + expr::Expression::Number((port.host_port + port.range - 1) as u32), + ]; + expr::Expression::Range(expr::Range { range: range_vec }) + } else { + expr::Expression::Number(port.host_port as u32) + }, + op: stmt::Operator::EQ, + }) +} + +/// Make the first container DNAT chain rule, which is used for both IP and IPv6 DNAT. 
+fn get_subnet_dport_match( + dnat_chain: &str, + subnet: &Option, + host_ip_match: &Option, + dport_match: &stmt::Statement, +) -> schema::NfListObject { + // ip saddr ip daddr dport jump MARKCHAIN + let mut statements: Vec = Vec::new(); + if let Some(net) = &subnet { + statements.push(get_subnet_match(net, "saddr", stmt::Operator::EQ)); + } + if let Some(stmt) = &host_ip_match { + statements.push(stmt.clone()); + } + statements.push(dport_match.clone()); + statements.push(get_jump_action(MASKCHAIN)); + make_rule(dnat_chain, statements) +} + +/// Create DNAT rules for each port to be forwarded. +/// Used for both IP and IPv6 DNAT. +fn get_dnat_port_rules( + dnat_chain: &str, + port: &PortMapping, + ip: &IpAddr, + host_ip_match: &Option, + is_v6: bool, +) -> Vec { + let mut rules: Vec = Vec::new(); + + // Container dnat chain: ip daddr dport dnat to + // Unfortunately: We don't have range support in the schema. So we need 1 rule per port. + let range = if port.range == 0 { 1 } else { port.range }; + for i in 0..range { + let host_port: u32 = (port.host_port + i) as u32; + let ctr_port: u32 = (port.container_port + i) as u32; + + let mut statements: Vec = Vec::new(); + if let Some(stmt) = host_ip_match { + statements.push(stmt.clone()); + } + statements.push(stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: port.protocol.clone(), + field: "dport".to_string(), + })), + right: expr::Expression::Number(host_port), + op: stmt::Operator::EQ, + })); + statements.push(stmt::Statement::DNAT(Some(stmt::NAT { + addr: Some(expr::Expression::String(ip.to_string())), + family: Some(if is_v6 { + stmt::NATFamily::IP6 + } else { + stmt::NATFamily::IP + }), + port: Some(ctr_port), + flags: None, + }))); + rules.push(make_rule(dnat_chain, statements)); + } + + rules +} + +fn get_dnat_rules_for_addr_family( + ip: IpAddr, + subnet: IpNet, + existing_rules: &schema::Nftables, + setup_portfw: 
&internal_types::PortForwardConfig, +) -> NetavarkResult> { + let mut rules: Vec = Vec::new(); + + if let Some(ports) = setup_portfw.port_mappings { + let subnet_dnat_chain = get_subnet_chain_name(subnet, true); + + // Make the chain if it does not exist + if get_chain(existing_rules, &subnet_dnat_chain).is_none() { + rules.push(make_basic_chain(&subnet_dnat_chain)); + } + + for port in ports { + // Condition to match destination ports (ports on the host) + let dport_cond = get_dport_cond(port); + // Destination address is only if user set an IP on the host to bind to. + // Used by multiple rules in this section. + let daddr_statement: Option = if !port.host_ip.is_empty() { + Some(get_ip_match( + &(port.host_ip.parse()?), + "daddr", + stmt::Operator::EQ, + )) + } else { + None + }; + + // dnat chain: dport jump + rules.push(make_rule( + DNATCHAIN, + vec![dport_cond.clone(), get_jump_action(&subnet_dnat_chain)], + )); + + // Container dnat chain: ip saddr ip daddr dport jump SETMARKCHAIN + rules.push(get_subnet_dport_match( + &subnet_dnat_chain, + &Some(subnet), + &daddr_statement, + &dport_cond, + )); + + // This rule is only used for v4. + if ip.is_ipv4() { + // Container dnat chain: ip saddr 127.0.0.1 ip daddr dport jump SETMARKCHAIN + let mut localhost_jump_statements: Vec = Vec::new(); + localhost_jump_statements.push(get_ip_match( + &("127.0.0.1".parse()?), + "saddr", + stmt::Operator::EQ, + )); + if let Some(stmt) = &daddr_statement { + localhost_jump_statements.push(stmt.clone()); + } + localhost_jump_statements.push(dport_cond.clone()); + localhost_jump_statements.push(get_jump_action(MASKCHAIN)); + rules.push(make_rule(&subnet_dnat_chain, localhost_jump_statements)); + } + + for rule in get_dnat_port_rules(&subnet_dnat_chain, port, &ip, &daddr_statement, false) + { + rules.push(rule); + } + } + } + + Ok(rules) +} + +/// Make a DNAT rule to allow DNS traffic to a DNS server on a non-standard port (53 -> actual port). 
+fn make_dns_dnat_rule(dns_ip: &IpAddr, dns_port: u16, is_v6: bool) -> schema::NfListObject { + make_rule( + DNATCHAIN, + vec![ + get_ip_match(dns_ip, "daddr", stmt::Operator::EQ), + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: "udp".to_string(), + field: "dport".to_string(), + })), + right: expr::Expression::Number(53), + op: stmt::Operator::EQ, + }), + stmt::Statement::DNAT(Some(stmt::NAT { + addr: Some(expr::Expression::String(dns_ip.to_string())), + family: Some(if is_v6 { + stmt::NATFamily::IP6 + } else { + stmt::NATFamily::IP + }), + port: Some(dns_port as u32), + flags: None, + })), + ], + ) +} + +/// Create a statement to jump to the given target +fn get_jump_action(target: &str) -> stmt::Statement { + stmt::Statement::Jump(stmt::JumpTarget { + target: target.to_string(), + }) +} + +/// Create an instruction to make a basic chain (no hooks, no priority). +/// Chain is always inet, always in our overall netavark table. +fn make_basic_chain(name: &str) -> schema::NfListObject { + schema::NfListObject::Chain(schema::Chain::new( + types::NfFamily::INet, + TABLENAME.to_string(), + name.to_string(), + None, + None, + None, + None, + None, + )) +} + +/// Make a rule in the given chain with the given conditions +fn make_rule(chain: &str, conditions: Vec) -> schema::NfListObject { + schema::NfListObject::Rule(schema::Rule::new( + types::NfFamily::INet, + TABLENAME.to_string(), + chain.to_string(), + conditions, + )) +} + +/// Make a closure that matches any rule that jumps to the given chain. +fn get_rule_matcher_jump_to(jump_target: String) -> Box bool> { + Box::new(move |r: &schema::Rule| -> bool { + for statement in &r.expr { + match statement { + stmt::Statement::Jump(j) => { + return j.target == jump_target; + } + _ => continue, + } + } + false + }) +} + +/// Find all rules in the given chain which match the given closure (true == include). 
+/// Returns all those rules, in a vector. Vector will be empty if there are none. +fn get_matching_rules_in_chain bool>( + base_rules: &schema::Nftables, + chain: &str, + rule_match: F, +) -> Vec { + let mut rules: Vec = Vec::new(); + + // Basically, we get back a big, flat array of everything in the table. + // That makes this an absolute destructuring nightmare, but there's no avoiding it. + // Ignore everything we get back that is not a rule. + // Then ignore everything that is not in our table (not passed, but we only use one table). + // Then ignore everything that is not in the given chain. + // Then check conditions and add to the vector if it matches. + for object in &base_rules.objects { + match object { + schema::NfObject::CmdObject(_) => continue, + schema::NfObject::ListObject(obj) => match obj { + schema::NfListObject::Rule(r) => { + if r.table != *TABLENAME { + continue; + } + if r.chain != *chain { + continue; + } + + if rule_match(r) { + log::debug!("Matched {:?}", r); + rules.push(r.clone()); + } + } + _ => continue, + }, + } + } + + rules +} + +/// Get a chain with the given name in the Netavark table. 
+fn get_chain(base_rules: &schema::Nftables, chain: &str) -> Option { + for object in &base_rules.objects { + match object { + schema::NfObject::CmdObject(_) => continue, + schema::NfObject::ListObject(obj) => match obj { + schema::NfListObject::Chain(c) => { + if c.table != *TABLENAME { + continue; + } + if c.name == *chain { + log::debug!("Found chain {}", chain); + return Some(c.clone()); + } + } + _ => continue, + }, + } + } + + None +} diff --git a/test/250-bridge-nftables.bats b/test/250-bridge-nftables.bats new file mode 100644 index 000000000..2ec8d2644 --- /dev/null +++ b/test/250-bridge-nftables.bats @@ -0,0 +1,949 @@ +#!/usr/bin/env bats -*- bats -*- +# +# bridge driver tests with nftables firewall driver +# + +load helpers + +fw_driver=nftables +export NETAVARK_FW=nftables + +@test "check nftables driver is in use" { + RUST_LOG=netavark=info run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + assert "${lines[0]}" "==" "[INFO netavark::firewall] Using nftables firewall driver" "nftables driver is in use" +} + +@test "$fw_driver - internal network" { + run_in_host_netns nft list table inet netavark + before="$output" + + run_netavark --file ${TESTSDIR}/testfiles/internal.json setup $(get_container_netns_path) + + run_in_host_netns nft list table inet netavark + assert "$output" == "$before" "make sure tables have not changed" + + run_in_container_netns ip route show + assert "$output" "!~" "default" "No default route for internal networks" + + run_in_container_netns ping -c 1 10.88.0.1 + + run_netavark --file ${TESTSDIR}/testfiles/internal.json teardown $(get_container_netns_path) +} + +@test "$fw_driver - simple bridge" { + run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + result="$output" + assert_json "$result" 'has("podman")' == "true" "object key exists" + + mac=$(jq -r '.podman.interfaces.eth0.mac_address' <<<"$result") + # check that interface exists + 
run_in_container_netns ip -j --details link show eth0 + link_info="$output" + assert_json "$link_info" ".[].address" == "$mac" "MAC matches container mac" + assert_json "$link_info" '.[].flags[] | select(.=="UP")' == "UP" "Container interface is up" + assert_json "$link_info" ".[].linkinfo.info_kind" == "veth" "Container interface is a veth device" + + ipaddr="10.88.0.2/16" + run_in_container_netns ip addr show eth0 + assert "$output" =~ "$ipaddr" "IP address matches container address" + assert_json "$result" ".podman.interfaces.eth0.subnets[0].ipnet" == "$ipaddr" "Result contains correct IP address" + + run_in_host_netns ip -j --details link show podman0 + link_info="$output" + assert_json "$link_info" '.[].flags[] | select(.=="UP")' == "UP" "Host bridge interface is up" + assert_json "$link_info" ".[].linkinfo.info_kind" == "bridge" "The bridge interface is actually a bridge" + bridge_mac=$(jq -r '.[].address' <<<"$link_info") + + run_in_host_netns ip -j link show veth0 + veth_info="$output" + assert_json "$veth_info" ".[].address" != "$bridge_mac" "Bridge and Veth must have different mac address" + + ipaddr="10.88.0.1" + run_in_host_netns ip addr show podman0 + assert "$output" =~ "$ipaddr" "IP address matches bridge gateway address" + assert_json "$result" ".podman.interfaces.eth0.subnets[0].gateway" == "$ipaddr" "Result contains gateway address" + + # check that the loopback adapter is up + run_in_container_netns ip addr show lo + assert "$output" =~ "127.0.0.1" "Loopback adapter is up (has address)" + + run_in_host_netns ping -c 1 10.88.0.2 + + check_simple_bridge_nftables + + run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json teardown $(get_container_netns_path) + + # now check that nftables rules are gone + + # check FORWARD rules + run_in_host_netns nft list chain inet netavark FORWARD + assert "${lines[3]}" =~ "ct state invalid drop" "CT state invalid rule" + assert "${#lines[@]}" = 6 "too many FORWARD rules after teardown" + + # check 
POSTROUTING rules + run_in_host_netns nft list chain inet netavark POSTROUTING + assert "${lines[2]}" =~ "meta mark & 0x00002000 == 0x00002000 masquerade" "Mark-masquerade rule" + assert "${#lines[@]}" = 6 "too many POSTROUTING rules after teardown" + + # nv_10_88_0_0_nm16 chain should not exists + expected_rc=1 run_in_host_netns nft list chain inet netavark nv_10_88_0_0_nm16 + + # bridge should be removed on teardown + expected_rc=1 run_in_host_netns ip addr show podman0 +} + +@test "$fw_driver - bridge with static routes" { + # add second interface and routes through that interface to test proper teardown + run_in_container_netns ip link add type dummy + run_in_container_netns ip a add 10.91.0.10/24 dev dummy0 + run_in_container_netns ip link set dummy0 up + + run_netavark --file ${TESTSDIR}/testfiles/bridge-staticroutes.json setup $(get_container_netns_path) + + # check static routes + run_in_container_netns ip r + assert "$output" "=~" "10.89.0.0/24 via 10.88.0.2" "static route not set" + assert "$output" "=~" "10.90.0.0/24 via 10.88.0.3" "static route not set" + assert "$output" "=~" "10.92.0.0/24 via 10.91.0.1" "static route not set" + + run_netavark --file ${TESTSDIR}/testfiles/bridge-staticroutes.json teardown $(get_container_netns_path) + + # check static routes get removed (re-read the routing table; $output still holds the teardown output otherwise) + run_in_container_netns ip r + assert "$output" "!~" "10.89.0.0/24 via 10.88.0.2" "static route not removed" + assert "$output" "!~" "10.90.0.0/24 via 10.88.0.3" "static route not removed" + assert "$output" "!~" "10.92.0.0/24 via 10.91.0.1" "static route not removed" +} + +@test "$fw_driver - bridge with no default route" { + run_netavark --file ${TESTSDIR}/testfiles/bridge-nodefaultroute.json setup $(get_container_netns_path) + + run_in_container_netns ip r + assert "$output" "!~" "default" "default route exists" + + run_in_container_netns ip -6 r + assert "$output" "!~" "default" "default route exists" + + run_netavark --file ${TESTSDIR}/testfiles/bridge-nodefaultroute.json teardown $(get_container_netns_path) + assert
"" "no errors" +} + +@test "$fw_driver - bridge driver must generate config for aardvark with multiple custom dns server with network dns servers and perform update" { + # get a random port directly to avoid low ports e.g. 53 would not create nftables rules + dns_port=$((RANDOM+10000)) + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + setup $(get_container_netns_path) + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1 127.0.0.1,3.3.3.3" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + + aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid") + assert "$aardvark_pid" =~ "^[0-9]+$" "aardvark pid not found" + run_helper ps "$aardvark_pid" + assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options" + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + update podman1 --network-dns-servers 8.8.8.8 + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1 8.8.8.8" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + + # remove network and check running and verify if aardvark config has no nameserver + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + update podman1 --network-dns-servers "" + + # check aardvark 
config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" == "10.89.3.1,fd10:88:a::1" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + +} + +# netavark must do no-op on upates when no aardvark config is there +@test "run netavark update - no-op" { + # get a random port directly to avoid low ports e.g. 53 would not create nftables rules + dns_port=$((RANDOM+10000)) + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + update podman1 --network-dns-servers 8.8.8.8 +} + +@test "$fw_driver - ipv6 bridge" { + run_netavark --file ${TESTSDIR}/testfiles/ipv6-bridge.json setup $(get_container_netns_path) + result="$output" + assert_json "$result" 'has("podman1")' == "true" "object key exists" + + mac=$(jq -r '.podman1.interfaces.eth0.mac_address' <<<"$result") + # check that interface exists + run_in_container_netns ip -j --details link show eth0 + link_info="$output" + assert_json "$link_info" ".[].address" == "$mac" "MAC matches container mac" + assert_json "$link_info" '.[].flags[] | select(.=="UP")' == "UP" "Container interface is up" + assert_json "$link_info" ".[].linkinfo.info_kind" == "veth" "Container interface is a veth device" + + ipaddr="fd10:88:a::2/64" + run_in_container_netns ip addr show eth0 + assert "$output" =~ "$ipaddr" "IP address matches container address" + assert_json "$result" ".podman1.interfaces.eth0.subnets[0].ipnet" == "$ipaddr" "Result contains correct IP address" + + run_in_host_netns ip -j --details link show podman1 + link_info="$output" + assert_json "$link_info" '.[].flags[] | select(.=="UP")' == "UP" "Host bridge interface is up" + assert_json "$link_info" ".[].linkinfo.info_kind" == "bridge" "The bridge interface is actually a bridge" + + 
ipaddr="fd10:88:a::1" + run_in_host_netns ip addr show podman1 + assert "$output" =~ "$ipaddr" "IP address matches bridge gateway address" + assert_json "$result" ".podman1.interfaces.eth0.subnets[0].gateway" == "$ipaddr" "Result contains gateway address" + + # check that the loopback adapter is up + run_in_container_netns ip addr show lo + assert "$output" =~ "127.0.0.1" "Loopback adapter is up (has address)" + + run_in_host_netns ping6 -c 1 fd10:88:a::2 + + run_netavark --file ${TESTSDIR}/testfiles/ipv6-bridge.json teardown $(get_container_netns_path) +} + +@test "$fw_driver - ipv6 bridge with static routes" { + # add second interface and routes through that interface to test proper teardown + run_in_container_netns ip link add type dummy + run_in_container_netns ip a add fd10:49:b::2/64 dev dummy0 + run_in_container_netns ip link set dummy0 up + + run_netavark --file ${TESTSDIR}/testfiles/ipv6-bridge-staticroutes.json setup $(get_container_netns_path) + + # check static routes + run_in_container_netns ip -6 -br r + assert "$output" "=~" "fd10:89:b::/64 via fd10:88:a::ac02" "static route not set" + assert "$output" "=~" "fd10:89:c::/64 via fd10:88:a::ac03" "static route not set" + assert "$output" "=~" "fd10:51:b::/64 via fd10:49:b::30" "static route not set" + + run_netavark --file ${TESTSDIR}/testfiles/ipv6-bridge-staticroutes.json teardown $(get_container_netns_path) + + # check static routes get removed + run_in_container_netns ip -6 -br r + assert "$output" "!~" "fd10:89:b::/64 via fd10:88:a::ac02" "static route not removed" + assert "$output" "!~" "fd10:89:c::/64 via fd10:88:a::ac03" "static route not removed" + assert "$output" "!~" "fd10:51:b::/64 via fd10:49:b::30" "static route not removed" + + run_in_container_netns ip link delete dummy0 +} + +@test "$fw_driver - bridge driver must generate config for aardvark with custom dns server" { + # get a random port directly to avoid low ports e.g. 
53 would not create nftables rules + dns_port=$((RANDOM+10000)) + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-custom-dns-server.json \ + setup $(get_container_netns_path) + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + + aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid") + assert "$aardvark_pid" =~ "^[0-9]+$" "aardvark pid not found" + run_helper ps "$aardvark_pid" + assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options" +} + +@test "$fw_driver - bridge driver must generate config for aardvark with multiple custom dns server" { + # get a random port directly to avoid low ports e.g. 
53 would not create nftables + dns_port=$((RANDOM+10000)) + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-multiple-custom-dns-server.json \ + setup $(get_container_netns_path) + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + + aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid") + assert "$aardvark_pid" =~ "^[0-9]+$" "aardvark pid not found" + run_helper ps "$aardvark_pid" + assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options" +} + +@test "$fw_driver - bridge driver must generate config for aardvark with multiple custom dns server with network dns servers" { + # get a random port directly to avoid low ports e.g. 
53 would not create nftables rules + dns_port=$((RANDOM+10000)) + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + setup $(get_container_netns_path) + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1 127.0.0.1,3.3.3.3" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + + aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid") + assert "$aardvark_pid" =~ "^[0-9]+$" "aardvark pid not found" + run_helper ps "$aardvark_pid" + assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options" +} + +@test "$fw_driver - dual stack dns with alt port" { + # get a random port directly to avoid low ports e.g. 
53 would not create nftables rules + dns_port=$((RANDOM+10000)) + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge.json \ + setup $(get_container_netns_path) + + # check nftables + run_in_host_netns nft list chain inet netavark NETAVARK-HOSTPORT-DNAT + assert "${lines[1]}" =~ "ip daddr 10.89.0.1 udp dport 53 dnat ip to 10.89.0.1:$dns_port" "DNS forward rule" + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + + aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid") + assert "$aardvark_pid" =~ "^[0-9]+$" "aardvark pid not found" + run_helper ps "$aardvark_pid" + assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options" + + # test redirection actually works + run_in_container_netns dig +short "somename.dns.podman" @10.89.3.1 A "somename.dns.podman" @10.89.3.1 AAAA + assert "${lines[0]}" =~ "10.89.3.2" "ipv4 dns resolution works 1/2" + assert "${lines[1]}" =~ "fd10:88:a::2" "ipv6 dns resolution works 2/2" + + run_in_container_netns dig +short "somename.dns.podman" @fd10:88:a::1 + assert "${lines[0]}" =~ "10.89.3.2" "ipv6 dns resolution works" + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge.json \ + teardown $(get_container_netns_path) + + # check nftables rules were removed + run_in_host_netns nft list chain inet netavark NETAVARK-HOSTPORT-DNAT + assert "${#lines[@]}" = 4 "too many v4 NETAVARK_HOSTPORT-DNAT rules after teardown" + + # check aardvark config got cleared, process killed + expected_rc=2 run_helper ls "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + expected_rc=1 
run_helper ps "$aardvark_pid" +} + +@test "$fw_driver - check error message from netns thread" { + # create interface in netns to force error + run_in_container_netns ip link add eth0 type dummy + + expected_rc=1 run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + assert_json ".error" "create veth pair: interface eth0 already exists on container namespace: Netlink error: File exists (os error 17)" "interface exists on netns" +} + +@test "$fw_driver - port forwarding ipv4 - tcp" { + test_port_fw +} + +@test "$fw_driver - port forwarding ipv6 - tcp" { + test_port_fw ip=6 +} + +@test "$fw_driver - port forwarding dualstack - tcp" { + test_port_fw ip=dual +} + +@test "$fw_driver - port forwarding ipv4 - udp" { + test_port_fw proto=udp +} + +@test "$fw_driver - port forwarding ipv6 - udp" { + test_port_fw ip=6 proto=udp +} + +@test "$fw_driver - port forwarding dualstack - udp" { + test_port_fw ip=dual proto=udp +} + +@test "$fw_driver - port forwarding ipv4 - sctp" { + setup_sctp_kernel_module + test_port_fw proto=sctp +} + +@test "$fw_driver - port forwarding ipv6 - sctp" { + setup_sctp_kernel_module + test_port_fw ip=6 proto=sctp +} + +@test "$fw_driver - port forwarding dualstack - sctp" { + setup_sctp_kernel_module + test_port_fw ip=dual proto=sctp +} + +@test "$fw_driver - port range forwarding ipv4 - tcp" { + test_port_fw range=3 +} + +@test "$fw_driver - port range forwarding ipv6 - tcp" { + test_port_fw ip=6 range=3 +} + +@test "$fw_driver - port range forwarding ipv4 - udp" { + test_port_fw proto=udp range=3 +} + +@test "$fw_driver - port range forwarding ipv6 - udp" { + test_port_fw ip=6 proto=udp range=3 +} + +@test "$fw_driver - port range forwarding dual - udp" { + test_port_fw ip=dual proto=udp range=3 +} + +@test "$fw_driver - port range forwarding dual - tcp" { + test_port_fw ip=dual proto=tcp range=3 +} + + +@test "$fw_driver - port forwarding with hostip ipv4 - tcp" { + add_dummy_interface_on_host dummy0 
"172.16.0.1/24" + test_port_fw hostip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with hostip ipv4 dual stack - tcp" { + add_dummy_interface_on_host dummy0 "172.16.0.1/24" + test_port_fw ip=dual hostip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with hostip ipv6 - tcp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=6 hostip="fd65:8371:648b:0c06::1" +} + +@test "$fw_driver - port forwarding with hostip ipv6 dual stack - tcp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=dual hostip="fd65:8371:648b:0c06::1" +} + +@test "$fw_driver - port forwarding with wildcard hostip ipv4 - tcp" { + add_dummy_interface_on_host dummy0 "172.16.0.1/24" + test_port_fw hostip="0.0.0.0" connectip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with wildcard hostip ipv4 dual stack - tcp" { + add_dummy_interface_on_host dummy0 "172.16.0.1/24" + test_port_fw ip=dual hostip="0.0.0.0" connectip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with wildcard hostip ipv6 - tcp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=6 hostip="::" connectip="fd65:8371:648b:0c06::1" +} + +@test "$fw_driver - port forwarding with wildcard hostip ipv6 dual stack - tcp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=dual hostip="::" connectip="fd65:8371:648b:0c06::1" +} + +@test "$fw_driver - port forwarding with hostip ipv4 - udp" { + add_dummy_interface_on_host dummy0 "172.16.0.1/24" + test_port_fw proto=udp hostip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with hostip ipv6 - udp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=6 proto=udp hostip="fd65:8371:648b:0c06::1" +} + +@test "bridge ipam none" { + read -r -d '\0' config < /proc/sys/net/ipv4/ip_forward" + run_in_container_netns sh -c "echo 1 > /proc/sys/net/ipv4/conf/default/arp_notify" + run_in_host_netns mount -t 
proc -o ro,nosuid,nodev,noexec proc /proc + + run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json teardown $(get_container_netns_path) + + run_in_host_netns mount -t proc -o remount,rw /proc + run_in_host_netns sh -c "echo 0 > /proc/sys/net/ipv4/ip_forward" + run_in_host_netns mount -t proc -o remount,ro /proc + + expected_rc=1 run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + assert_json ".error" "Sysctl error: IO Error: Read-only file system (os error 30)" "Sysctl error because fs is read only" +} + + +@test "$fw_driver - bridge static mac" { + mac="aa:bb:cc:dd:ee:ff" + + read -r -d '\0' config <