diff --git a/Cargo.lock b/Cargo.lock index 2dc9ad7d6..aad960ca2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1355,6 +1355,7 @@ dependencies = [ "netlink-packet-route", "netlink-packet-utils", "netlink-sys", + "nftables", "nispor", "nix 0.27.1", "once_cell", @@ -1452,6 +1453,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "nftables" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10abe0d631f93f30c3600ecb4ddd21561bcc6bac1837db11cda9c1c922207e7" +dependencies = [ + "serde", + "serde_json", + "serde_path_to_error", + "strum", + "strum_macros", + "thiserror", +] + [[package]] name = "nispor" version = "1.2.16" @@ -1929,6 +1944,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4beec8bce849d58d06238cb50db2e1c417cfeafa4c63f692b15c82b7c80f8335" +dependencies = [ + "itoa", + "serde", +] + [[package]] name = "serde_repr" version = "0.1.17" @@ -2018,6 +2043,25 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index cb8a228a1..a4380844d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ sha2 = "0.10.8" netlink-packet-utils = "0.5.2" netlink-packet-route = "0.18.1" netlink-packet-core = "0.7.0" +nftables = "0.2.4" 
fs2 = "0.4.3" netlink-sys = "0.8.5" tokio = { version = "1.35", features = ["rt", "rt-multi-thread", "signal", "fs"] } diff --git a/src/error/mod.rs b/src/error/mod.rs index 9ca9d9801..bb6bbb6c8 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -81,6 +81,11 @@ pub enum NetavarkError { DHCPProxy(tonic::Status), List(NetavarkErrorList), + + Nftables(nftables::helper::NftablesError), + + SubnetParse(ipnet::AddrParseError), + AddrParse(std::net::AddrParseError), } /// Internal struct for JSON output @@ -160,6 +165,9 @@ impl fmt::Display for NetavarkError { Ok(()) } } + NetavarkError::Nftables(e) => write!(f, "nftables error: {e}"), + NetavarkError::SubnetParse(e) => write!(f, "parsing IP subnet error: {e}"), + NetavarkError::AddrParse(e) => write!(f, "parsing IP address error: {e}"), } } } @@ -213,3 +221,21 @@ impl From for NetavarkError { NetavarkError::DHCPProxy(err) } } + +impl From for NetavarkError { + fn from(err: nftables::helper::NftablesError) -> Self { + NetavarkError::Nftables(err) + } +} + +impl From for NetavarkError { + fn from(err: ipnet::AddrParseError) -> Self { + NetavarkError::SubnetParse(err) + } +} + +impl From for NetavarkError { + fn from(err: std::net::AddrParseError) -> Self { + NetavarkError::AddrParse(err) + } +} diff --git a/src/firewall/firewalld.rs b/src/firewall/firewalld.rs index 8d97793df..635b26286 100644 --- a/src/firewall/firewalld.rs +++ b/src/firewall/firewalld.rs @@ -4,7 +4,7 @@ use crate::network::internal_types; use crate::network::internal_types::{PortForwardConfig, TearDownNetwork, TeardownPortForward}; use crate::network::types::PortMapping; use core::convert::TryFrom; -use log::{debug, info}; +use log::{debug, info, warn}; use std::collections::HashMap; use std::vec::Vec; use zbus::{ @@ -679,3 +679,70 @@ fn make_port_tuple(port: &PortMapping, addr: &str) -> (String, String, String, S to_return } } + +/// Check if firewalld is running. 
+/// Not used within the firewalld driver, but by other drivers that may need to +/// interact with firewalld. +pub fn is_firewalld_running(conn: &Connection) -> bool { + conn.call_method( + Some("org.freedesktop.DBus"), + "/org/freedesktop/DBus", + Some("org.freedesktop.DBus"), + "GetNameOwner", + &"org.fedoraproject.FirewallD1", + ) + .is_ok() +} + +/// If possible, add a firewalld rule to allow traffic. +/// Ignore all errors, beyond possibly logging them. +/// Not used within the firewalld driver, but by other drivers that may need to +/// interact with firewalld. +pub fn add_firewalld_if_possible(net: &ipnet::IpNet) { + let conn = match Connection::system() { + Ok(conn) => conn, + Err(_) => return, + }; + if !is_firewalld_running(&conn) { + return; + } + debug!("Adding firewalld rules for network {}", net.to_string()); + + match add_source_subnets_to_zone(&conn, "trusted", &[*net]) { + Ok(_) => {} + Err(e) => warn!( + "Error adding subnet {} to firewalld trusted zone: {}", + net.to_string(), + e + ), + } +} + +/// If possible, remove a firewalld rule to allow traffic. +/// Ignore all errors, beyond possibly logging them. +/// Not used within the firewalld driver, but by other drivers that may need to +/// interact with firewalld.
+pub fn rm_firewalld_if_possible(net: &ipnet::IpNet) { + let conn = match Connection::system() { + Ok(conn) => conn, + Err(_) => return, + }; + if !is_firewalld_running(&conn) { + return; + } + debug!("Removing firewalld rules for IPs {}", net.to_string()); + match conn.call_method( + Some("org.fedoraproject.FirewallD1"), + "/org/fedoraproject/FirewallD1", + Some("org.fedoraproject.FirewallD1.zone"), + "removeSource", + &("trusted", net.to_string()), + ) { + Ok(_) => {} + Err(e) => warn!( + "Error removing subnet {} from firewalld trusted zone: {}", + net.to_string(), + e + ), + }; +} diff --git a/src/firewall/iptables.rs b/src/firewall/iptables.rs index 39221c967..8baaf0112 100644 --- a/src/firewall/iptables.rs +++ b/src/firewall/iptables.rs @@ -10,8 +10,6 @@ use crate::network::internal_types::{ }; use iptables; use iptables::IPTables; -use log::{debug, warn}; -use zbus::blocking::Connection; pub(crate) const MAX_HASH_SIZE: usize = 13; @@ -64,7 +62,7 @@ impl firewall::FirewallDriver for IptablesDriver { create_network_chains(chains)?; - add_firewalld_if_possible(&network); + firewalld::add_firewalld_if_possible(&network); } } Ok(()) @@ -106,7 +104,7 @@ impl firewall::FirewallDriver for IptablesDriver { } if tear.complete_teardown { - rm_firewalld_if_possible(&network) + firewalld::rm_firewalld_if_possible(&network) } } } @@ -216,64 +214,3 @@ impl firewall::FirewallDriver for IptablesDriver { Result::Ok(()) } } - -/// Check if firewalld is running -fn is_firewalld_running(conn: &Connection) -> bool { - conn.call_method( - Some("org.freedesktop.DBus"), - "/org/freedesktop/DBus", - Some("org.freedesktop.DBus"), - "GetNameOwner", - &"org.fedoraproject.FirewallD1", - ) - .is_ok() -} - -/// If possible, add a firewalld rule to allow traffic. -/// Ignore all errors, beyond possibly logging them. 
-fn add_firewalld_if_possible(net: &ipnet::IpNet) { - let conn = match Connection::system() { - Ok(conn) => conn, - Err(_) => return, - }; - if !is_firewalld_running(&conn) { - return; - } - debug!("Adding firewalld rules for network {}", net.to_string()); - - match firewalld::add_source_subnets_to_zone(&conn, "trusted", &[*net]) { - Ok(_) => {} - Err(e) => warn!( - "Error adding subnet {} from firewalld trusted zone: {}", - net.to_string(), - e - ), - } -} - -// If possible, remove a firewalld rule to allow traffic. -// Ignore all errors, beyond possibly logging them. -fn rm_firewalld_if_possible(net: &ipnet::IpNet) { - let conn = match Connection::system() { - Ok(conn) => conn, - Err(_) => return, - }; - if !is_firewalld_running(&conn) { - return; - } - debug!("Removing firewalld rules for IPs {}", net.to_string()); - match conn.call_method( - Some("org.fedoraproject.FirewallD1"), - "/org/fedoraproject/FirewallD1", - Some("org.fedoraproject.FirewallD1.zone"), - "removeSource", - &("trusted", net.to_string()), - ) { - Ok(_) => {} - Err(e) => warn!( - "Error removing subnet {} from firewalld trusted zone: {}", - net.to_string(), - e - ), - }; -} diff --git a/src/firewall/mod.rs b/src/firewall/mod.rs index f6b1086ac..c2eda43ac 100644 --- a/src/firewall/mod.rs +++ b/src/firewall/mod.rs @@ -8,6 +8,7 @@ use zbus::blocking::Connection; pub mod firewalld; pub mod fwnone; pub mod iptables; +pub mod nft; pub mod state; mod varktables; @@ -108,9 +109,7 @@ pub fn get_supported_firewall_driver( } FirewallImpl::Nftables => { info!("Using nftables firewall driver"); - Err(NetavarkError::msg( - "nftables support presently not available", - )) + nft::new() } FirewallImpl::Fwnone => { info!("Not using firewall"); diff --git a/src/firewall/nft.rs b/src/firewall/nft.rs new file mode 100644 index 000000000..4730874c3 --- /dev/null +++ b/src/firewall/nft.rs @@ -0,0 +1,1122 @@ +use crate::error::{NetavarkError, NetavarkResult}; +use crate::firewall; +use crate::firewall::firewalld; 
+use crate::network::internal_types; +use crate::network::types::PortMapping; +use ipnet::IpNet; +use nftables::batch::Batch; +use nftables::expr; +use nftables::helper::{self}; +use nftables::schema; +use nftables::stmt; +use nftables::types; +use std::collections::HashSet; +use std::net::IpAddr; + +const TABLENAME: &str = "netavark"; + +const INPUTCHAIN: &str = "INPUT"; +const FORWARDCHAIN: &str = "FORWARD"; +const POSTROUTINGCHAIN: &str = "POSTROUTING"; +const PREROUTINGCHAIN: &str = "PREROUTING"; +const OUTPUTCHAIN: &str = "OUTPUT"; +const DNATCHAIN: &str = "NETAVARK-HOSTPORT-DNAT"; +const MASKCHAIN: &str = "NETAVARK-HOSTPORT-SETMARK"; + +const MASK: u32 = 0x2000; + +/// The dnat priority for chains +/// This (and the below) are based on https://wiki.nftables.org/wiki-nftables/index.php/Netfilter_hooks#Priority_within_hook +const DNATPRIO: i32 = -100; +/// The srcnat priority for chains +const SRCNATPRIO: i32 = 100; +/// The filter priority for chains +const FILTERPRIO: i32 = 0; + +pub struct Nftables {} + +pub fn new() -> Result, NetavarkError> { + Ok(Box::new(Nftables {})) +} + +impl firewall::FirewallDriver for Nftables { + fn driver_name(&self) -> &str { + firewall::NFTABLES + } + + fn setup_network(&self, network_setup: internal_types::SetupNetwork) -> NetavarkResult<()> { + let mut batch = Batch::new(); + + // Overall table + batch.add(schema::NfListObject::Table(schema::Table::new( + types::NfFamily::INet, + TABLENAME.to_string(), + ))); + + // Five default chains, one for each hook we have to monitor + batch.add(make_complex_chain( + INPUTCHAIN, + types::NfChainType::Filter, + types::NfHook::Input, + FILTERPRIO, + )); + batch.add(make_complex_chain( + FORWARDCHAIN, + types::NfChainType::Filter, + types::NfHook::Forward, + FILTERPRIO, + )); + batch.add(make_complex_chain( + POSTROUTINGCHAIN, + types::NfChainType::NAT, + types::NfHook::Postrouting, + SRCNATPRIO, + )); + batch.add(make_complex_chain( + PREROUTINGCHAIN, + types::NfChainType::NAT, + 
types::NfHook::Prerouting, + DNATPRIO, + )); + batch.add(make_complex_chain( + OUTPUTCHAIN, + types::NfChainType::NAT, + types::NfHook::Output, + DNATPRIO, + )); + + // dnat rules. Not used here, but need to be created first, because they have rules that must be first in their chains. + // A lot of these are thus conditional on if the rule already exists or not. + let existing_rules = helper::get_current_ruleset(None, None)?; + + // Two extra chains, not hooked to anything, for our NAT pf rules + batch.add(make_basic_chain(DNATCHAIN)); + batch.add(make_basic_chain(MASKCHAIN)); + + // Postrouting chain needs a single rule to masquerade if mask is set. + // But only one copy of that rule. So check if such a rule exists. + let match_meta_masq = |r: &schema::Rule| -> bool { + // Match on any rule that matches against 0x2000 + for statement in &r.expr { + match statement { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::Number(n) => { + if *n == MASK { + return true; + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + if get_matching_rules_in_chain(&existing_rules, POSTROUTINGCHAIN, match_meta_masq) + .is_empty() + { + // Postrouting: meta mark & 0x2000 == 0x2000 masquerade + batch.add(make_rule( + POSTROUTINGCHAIN, + vec![ + stmt::Statement::Match(stmt::Match { + left: expr::Expression::BinaryOperation(expr::BinaryOperation::AND( + Box::new(expr::Expression::Named(expr::NamedExpression::Meta( + expr::Meta { + key: expr::MetaKey::Mark, + }, + ))), + Box::new(expr::Expression::Number(MASK)), + )), + right: expr::Expression::Number(MASK), + op: stmt::Operator::EQ, + }), + stmt::Statement::Masquerade(None), + ], + )); + } + + // Mask chain needs a single rule to apply the mask. + // But only one copy of that rule. So check if such a rule exists. + let match_meta_mark = |r: &schema::Rule| -> bool { + // Match on any mangle rule. 
+ for statement in &r.expr { + match statement { + stmt::Statement::Mangle(_) => return true, + _ => continue, + } + } + false + }; + if get_matching_rules_in_chain(&existing_rules, MASKCHAIN, match_meta_mark).is_empty() { + // Mask chain: mark or 0x2000 + batch.add(make_rule( + MASKCHAIN, + vec![stmt::Statement::Mangle(stmt::Mangle { + key: expr::Expression::Named(expr::NamedExpression::Meta(expr::Meta { + key: expr::MetaKey::Mark, + })), + value: expr::Expression::BinaryOperation(expr::BinaryOperation::OR( + Box::new(expr::Expression::Named(expr::NamedExpression::Meta( + expr::Meta { + key: expr::MetaKey::Mark, + }, + ))), + Box::new(expr::Expression::Number(MASK)), + )), + })], + )); + } + + // We need rules in Prerouting and Output pointing to our dnat chain. + // But only if they do not exist. + let match_jump_dnat = get_rule_matcher_jump_to(DNATCHAIN.to_string()); + // Prerouting: fib daddr type local jump + // Output: fib daddr type local jump + let mut rules_hash: HashSet = HashSet::new(); + rules_hash.insert(expr::FibFlag::Daddr); + let base_conditions: Vec = vec![ + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Fib(expr::Fib { + result: expr::FibResult::Type, + flags: rules_hash, + })), + right: expr::Expression::String("local".to_string()), + op: stmt::Operator::EQ, + }), + get_jump_action(DNATCHAIN), + ]; + if get_matching_rules_in_chain(&existing_rules, PREROUTINGCHAIN, &match_jump_dnat) + .is_empty() + { + batch.add(make_rule(PREROUTINGCHAIN, base_conditions.clone())); + } + if get_matching_rules_in_chain(&existing_rules, OUTPUTCHAIN, &match_jump_dnat).is_empty() { + batch.add(make_rule(OUTPUTCHAIN, base_conditions.clone())); + } + + // Forward chain: ct state invalid drop + let match_deny = |r: &schema::Rule| -> bool { + for statement in &r.expr { + match statement { + stmt::Statement::Drop(_) => return true, + _ => continue, + } + } + false + }; + if get_matching_rules_in_chain(&existing_rules, 
FORWARDCHAIN, match_deny).is_empty() { + batch.add(make_rule( + FORWARDCHAIN, + vec![ + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::CT(expr::CT { + key: "state".to_string(), + family: None, + dir: None, + })), + right: expr::Expression::String("invalid".to_string()), + op: stmt::Operator::IN, + }), + stmt::Statement::Drop(None), + ], + )); + } + + // Basic forwarding for all subnets + if let Some(nets) = network_setup.subnets { + for subnet in nets { + let chain = get_subnet_chain_name(subnet, &network_setup.network_id, false); + + // Add us to firewalld if necessary. + // Do this first, as firewalld doesn't wipe our rules - so after a reload, we skip everything below. + firewalld::add_firewalld_if_possible(&subnet); + + // Do we already have a chain for the subnet? + if get_chain(&existing_rules, &chain).is_some() { + continue; + } + + // We don't. Make one. + batch.add(make_basic_chain(&chain)); + + log::info!("Creating container chain {chain}"); + + // Subnet chain: ip daddr accept + batch.add(make_rule( + &chain, + vec![ + get_subnet_match(&subnet, "daddr", stmt::Operator::EQ), + stmt::Statement::Accept(None), + ], + )); + + // Subnet chain: ip daddr != 224.0.0.0/4 masquerade + // (for IPv6 subnets the multicast range is ff00::/8, per RFC 4291) + let multicast_address: IpNet = match subnet { + IpNet::V4(_) => "224.0.0.0/4".parse()?, + IpNet::V6(_) => "ff00::/8".parse()?, + }; + batch.add(make_rule( + &chain, + vec![ + get_subnet_match(&multicast_address, "daddr", stmt::Operator::NEQ), + stmt::Statement::Masquerade(None), + ], + )); + + // Next, populate basic chains with forwarding rules + // Input chain: ip saddr udp dport 53 accept + batch.add(make_rule( + INPUTCHAIN, + vec![ + get_subnet_match(&subnet, "saddr", stmt::Operator::EQ), + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Payload( + expr::Payload { + protocol: "udp".to_string(), + field: "dport".to_string(), + }, + )), + right: expr::Expression::Number(53), + op:
stmt::Operator::EQ, + }), + stmt::Statement::Accept(None), + ], + )); + // Forward chain: ip daddr ct state related,established accept + batch.add(make_rule( + FORWARDCHAIN, + vec![ + get_subnet_match(&subnet, "daddr", stmt::Operator::EQ), + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::CT(expr::CT { + key: "state".to_string(), + family: None, + dir: None, + })), + right: expr::Expression::List(vec![ + expr::Expression::String("established".to_string()), + expr::Expression::String("related".to_string()), + ]), + op: stmt::Operator::IN, + }), + stmt::Statement::Accept(None), + ], + )); + // Forward chain: ip saddr accept + batch.add(make_rule( + FORWARDCHAIN, + vec![ + get_subnet_match(&subnet, "saddr", stmt::Operator::EQ), + stmt::Statement::Accept(None), + ], + )); + // Postrouting chain: ip saddr jump + batch.add(make_rule( + POSTROUTINGCHAIN, + vec![ + get_subnet_match(&subnet, "saddr", stmt::Operator::EQ), + get_jump_action(&chain), + ], + )); + } + } + + let rules = batch.to_nftables(); + + helper::apply_ruleset(&rules, None, None)?; + + Ok(()) + } + + fn teardown_network(&self, tear: internal_types::TearDownNetwork) -> NetavarkResult<()> { + let mut batch = Batch::new(); + + let existing_rules = helper::get_current_ruleset(None, None)?; + + if let Some(nets) = tear.config.subnets { + for subnet in nets { + // Match subnet, either saddr or daddr. + let match_subnet = |r: &schema::Rule| -> bool { + // Statement matching: We only care about match statements. + // Don't bother with left side. Just check if what they compare to is our subnet. 
+ for statement in &r.expr { + match statement { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::Named(expr::NamedExpression::Prefix(p)) => { + match p.addr.as_ref() { + expr::Expression::String(s) => { + if *s == subnet.addr().to_string() + && subnet.prefix_len() as u32 == p.len + { + return true; + } + } + _ => continue, + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + + let mut to_remove: Vec = Vec::new(); + to_remove.append(&mut get_matching_rules_in_chain( + &existing_rules, + INPUTCHAIN, + match_subnet, + )); + to_remove.append(&mut get_matching_rules_in_chain( + &existing_rules, + FORWARDCHAIN, + match_subnet, + )); + to_remove.append(&mut get_matching_rules_in_chain( + &existing_rules, + POSTROUTINGCHAIN, + match_subnet, + )); + + log::debug!("Removing {} rules", to_remove.len()); + + for rule in to_remove { + batch.delete(schema::NfListObject::Rule(rule)); + } + + // Delete the chain last + let chain = get_subnet_chain_name(subnet, &tear.config.network_id, false); + if let Some(c) = get_chain(&existing_rules, &chain) { + batch.delete(schema::NfListObject::Chain(c)); + } + + // After all nftables work is done, remove us from firewalld. + firewalld::rm_firewalld_if_possible(&subnet); + } + } + + let rules = batch.to_nftables(); + + helper::apply_ruleset(&rules, None, None)?; + Ok(()) + } + + fn setup_port_forward( + &self, + setup_portfw: internal_types::PortForwardConfig, + ) -> NetavarkResult<()> { + let mut batch = Batch::new(); + + let existing_rules = helper::get_current_ruleset(None, None)?; + + // Need DNAT rules for DNS if Aardvark is not on port 53. + // Only need one per DNS server IP, so check if they already exist first. 
+ if setup_portfw.dns_port != 53 { + for ip in setup_portfw.dns_server_ips { + let match_dns_ip_dnat = |r: &schema::Rule| { + for statement in &r.expr { + match statement { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::String(s) => { + if *s == ip.to_string() { + return true; + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + if !get_matching_rules_in_chain(&existing_rules, DNATCHAIN, match_dns_ip_dnat) + .is_empty() + { + continue; + } + + // We have multiple DNS server IPs. Potentially v4 and v6 both. + // Only add those when the container has an IP address matching the family. + match ip { + IpAddr::V4(_) => { + if setup_portfw.container_ip_v4.is_some() { + batch.add(make_dns_dnat_rule(ip, setup_portfw.dns_port)); + } + } + IpAddr::V6(_) => { + if setup_portfw.container_ip_v6.is_some() { + batch.add(make_dns_dnat_rule(ip, setup_portfw.dns_port)); + } + } + } + } + } + + if let Some(ip_v4) = setup_portfw.container_ip_v4 { + if let Some(subnet_v4) = setup_portfw.subnet_v4 { + for rule in get_dnat_rules_for_addr_family( + ip_v4, + subnet_v4, + &setup_portfw.id, + &existing_rules, + &setup_portfw, + )? { + batch.add(rule); + } + } + } + if let Some(ip_v6) = setup_portfw.container_ip_v6 { + if let Some(subnet_v6) = setup_portfw.subnet_v6 { + for rule in get_dnat_rules_for_addr_family( + ip_v6, + subnet_v6, + &setup_portfw.id, + &existing_rules, + &setup_portfw, + )? 
{ + batch.add(rule); + } + } + } + + let rules = batch.to_nftables(); + + helper::apply_ruleset(&rules, None, None)?; + + Ok(()) + } + + fn teardown_port_forward( + &self, + teardown_pf: internal_types::TeardownPortForward, + ) -> NetavarkResult<()> { + let mut batch = Batch::new(); + + let existing_rules = helper::get_current_ruleset(None, None)?; + + let dnat_chain_v4 = teardown_pf + .config + .subnet_v4 + .map(|s| get_subnet_chain_name(s, &teardown_pf.config.id, true)); + let dnat_chain_v6 = teardown_pf + .config + .subnet_v6 + .map(|s| get_subnet_chain_name(s, &teardown_pf.config.id, true)); + + // We need two matchers for each port. + // One matching both the port and jumping to either the V4 or V6 chain (to clean NETAVARK_DNAT) + // One matching just the port, to clean the v4 and v6 chains for the network. + // As a bonus, the last one needs to match any individual port inside the range. + if let Some(ports) = teardown_pf.config.port_mappings { + for port in ports { + let matcher_port_jump = |r: &schema::Rule| -> bool { + let mut match_jump = false; + let mut match_port = false; + for stmt in &r.expr { + // Basically, check for match and jump statements. + // For match, check that the right side is appropriate + // for our port mapping. Has to handle range vs + // singleton. + // For jump, make sure that it matches either the v4 or + // v6 DNAT chains. + // If we find both, the rule matches. 
+ match stmt { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::Number(n) => { + if port.range <= 1 && port.host_port as u32 == *n { + if match_jump { + return true; + } + match_port = true; + } + } + expr::Expression::Range(r) => { + if port.range > 1 { + if r.range.len() != 2 { + // Malformed range, just return false + return false; + } + match r.range[0] { + expr::Expression::Number(n) => { + if port.host_port as u32 != n { + continue; + } + } + _ => continue, + } + match r.range[1] { + expr::Expression::Number(n) => { + if (port.host_port + port.range - 1) as u32 == n { + if match_jump { + return true; + } + match_port = true; + } + } + _ => continue, + } + } + } + _ => continue, + }, + stmt::Statement::Jump(j) => { + if let Some(v4) = &dnat_chain_v4 { + if &j.target == v4 { + if match_port { + return true; + } + match_jump = true; + } + } + if let Some(v6) = &dnat_chain_v6 { + if &j.target == v6 { + if match_port { + return true; + } + match_jump = true + } + } + } + _ => continue, + } + } + match_jump && match_port + }; + + let match_all_ports_in_range = |r: &schema::Rule| -> bool { + for stmt in &r.expr { + match stmt { + stmt::Statement::Match(m) => match &m.right { + expr::Expression::Number(n) => { + if port.range <= 1 && *n == port.host_port as u32 { + return true; + } + if port.range > 1 + && *n >= port.host_port as u32 + && *n <= (port.host_port + port.range - 1) as u32 + { + return true; + } + } + expr::Expression::Range(r) => { + if port.range > 1 { + if r.range.len() != 2 { + // Malformed range, just return false + return false; + } + match r.range[0] { + expr::Expression::Number(n) => { + if port.host_port as u32 != n { + continue; + } + } + _ => continue, + } + match r.range[1] { + expr::Expression::Number(n) => { + if (port.host_port + port.range - 1) as u32 == n { + return true; + } + } + _ => continue, + } + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + + for rule in + 
get_matching_rules_in_chain(&existing_rules, DNATCHAIN, matcher_port_jump) + { + batch.delete(schema::NfListObject::Rule(rule)); + } + + if let Some(v4) = &dnat_chain_v4 { + for rule in + get_matching_rules_in_chain(&existing_rules, v4, match_all_ports_in_range) + { + batch.delete(schema::NfListObject::Rule(rule)); + } + } + if let Some(v6) = &dnat_chain_v6 { + for rule in + get_matching_rules_in_chain(&existing_rules, v6, match_all_ports_in_range) + { + batch.delete(schema::NfListObject::Rule(rule)); + } + } + } + } + + if teardown_pf.complete_teardown { + let match_dns_dnat = |r: &schema::Rule| -> bool { + for statement in &r.expr { + match statement { + // Match any DNS server IP + stmt::Statement::Match(m) => match &m.right { + expr::Expression::String(s) => { + for ip in teardown_pf.config.dns_server_ips { + if *s == ip.to_string() { + return true; + } + } + } + _ => continue, + }, + _ => continue, + } + } + false + }; + for rule in get_matching_rules_in_chain(&existing_rules, DNATCHAIN, match_dns_dnat) { + batch.delete(schema::NfListObject::Rule(rule)); + } + + if let Some(v4) = dnat_chain_v4 { + if let Some(c) = get_chain(&existing_rules, &v4) { + batch.delete(schema::NfListObject::Chain(c)); + } + } + if let Some(v6) = dnat_chain_v6 { + if let Some(c) = get_chain(&existing_rules, &v6) { + batch.delete(schema::NfListObject::Chain(c)); + } + } + } + + let rules = batch.to_nftables(); + + helper::apply_ruleset(&rules, None, None)?; + + Ok(()) + } +} + +/// Convert a subnet into a chain name. +fn get_subnet_chain_name(subnet: IpNet, net_id: &str, dnat: bool) -> String { + // nftables is very lenient around chain name lengths. + // So let's use the full IP to be unambiguous. + // Replace . with _, : with -, and / with _nm (netmask), to remove special characters.
+ let subnet_clean = subnet + .to_string() + .replace('.', "_") + .replace(':', "-") + .replace('/', "_nm"); + let net_id_clean = if net_id.len() > 8 { + net_id.split_at(8).0 + } else { + net_id + }; + + if dnat { + format!("nv_{}_{}_dnat", net_id_clean, subnet_clean) + } else { + format!("nv_{}_{}", net_id_clean, subnet_clean) + } +} + +/// Get a statement to match the given IP address. +/// Field should be either "saddr" or "daddr" for matching source or destination. +fn get_ip_match(ip: &IpAddr, field: &str, op: stmt::Operator) -> stmt::Statement { + stmt::Statement::Match(stmt::Match { + left: ip_to_payload(ip, field), + right: expr::Expression::String(ip.to_string()), + op, + }) +} + +/// Convert a single IP into a Payload field. +/// Basically, pastes in "ip" or "ip6" in protocol field based on whether this is a v4 or v6 address. +fn ip_to_payload(addr: &IpAddr, field: &str) -> expr::Expression { + let proto = match addr { + IpAddr::V4(_) => "ip".to_string(), + IpAddr::V6(_) => "ip6".to_string(), + }; + + expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: proto, + field: field.to_string(), + })) +} + +/// Get a statement to match the given subnet. +/// Field should be either "saddr" or "daddr" for matching source or destination. +fn get_subnet_match(net: &IpNet, field: &str, op: stmt::Operator) -> stmt::Statement { + stmt::Statement::Match(stmt::Match { + left: subnet_to_payload(net, field), + right: expr::Expression::Named(expr::NamedExpression::Prefix(expr::Prefix { + addr: Box::new(expr::Expression::String(net.addr().to_string())), + len: net.prefix_len() as u32, + })), + op, + }) +} + +/// Convert a subnet into a Payload field. +/// Basically, pastes in "ip" or "ip6" in protocol field based on whether this +/// is a v4 or v6 subnet.
+fn subnet_to_payload(net: &IpNet, field: &str) -> expr::Expression { + let proto = match net { + IpNet::V4(_) => "ip".to_string(), + IpNet::V6(_) => "ip6".to_string(), + }; + + expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: proto, + field: field.to_string(), + })) +} + +/// Get a condition to match destination port/ports based on a given PortMapping. +/// Properly handles port ranges, protocol, etc. +fn get_dport_cond(port: &PortMapping) -> stmt::Statement { + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: port.protocol.clone(), + field: "dport".to_string(), + })), + right: if port.range > 1 { + // Ranges are a vector with a length of 2. + // First value start, second value end. + let range_vec = vec![ + expr::Expression::Number(port.host_port as u32), + expr::Expression::Number((port.host_port + port.range - 1) as u32), + ]; + expr::Expression::Range(expr::Range { range: range_vec }) + } else { + expr::Expression::Number(port.host_port as u32) + }, + op: stmt::Operator::EQ, + }) +} + +/// Make the first container DNAT chain rule, which is used for both IP and IPv6 DNAT. +fn get_subnet_dport_match( + dnat_chain: &str, + subnet: &Option, + host_ip_match: &Option, + dport_match: &stmt::Statement, +) -> schema::NfListObject { + // ip saddr ip daddr dport jump MARKCHAIN + let mut statements: Vec = Vec::new(); + if let Some(net) = &subnet { + statements.push(get_subnet_match(net, "saddr", stmt::Operator::EQ)); + } + + if let Some(stmt) = host_ip_match { + statements.push(stmt.clone()); + } + + statements.push(dport_match.clone()); + statements.push(get_jump_action(MASKCHAIN)); + make_rule(dnat_chain, statements) +} + +/// Create DNAT rules for each port to be forwarded. +/// Used for both IP and IPv6 DNAT. 
+fn get_dnat_port_rules( + dnat_chain: &str, + port: &PortMapping, + ip: &IpAddr, + host_ip_cond: &Option, +) -> Vec { + let mut rules: Vec = Vec::new(); + + // Container dnat chain: ip daddr dport dnat to + // Unfortunately: We don't have range support in the schema. So we need 1 rule per port. + let range = if port.range == 0 { 1 } else { port.range }; + for i in 0..range { + let host_port: u32 = (port.host_port + i) as u32; + let ctr_port: u32 = (port.container_port + i) as u32; + + let mut statements: Vec = Vec::new(); + if let Some(stmt) = host_ip_cond { + statements.push(stmt.clone()); + } + statements.push(stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: port.protocol.clone(), + field: "dport".to_string(), + })), + right: expr::Expression::Number(host_port), + op: stmt::Operator::EQ, + })); + statements.push(stmt::Statement::DNAT(Some(stmt::NAT { + addr: Some(expr::Expression::String(ip.to_string())), + family: Some(if ip.is_ipv6() { + stmt::NATFamily::IP6 + } else { + stmt::NATFamily::IP + }), + port: Some(ctr_port), + flags: None, + }))); + rules.push(make_rule(dnat_chain, statements)); + } + + rules +} + +fn get_dnat_rules_for_addr_family( + ip: IpAddr, + subnet: IpNet, + net_id: &str, + existing_rules: &schema::Nftables, + setup_portfw: &internal_types::PortForwardConfig, +) -> NetavarkResult> { + let mut rules: Vec = Vec::new(); + + if let Some(ports) = setup_portfw.port_mappings { + let subnet_dnat_chain = get_subnet_chain_name(subnet, net_id, true); + + // Make the chain if it does not exist + if get_chain(existing_rules, &subnet_dnat_chain).is_none() { + rules.push(make_basic_chain(&subnet_dnat_chain)); + } + + for port in ports { + // Condition to match destination ports (ports on the host) + let dport_cond = get_dport_cond(port); + // Destination address is only if user set an IP on the host to bind to. + // Used by multiple rules in this section. 
+ // We need to ignore wildcards, but only if our IP family matches the wildcard. + // If it doesn't, don't add any rules. + let daddr: Option = if !port.host_ip.is_empty() { + if port.host_ip == "0.0.0.0" { + if ip.is_ipv6() { + continue; + } + None + } else if port.host_ip == "::" { + if ip.is_ipv4() { + continue; + } + None + } else { + match port.host_ip.parse() { + Ok(i) => Some(i), + Err(_) => { + return Err(NetavarkError::msg(format!( + "invalid host ip \"{}\" provided for port {}", + port.host_ip, port.host_port + ))); + } + } + } + } else { + None + }; + + // Do not add rules where the address family of host address does not match container address. + if let Some(host_ip) = daddr { + if ip.is_ipv4() != host_ip.is_ipv4() { + continue; + } + } + let daddr_cond: Option = + daddr.map(|i| get_ip_match(&i, "daddr", stmt::Operator::EQ)); + + // dnat chain: dport jump + rules.push(make_rule( + DNATCHAIN, + vec![dport_cond.clone(), get_jump_action(&subnet_dnat_chain)], + )); + + // Container dnat chain: ip saddr ip daddr dport jump SETMARKCHAIN + rules.push(get_subnet_dport_match( + &subnet_dnat_chain, + &Some(subnet), + &daddr_cond, + &dport_cond, + )); + + // This rule is only used for v4. + if ip.is_ipv4() { + // Container dnat chain: ip saddr 127.0.0.1 ip daddr dport jump SETMARKCHAIN + let mut localhost_jump_statements: Vec = Vec::new(); + localhost_jump_statements.push(get_ip_match( + &("127.0.0.1".parse()?), + "saddr", + stmt::Operator::EQ, + )); + if let Some(stmt) = &daddr_cond { + localhost_jump_statements.push(stmt.clone()); + } + localhost_jump_statements.push(dport_cond); + localhost_jump_statements.push(get_jump_action(MASKCHAIN)); + rules.push(make_rule(&subnet_dnat_chain, localhost_jump_statements)); + } + + for rule in get_dnat_port_rules(&subnet_dnat_chain, port, &ip, &daddr_cond) { + rules.push(rule); + } + } + } + + Ok(rules) +} + +/// Make a DNAT rule to allow DNS traffic to a DNS server on a non-standard port (53 -> actual port). 
+fn make_dns_dnat_rule(dns_ip: &IpAddr, dns_port: u16) -> schema::NfListObject { + make_rule( + DNATCHAIN, + vec![ + get_ip_match(dns_ip, "daddr", stmt::Operator::EQ), + stmt::Statement::Match(stmt::Match { + left: expr::Expression::Named(expr::NamedExpression::Payload(expr::Payload { + protocol: "udp".to_string(), + field: "dport".to_string(), + })), + right: expr::Expression::Number(53), + op: stmt::Operator::EQ, + }), + stmt::Statement::DNAT(Some(stmt::NAT { + addr: Some(expr::Expression::String(dns_ip.to_string())), + family: Some(if dns_ip.is_ipv6() { + stmt::NATFamily::IP6 + } else { + stmt::NATFamily::IP + }), + port: Some(dns_port as u32), + flags: None, + })), + ], + ) +} + +/// Create a statement to jump to the given target +fn get_jump_action(target: &str) -> stmt::Statement { + stmt::Statement::Jump(stmt::JumpTarget { + target: target.to_string(), + }) +} + +/// Create an instruction to make a basic chain (no hooks, no priority). +/// Chain is always inet, always in our overall netavark table. +fn make_basic_chain(name: &str) -> schema::NfListObject { + schema::NfListObject::Chain(schema::Chain::new( + types::NfFamily::INet, + TABLENAME.to_string(), + name.to_string(), + None, + None, + None, + None, + None, + )) +} + +/// Create a more complicated chain with hooks and priority. +/// Policy is always accept, because we don't need anything else. 
+fn make_complex_chain( + name: &str, + chain_type: types::NfChainType, + hook: types::NfHook, + priority: i32, +) -> schema::NfListObject { + schema::NfListObject::Chain(schema::Chain::new( + types::NfFamily::INet, + TABLENAME.to_string(), + name.to_string(), + Some(chain_type), + Some(hook), + Some(priority), + None, + Some(types::NfChainPolicy::Accept), + )) +} + +/// Make a rule in the given chain with the given conditions +fn make_rule(chain: &str, conditions: Vec) -> schema::NfListObject { + schema::NfListObject::Rule(schema::Rule::new( + types::NfFamily::INet, + TABLENAME.to_string(), + chain.to_string(), + conditions, + )) +} + +/// Make a closure that matches any rule that jumps to the given chain. +fn get_rule_matcher_jump_to(jump_target: String) -> Box bool> { + Box::new(move |r: &schema::Rule| -> bool { + for statement in &r.expr { + match statement { + stmt::Statement::Jump(j) => { + return j.target == jump_target; + } + _ => continue, + } + } + false + }) +} + +/// Find all rules in the given chain which match the given closure (true == include). +/// Returns all those rules, in a vector. Vector will be empty if there are none. +fn get_matching_rules_in_chain bool>( + base_rules: &schema::Nftables, + chain: &str, + rule_match: F, +) -> Vec { + let mut rules: Vec = Vec::new(); + + // Basically, we get back a big, flat array of everything in the table. + // That makes this an absolute destructuring nightmare, but there's no avoiding it. + // Ignore everything we get back that is not a rule. + // Then ignore everything that is not in our table (not passed, but we only use one table). + // Then ignore everything that is not in the given chain. + // Then check conditions and add to the vector if it matches. 
+ for object in &base_rules.objects { + match object { + schema::NfObject::CmdObject(_) => continue, + schema::NfObject::ListObject(obj) => match obj { + schema::NfListObject::Rule(r) => { + if r.table != *TABLENAME { + continue; + } + if r.chain != *chain { + continue; + } + + if rule_match(r) { + log::debug!("Matched {:?}", r); + rules.push(r.clone()); + } + } + _ => continue, + }, + } + } + + rules +} + +/// Get a chain with the given name in the Netavark table. +fn get_chain(base_rules: &schema::Nftables, chain: &str) -> Option { + for object in &base_rules.objects { + match object { + schema::NfObject::CmdObject(_) => continue, + schema::NfObject::ListObject(obj) => match obj { + schema::NfListObject::Chain(c) => { + if c.table != *TABLENAME { + continue; + } + if c.name == *chain { + log::debug!("Found chain {}", chain); + return Some(c.clone()); + } + } + _ => continue, + }, + } + } + + None +} diff --git a/src/firewall/state.rs b/src/firewall/state.rs index d9e6323ee..e55743ddc 100644 --- a/src/firewall/state.rs +++ b/src/firewall/state.rs @@ -266,15 +266,18 @@ mod tests { let net_conf = SetupNetwork { subnets: Some(vec!["10.0.0.0/24".parse().unwrap()]), + network_id: "c2c8a073252874648259997d53b0a1bffa491e21f04bc1bf8609266359931395" + .to_string(), bridge_name: "bridge".to_string(), network_hash_name: "hash".to_string(), isolation: IsolateOption::Never, dns_port: 53, }; - let net_conf_json = r#"{"subnets":["10.0.0.0/24"],"bridge_name":"bridge","network_hash_name":"hash","isolation":"Never","dns_port":53}"#; + let net_conf_json = r#"{"subnets":["10.0.0.0/24"],"bridge_name":"bridge","network_id":"c2c8a073252874648259997d53b0a1bffa491e21f04bc1bf8609266359931395","network_hash_name":"hash","isolation":"Never","dns_port":53}"#; let port_conf = PortForwardConfig { container_id: container_id.to_string(), + id: "c2c8a073252874648259997d53b0a1bffa491e21f04bc1bf8609266359931395".to_string(), port_mappings: &None, network_name: "name".to_string(), network_hash_name: 
"hash".to_string(), @@ -285,7 +288,7 @@ mod tests { dns_port: 53, dns_server_ips: &vec![], }; - let port_conf_json = r#"{"container_id":"123","port_mappings":null,"network_name":"name","network_hash_name":"hash","container_ip_v4":"10.0.0.2","subnet_v4":"10.0.0.0/24","container_ip_v6":null,"subnet_v6":null,"dns_port":53,"dns_server_ips":[]}"#; + let port_conf_json = r#"{"container_id":"123","id":"c2c8a073252874648259997d53b0a1bffa491e21f04bc1bf8609266359931395","port_mappings":null,"network_name":"name","network_hash_name":"hash","container_ip_v4":"10.0.0.2","subnet_v4":"10.0.0.0/24","container_ip_v6":null,"subnet_v6":null,"dns_port":53,"dns_server_ips":[]}"#; let res = write_fw_config( config_dir, diff --git a/src/network/bridge.rs b/src/network/bridge.rs index b7f714a2f..3b3c85955 100644 --- a/src/network/bridge.rs +++ b/src/network/bridge.rs @@ -325,6 +325,7 @@ impl<'a> Bridge<'a> { .as_ref() .map(|nets| nets.iter().map(|n| n.subnet).collect()), bridge_name, + network_id: self.info.network.id.clone(), network_hash_name: id_network_hash.clone(), isolation: isolate, dns_port: self.info.dns_port, @@ -359,6 +360,7 @@ impl<'a> Bridge<'a> { } let spf = PortForwardConfig { container_id: self.info.container_id.clone(), + id: self.info.network.id.clone(), port_mappings: self.info.port_mappings, network_name: self.info.network.name.clone(), network_hash_name: id_network_hash, diff --git a/src/network/internal_types.rs b/src/network/internal_types.rs index e6c9719d2..64afe4595 100644 --- a/src/network/internal_types.rs +++ b/src/network/internal_types.rs @@ -17,6 +17,8 @@ pub struct SetupNetwork { pub subnets: Option>, /// bridge interface name pub bridge_name: String, + /// id for the network + pub network_id: String, /// hash id for the network pub network_hash_name: String, /// isolation determines whether the network can communicate with others outside of its interface @@ -35,6 +37,8 @@ pub struct TearDownNetwork { pub struct PortForwardConfigGeneric { /// id of 
container pub container_id: String, + /// id of the network + pub id: String, /// port mappings pub port_mappings: Ports, /// name of network @@ -76,6 +80,7 @@ impl<'a> From<&'a PortForwardConfigOwned> for PortForwardConfig<'a> { fn from(p: &'a PortForwardConfigOwned) -> PortForwardConfig<'a> { Self { container_id: p.container_id.clone(), + id: p.id.clone(), port_mappings: &p.port_mappings, network_name: p.network_name.clone(), network_hash_name: p.network_hash_name.clone(), diff --git a/test/250-bridge-nftables.bats b/test/250-bridge-nftables.bats new file mode 100644 index 000000000..c7e1318d4 --- /dev/null +++ b/test/250-bridge-nftables.bats @@ -0,0 +1,968 @@ +#!/usr/bin/env bats -*- bats -*- +# +# bridge driver tests with nftables firewall driver +# + +load helpers + +fw_driver=nftables +export NETAVARK_FW=nftables + +@test "check nftables driver is in use" { + RUST_LOG=netavark=info run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + assert "${lines[0]}" "==" "[INFO netavark::firewall] Using nftables firewall driver" "nftables driver is in use" +} + +@test "$fw_driver - internal network" { + # Table doesn't exist at this point otherwise + run_in_host_netns nft add table inet netavark + run_in_host_netns nft list table inet netavark + before="$output" + + run_netavark --file ${TESTSDIR}/testfiles/internal.json setup $(get_container_netns_path) + + run_in_host_netns nft list table inet netavark + assert "$output" == "$before" "make sure tables have not changed" + + run_in_container_netns ip route show + assert "$output" "!~" "default" "No default route for internal networks" + + run_in_container_netns ping -c 1 10.88.0.1 + + run_netavark --file ${TESTSDIR}/testfiles/internal.json teardown $(get_container_netns_path) +} + +@test "$fw_driver - simple bridge" { + run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + result="$output" + assert_json "$result" 'has("podman")' == 
"true" "object key exists" + + mac=$(jq -r '.podman.interfaces.eth0.mac_address' <<<"$result") + # check that interface exists + run_in_container_netns ip -j --details link show eth0 + link_info="$output" + assert_json "$link_info" ".[].address" == "$mac" "MAC matches container mac" + assert_json "$link_info" '.[].flags[] | select(.=="UP")' == "UP" "Container interface is up" + assert_json "$link_info" ".[].linkinfo.info_kind" == "veth" "Container interface is a veth device" + + ipaddr="10.88.0.2/16" + run_in_container_netns ip addr show eth0 + assert "$output" =~ "$ipaddr" "IP address matches container address" + assert_json "$result" ".podman.interfaces.eth0.subnets[0].ipnet" == "$ipaddr" "Result contains correct IP address" + + run_in_host_netns ip -j --details link show podman0 + link_info="$output" + assert_json "$link_info" '.[].flags[] | select(.=="UP")' == "UP" "Host bridge interface is up" + assert_json "$link_info" ".[].linkinfo.info_kind" == "bridge" "The bridge interface is actually a bridge" + bridge_mac=$(jq -r '.[].address' <<<"$link_info") + + run_in_host_netns ip -j link show veth0 + veth_info="$output" + assert_json "$veth_info" ".[].address" != "$bridge_mac" "Bridge and Veth must have different mac address" + + ipaddr="10.88.0.1" + run_in_host_netns ip addr show podman0 + assert "$output" =~ "$ipaddr" "IP address matches bridge gateway address" + assert_json "$result" ".podman.interfaces.eth0.subnets[0].gateway" == "$ipaddr" "Result contains gateway address" + + # check that the loopback adapter is up + run_in_container_netns ip addr show lo + assert "$output" =~ "127.0.0.1" "Loopback adapter is up (has address)" + + run_in_host_netns ping -c 1 10.88.0.2 + + check_simple_bridge_nftables + + run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json teardown $(get_container_netns_path) + + # now check that nftables rules are gone + + # check FORWARD rules + run_in_host_netns nft list chain inet netavark FORWARD + assert "${lines[3]}" =~ "ct state 
invalid drop" "CT state invalid rule" + assert "${#lines[@]}" = 6 "too many FORWARD rules after teardown" + + # check POSTROUTING rules + run_in_host_netns nft list chain inet netavark POSTROUTING + assert "${lines[3]}" =~ "meta mark & 0x00002000 == 0x00002000 masquerade" "Mark-masquerade rule" + assert "${#lines[@]}" = 6 "too many POSTROUTING rules after teardown" + + # nv_10_88_0_0_nm16 chain should not exists + expected_rc=1 run_in_host_netns nft list chain inet netavark nv_10_88_0_0_nm16 + + # bridge should be removed on teardown + expected_rc=1 run_in_host_netns ip addr show podman0 +} + +@test "$fw_driver - bridge with static routes" { + # add second interface and routes through that interface to test proper teardown + run_in_container_netns ip link add type dummy + run_in_container_netns ip a add 10.91.0.10/24 dev dummy0 + run_in_container_netns ip link set dummy0 up + + run_netavark --file ${TESTSDIR}/testfiles/bridge-staticroutes.json setup $(get_container_netns_path) + + # check static routes + run_in_container_netns ip r + assert "$output" "=~" "10.89.0.0/24 via 10.88.0.2" "static route not set" + assert "$output" "=~" "10.90.0.0/24 via 10.88.0.3" "static route not set" + assert "$output" "=~" "10.92.0.0/24 via 10.91.0.1" "static route not set" + + run_netavark --file ${TESTSDIR}/testfiles/bridge-staticroutes.json teardown $(get_container_netns_path) + + # check static routes get removed + assert "$output" "!~" "10.89.0.0/24 via 10.88.0.2" "static route not set" + assert "$output" "!~" "10.90.0.0/24 via 10.88.0.3" "static route not set" + assert "$output" "!~" "10.92.0.0/24 via 10.91.0.1" "static route not removed" +} + +@test "$fw_driver - bridge with no default route" { + run_netavark --file ${TESTSDIR}/testfiles/bridge-nodefaultroute.json setup $(get_container_netns_path) + + run_in_container_netns ip r + assert "$output" "!~" "default" "default route exists" + + run_in_container_netns ip -6 r + assert "$output" "!~" "default" "default route exists" + 
+ run_netavark --file ${TESTSDIR}/testfiles/bridge-nodefaultroute.json teardown $(get_container_netns_path) + assert "" "no errors" +} + +@test "$fw_driver - bridge driver must generate config for aardvark with multiple custom dns server with network dns servers and perform update" { + # get a random port directly to avoid low ports e.g. 53 would not create nftables rules + dns_port=$((RANDOM+10000)) + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + setup $(get_container_netns_path) + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1 127.0.0.1,3.3.3.3" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + + aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid") + assert "$ardvark_pid" =~ "[0-9]*" "aardvark pid not found" + run_helper ps "$aardvark_pid" + assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options" + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + update podman1 --network-dns-servers 8.8.8.8 + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1 8.8.8.8" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + + # remove network and check running and verify if aardvark config has no nameserver + NETAVARK_DNS_PORT="$dns_port" run_netavark --file 
${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + update podman1 --network-dns-servers "" + + # check aardvark config and running + run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1" + assert "${lines[0]}" == "10.89.3.1,fd10:88:a::1" "aardvark set to listen to all IPs" + assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container" + assert "${#lines[@]}" = 2 "too many lines in aardvark config" + +} + +# netavark must do no-op on upates when no aardvark config is there +@test "run netavark update - no-op" { + # get a random port directly to avoid low ports e.g. 53 would not create nftables rules + dns_port=$((RANDOM+10000)) + + NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \ + update podman1 --network-dns-servers 8.8.8.8 +} + +@test "$fw_driver - ipv6 bridge" { + run_netavark --file ${TESTSDIR}/testfiles/ipv6-bridge.json setup $(get_container_netns_path) + result="$output" + assert_json "$result" 'has("podman1")' == "true" "object key exists" + + mac=$(jq -r '.podman1.interfaces.eth0.mac_address' <<<"$result") + # check that interface exists + run_in_container_netns ip -j --details link show eth0 + link_info="$output" + assert_json "$link_info" ".[].address" == "$mac" "MAC matches container mac" + assert_json "$link_info" '.[].flags[] | select(.=="UP")' == "UP" "Container interface is up" + assert_json "$link_info" ".[].linkinfo.info_kind" == "veth" "Container interface is a veth device" + + ipaddr="fd10:88:a::2/64" + run_in_container_netns ip addr show eth0 + assert "$output" =~ "$ipaddr" "IP address matches container address" + assert_json "$result" ".podman1.interfaces.eth0.subnets[0].ipnet" == "$ipaddr" "Result contains correct IP address" + + run_in_host_netns ip -j --details link show podman1 + link_info="$output" + assert_json "$link_info" '.[].flags[] | select(.=="UP")' == "UP" "Host 
bridge interface is up" + assert_json "$link_info" ".[].linkinfo.info_kind" == "bridge" "The bridge interface is actually a bridge" + + ipaddr="fd10:88:a::1" + run_in_host_netns ip addr show podman1 + assert "$output" =~ "$ipaddr" "IP address matches bridge gateway address" + assert_json "$result" ".podman1.interfaces.eth0.subnets[0].gateway" == "$ipaddr" "Result contains gateway address" + + # check that the loopback adapter is up + run_in_container_netns ip addr show lo + assert "$output" =~ "127.0.0.1" "Loopback adapter is up (has address)" + + run_in_host_netns ping6 -c 1 fd10:88:a::2 + + run_netavark --file ${TESTSDIR}/testfiles/ipv6-bridge.json teardown $(get_container_netns_path) +} + +@test "$fw_driver - ipv6 bridge with static routes" { + # add second interface and routes through that interface to test proper teardown + run_in_container_netns ip link add type dummy + run_in_container_netns ip a add fd10:49:b::2/64 dev dummy0 + run_in_container_netns ip link set dummy0 up + + run_netavark --file ${TESTSDIR}/testfiles/ipv6-bridge-staticroutes.json setup $(get_container_netns_path) + + # check static routes + run_in_container_netns ip -6 -br r + assert "$output" "=~" "fd10:89:b::/64 via fd10:88:a::ac02" "static route not set" + assert "$output" "=~" "fd10:89:c::/64 via fd10:88:a::ac03" "static route not set" + assert "$output" "=~" "fd10:51:b::/64 via fd10:49:b::30" "static route not set" + + run_netavark --file ${TESTSDIR}/testfiles/ipv6-bridge-staticroutes.json teardown $(get_container_netns_path) + + # check static routes get removed + run_in_container_netns ip -6 -br r + assert "$output" "!~" "fd10:89:b::/64 via fd10:88:a::ac02" "static route not removed" + assert "$output" "!~" "fd10:89:c::/64 via fd10:88:a::ac03" "static route not removed" + assert "$output" "!~" "fd10:51:b::/64 via fd10:49:b::30" "static route not removed" + + run_in_container_netns ip link delete dummy0 +} + +@test "$fw_driver - bridge driver must generate config for aardvark with 
custom dns server" {
    # get a random port directly to avoid low ports e.g. 53 would not create nftables rules
    dns_port=$((RANDOM+10000))

    NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-custom-dns-server.json \
        setup $(get_container_netns_path)

    # check aardvark config and running
    run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1"
    assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1" "aardvark set to listen to all IPs"
    assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8$" "aardvark config's container"
    assert "${#lines[@]}" = 2 "too many lines in aardvark config"

    aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid")
    # The pid file must contain at least one digit; an empty value means no pid was written.
    assert "$aardvark_pid" =~ "[0-9]+" "aardvark pid not found"
    run_helper ps "$aardvark_pid"
    assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options"
}

@test "$fw_driver - bridge driver must generate config for aardvark with multiple custom dns server" {
    # get a random port directly to avoid low ports e.g. 53 would not create nftables rules
    dns_port=$((RANDOM+10000))

    NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-multiple-custom-dns-server.json \
        setup $(get_container_netns_path)

    # check aardvark config and running
    run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1"
    assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1" "aardvark set to listen to all IPs"
    assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container"
    assert "${#lines[@]}" = 2 "too many lines in aardvark config"

    aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid")
    # The pid file must contain at least one digit; an empty value means no pid was written.
    assert "$aardvark_pid" =~ "[0-9]+" "aardvark pid not found"
    run_helper ps "$aardvark_pid"
    assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options"
}

@test "$fw_driver - bridge driver must generate config for aardvark with multiple custom dns server with network dns servers" {
    # get a random port directly to avoid low ports e.g. 53 would not create nftables rules
    dns_port=$((RANDOM+10000))

    NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge-network-container-dns-server.json \
        setup $(get_container_netns_path)

    # check aardvark config and running
    run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1"
    assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1 127.0.0.1,3.3.3.3" "aardvark set to listen to all IPs"
    assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename 8.8.8.8,1.1.1.1$" "aardvark config's container"
    assert "${#lines[@]}" = 2 "too many lines in aardvark config"

    aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid")
    # The pid file must contain at least one digit; an empty value means no pid was written.
    assert "$aardvark_pid" =~ "[0-9]+" "aardvark pid not found"
    run_helper ps "$aardvark_pid"
    assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options"
}

@test "$fw_driver - dual stack dns with alt port" {
    # get a random port directly to avoid low ports e.g. 53 would not create nftables rules
    dns_port=$((RANDOM+10000))

    NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge.json \
        setup $(get_container_netns_path)

    # check nftables
    run_in_host_netns nft list chain inet netavark NETAVARK-HOSTPORT-DNAT
    assert "${lines[2]}" =~ "ip daddr 10.89.3.1 udp dport 53 dnat ip to 10.89.3.1:$dns_port" "DNS forward rule"

    # check aardvark config and running
    run_helper cat "$NETAVARK_TMPDIR/config/aardvark-dns/podman1"
    assert "${lines[0]}" =~ "10.89.3.1,fd10:88:a::1" "aardvark set to listen to all IPs"
    assert "${lines[1]}" =~ "^[0-9a-f]{64} 10.89.3.2 fd10:88:a::2 somename$" "aardvark config's container"
    assert "${#lines[@]}" = 2 "too many lines in aardvark config"

    aardvark_pid=$(cat "$NETAVARK_TMPDIR/config/aardvark-dns/aardvark.pid")
    # The pid file must contain at least one digit; an empty value means no pid was written.
    assert "$aardvark_pid" =~ "[0-9]+" "aardvark pid not found"
    run_helper ps "$aardvark_pid"
    assert "${lines[1]}" =~ ".*aardvark-dns --config $NETAVARK_TMPDIR/config/aardvark-dns -p $dns_port run" "aardvark not running or bad options"

    # test redirection actually works
    run_in_container_netns dig +short "somename.dns.podman" @10.89.3.1 A "somename.dns.podman" @10.89.3.1 AAAA
    assert "${lines[0]}" =~ "10.89.3.2" "ipv4 dns resolution works 1/2"
    assert "${lines[1]}" =~ "fd10:88:a::2" "ipv6 dns resolution works 2/2"

    run_in_container_netns dig +short "somename.dns.podman" @fd10:88:a::1
    assert "${lines[0]}" =~ "10.89.3.2" "ipv6 dns resolution works"

    NETAVARK_DNS_PORT="$dns_port" run_netavark --file ${TESTSDIR}/testfiles/dualstack-bridge.json \
        teardown $(get_container_netns_path)

    # check nftables rules were removed
    run_in_host_netns nft list chain inet netavark NETAVARK-HOSTPORT-DNAT
    assert "${#lines[@]}" = 4 "too many v4 NETAVARK_HOSTPORT-DNAT rules after teardown"

    # check aardvark config got cleared, process killed
    expected_rc=2 run_helper ls "$NETAVARK_TMPDIR/config/aardvark-dns/podman1"
    expected_rc=1
run_helper ps "$aardvark_pid" +} + +@test "$fw_driver - check error message from netns thread" { + # create interface in netns to force error + run_in_container_netns ip link add eth0 type dummy + + expected_rc=1 run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + assert_json ".error" "create veth pair: interface eth0 already exists on container namespace: Netlink error: File exists (os error 17)" "interface exists on netns" +} + +@test "$fw_driver - port forwarding ipv4 - tcp" { + test_port_fw +} + +@test "$fw_driver - port forwarding ipv6 - tcp" { + test_port_fw ip=6 +} + +@test "$fw_driver - port forwarding dualstack - tcp" { + test_port_fw ip=dual +} + +@test "$fw_driver - port forwarding ipv4 - udp" { + test_port_fw proto=udp +} + +@test "$fw_driver - port forwarding ipv6 - udp" { + test_port_fw ip=6 proto=udp +} + +@test "$fw_driver - port forwarding dualstack - udp" { + test_port_fw ip=dual proto=udp +} + +@test "$fw_driver - port forwarding ipv4 - sctp" { + setup_sctp_kernel_module + test_port_fw proto=sctp +} + +@test "$fw_driver - port forwarding ipv6 - sctp" { + setup_sctp_kernel_module + test_port_fw ip=6 proto=sctp +} + +@test "$fw_driver - port forwarding dualstack - sctp" { + setup_sctp_kernel_module + test_port_fw ip=dual proto=sctp +} + +@test "$fw_driver - port range forwarding ipv4 - tcp" { + test_port_fw range=3 +} + +@test "$fw_driver - port range forwarding ipv6 - tcp" { + test_port_fw ip=6 range=3 +} + +@test "$fw_driver - port range forwarding ipv4 - udp" { + test_port_fw proto=udp range=3 +} + +@test "$fw_driver - port range forwarding ipv6 - udp" { + test_port_fw ip=6 proto=udp range=3 +} + +@test "$fw_driver - port range forwarding dual - udp" { + test_port_fw ip=dual proto=udp range=3 +} + +@test "$fw_driver - port range forwarding dual - tcp" { + test_port_fw ip=dual proto=tcp range=3 +} + + +@test "$fw_driver - port forwarding with hostip ipv4 - tcp" { + add_dummy_interface_on_host dummy0 
"172.16.0.1/24" + test_port_fw hostip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with hostip ipv4 dual stack - tcp" { + add_dummy_interface_on_host dummy0 "172.16.0.1/24" + test_port_fw ip=dual hostip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with hostip ipv6 - tcp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=6 hostip="fd65:8371:648b:0c06::1" +} + +@test "$fw_driver - port forwarding with hostip ipv6 dual stack - tcp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=dual hostip="fd65:8371:648b:0c06::1" +} + +@test "$fw_driver - port forwarding with wildcard hostip ipv4 - tcp" { + add_dummy_interface_on_host dummy0 "172.16.0.1/24" + test_port_fw hostip="0.0.0.0" connectip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with wildcard hostip ipv4 dual stack - tcp" { + add_dummy_interface_on_host dummy0 "172.16.0.1/24" + test_port_fw ip=dual hostip="0.0.0.0" connectip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with wildcard hostip ipv6 - tcp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=6 hostip="::" connectip="fd65:8371:648b:0c06::1" +} + +@test "$fw_driver - port forwarding with wildcard hostip ipv6 dual stack - tcp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=dual hostip="::" connectip="fd65:8371:648b:0c06::1" +} + +@test "$fw_driver - port forwarding with hostip ipv4 - udp" { + add_dummy_interface_on_host dummy0 "172.16.0.1/24" + test_port_fw proto=udp hostip="172.16.0.1" +} + +@test "$fw_driver - port forwarding with hostip ipv6 - udp" { + add_dummy_interface_on_host dummy0 "fd65:8371:648b:0c06::1/64" + test_port_fw ip=6 proto=udp hostip="fd65:8371:648b:0c06::1" +} + +@test "bridge ipam none" { + read -r -d '\0' config < /proc/sys/net/ipv4/ip_forward" + run_in_container_netns sh -c "echo 1 > /proc/sys/net/ipv4/conf/default/arp_notify" + run_in_host_netns mount -t 
proc -o ro,nosuid,nodev,noexec proc /proc + + run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json teardown $(get_container_netns_path) + + run_in_host_netns mount -t proc -o remount,rw /proc + run_in_host_netns sh -c "echo 0 > /proc/sys/net/ipv4/ip_forward" + run_in_host_netns mount -t proc -o remount,ro /proc + + expected_rc=1 run_netavark --file ${TESTSDIR}/testfiles/simplebridge.json setup $(get_container_netns_path) + assert_json ".error" "Sysctl error: IO Error: Read-only file system (os error 30)" "Sysctl error because fs is read only" +} + + +@test "$fw_driver - bridge static mac" { + mac="aa:bb:cc:dd:ee:ff" + + read -r -d '\0' config <