diff --git a/Cargo.lock b/Cargo.lock
index bfa0601..2f1c91a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -727,6 +727,7 @@ dependencies = [
  "aws-types",
  "clap",
  "handlebars",
+ "itertools",
  "k8s-openapi",
  "kube",
  "rust-embed",
@@ -1167,6 +1168,15 @@ dependencies = [
  "windows-sys 0.45.0",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.6"
diff --git a/docs/process/checks.md b/docs/process/checks.md
index 54cf80e..c46013f 100644
--- a/docs/process/checks.md
+++ b/docs/process/checks.md
@@ -54,7 +54,7 @@ Checks that are specific to Amazon EKS
 
 **❌ Remediation required**
 
-There are at least 5 available IPs for the control plane to upgrade; required for cross account ENI creation.
+There are at least 2 availability zones, each with at least 5 available IPs across its subnets, for the control plane to upgrade; required for cross account ENI creation.
 
 #### EKS002
diff --git a/eksup/Cargo.toml b/eksup/Cargo.toml
index 82c73bd..728ef82 100644
--- a/eksup/Cargo.toml
+++ b/eksup/Cargo.toml
@@ -33,6 +33,7 @@ aws-sdk-eks = "0.24"
 aws-types = "0.54"
 clap = { version = "4.0", features = ["derive", "string"] }
 handlebars = { version = "4.3", features = ["rust-embed"] }
+itertools = "0.10"
 # https://kube.rs/kubernetes-version/
 k8s-openapi = { version = "0.17.0", default-features = false, features = ["v1_22"] }
 kube = { version = "0.80.0", default-features = false, features = [ "client", "derive", "rustls-tls" ] }
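The new EKS001 logic hinges on the `itertools` grouping pulled in above. Below is a minimal, self-contained sketch of the idea (the `Subnet` struct and the AZ/IP values are illustrative, not the crate's actual types), including the sort that `group_by` requires since it only groups *consecutive* elements:

```rust
use itertools::Itertools;

// Simplified stand-in for the `VpcSubnet` type introduced in this diff
struct Subnet {
    availability_zone_id: String,
    available_ips: i32,
}

fn main() {
    let subnets = vec![
        Subnet { availability_zone_id: "use1-az1".to_string(), available_ips: 3 },
        Subnet { availability_zone_id: "use1-az2".to_string(), available_ips: 8 },
        Subnet { availability_zone_id: "use1-az1".to_string(), available_ips: 4 },
    ];

    // `group_by` only groups consecutive elements, so sort by AZ first;
    // otherwise a zone split across non-adjacent subnets is counted twice
    let az_ips: Vec<(String, i32)> = subnets
        .iter()
        .sorted_by(|a, b| a.availability_zone_id.cmp(&b.availability_zone_id))
        .group_by(|s| s.availability_zone_id.clone())
        .into_iter()
        .map(|(az, group)| (az, group.map(|s| s.available_ips).sum()))
        .collect();

    // EKS001: the upgrade is safe when at least 2 AZs have 5 or more free IPs
    let healthy_azs = az_ips.iter().filter(|(_, ips)| *ips >= 5).count();
    println!("{az_ips:?} -> {healthy_azs} AZ(s) with >= 5 IPs");
}
```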
diff --git a/eksup/src/eks/checks.rs b/eksup/src/eks/checks.rs
index e3fb496..f713a19 100644
--- a/eksup/src/eks/checks.rs
+++ b/eksup/src/eks/checks.rs
@@ -5,6 +5,7 @@ use aws_sdk_eks::{
   model::{Addon, Cluster, Nodegroup},
   Client as EksClient,
 };
+use itertools::Itertools;
 use kube::Client as K8sClient;
 use serde::{Deserialize, Serialize};
 use tabled::{locator::ByColumnName, Disable, Margin, Style, Table, Tabled};
@@ -100,57 +101,69 @@ pub(crate) async fn cluster_health(cluster: &Cluster) -> Result<Vec<ClusterHealthIssue>> {
 pub struct InsufficientSubnetIps {
   #[tabled(inline)]
   pub finding: finding::Finding,
-  pub ids: Vec<String>,
+  pub id: String,
   pub available_ips: i32,
 }
 
-impl Findings for Option<InsufficientSubnetIps> {
+impl Findings for Vec<InsufficientSubnetIps> {
   fn to_markdown_table(&self, leading_whitespace: &str) -> Result<String> {
-    match self {
-      Some(finding) => {
-        let mut table = Table::new(vec![finding]);
-        table
-          .with(Disable::column(ByColumnName::new("CHECK")))
-          .with(Margin::new(1, 0, 0, 0).set_fill('\t', 'x', 'x', 'x'))
-          .with(Style::markdown());
-
-        Ok(format!("{table}\n"))
-      }
-      None => Ok(format!(
+    if self.is_empty() {
+      return Ok(format!(
         "{leading_whitespace}✅ - There is sufficient IP space in the subnets provided"
-      )),
+      ));
     }
+
+    let mut table = Table::new(self);
+    table
+      .with(Disable::column(ByColumnName::new("CHECK")))
+      .with(Margin::new(1, 0, 0, 0).set_fill('\t', 'x', 'x', 'x'))
+      .with(Style::markdown());
+
+    Ok(format!("{table}\n"))
   }
 
   fn to_stdout_table(&self) -> Result<String> {
-    match self {
-      None => Ok("".to_owned()),
-      Some(finding) => {
-        let mut table = Table::new(vec![finding]);
-        table.with(Style::sharp());
-
-        Ok(format!("{table}\n"))
-      }
+    if self.is_empty() {
+      return Ok("".to_owned());
     }
+
+    let mut table = Table::new(self);
+    table.with(Style::sharp());
+
+    Ok(format!("{table}\n"))
   }
 }
 
-pub(crate) async fn control_plane_ips(
-  ec2_client: &Ec2Client,
-  cluster: &Cluster,
-) -> Result<Option<InsufficientSubnetIps>> {
+pub(crate) async fn control_plane_ips(ec2_client: &Ec2Client, cluster: &Cluster) -> Result<Vec<InsufficientSubnetIps>> {
   let subnet_ids = match cluster.resources_vpc_config() {
     Some(vpc_config) => match vpc_config.subnet_ids() {
       Some(subnet_ids) => subnet_ids.to_owned(),
-      None => return Ok(None),
+      None => return Ok(vec![]),
     },
-    None => return Ok(None),
+    None => return Ok(vec![]),
   };
 
   let subnet_ips = resources::get_subnet_ips(ec2_client, subnet_ids).await?;
-  if subnet_ips.available_ips >= 5 {
-    return Ok(None);
+
+  let availability_zone_ips: Vec<(String, i32)> = subnet_ips
+    .iter()
+    // `group_by` only groups consecutive elements, so sort by AZ first
+    .sorted_by(|a, b| a.availability_zone_id.cmp(&b.availability_zone_id))
+    .group_by(|subnet| subnet.availability_zone_id.clone())
+    .into_iter()
+    .map(|(az, subnets)| {
+      let total_ips = subnets.map(|subnet| subnet.available_ips).sum();
+      (az, total_ips)
+    })
+    .collect();
+
+  // There are at least 2 different availability zones with 5 or more IPs; no finding
+  if availability_zone_ips.iter().filter(|(_az, ips)| *ips >= 5).count() >= 2 {
+    return Ok(vec![]);
   }
 
   let remediation = finding::Remediation::Required;
@@ -160,13 +173,16 @@ pub(crate) async fn control_plane_ips(
     remediation,
   };
 
-  let finding = InsufficientSubnetIps {
-    finding,
-    ids: subnet_ips.ids,
-    available_ips: subnet_ips.available_ips,
-  };
-
-  Ok(Some(finding))
+  Ok(
+    availability_zone_ips
+      .iter()
+      .map(|(az, ips)| InsufficientSubnetIps {
+        finding: finding.clone(),
+        id: az.clone(),
+        available_ips: *ips,
+      })
+      .collect(),
+  )
 }
 
 /// Check if the subnets used by the pods will support an upgrade
@@ -179,10 +195,10 @@ pub(crate) async fn pod_ips(
   k8s_client: &K8sClient,
   required_ips: i32,
   recommended_ips: i32,
-) -> Result<Option<InsufficientSubnetIps>> {
+) -> Result<Vec<InsufficientSubnetIps>> {
   let eniconfigs = k8s::get_eniconfigs(k8s_client).await?;
   if eniconfigs.is_empty() {
-    return Ok(None);
+    return Ok(vec![]);
   }
 
   let subnet_ids = eniconfigs
@@ -191,12 +207,13 @@ pub(crate) async fn pod_ips(
     .collect();
 
   let subnet_ips = resources::get_subnet_ips(ec2_client, subnet_ids).await?;
+  let available_ips: i32 = subnet_ips.iter().map(|subnet| subnet.available_ips).sum();
 
-  if subnet_ips.available_ips >= recommended_ips {
-    return Ok(None);
+  if available_ips >= recommended_ips {
+    return Ok(vec![]);
   }
 
-  let remediation = if subnet_ips.available_ips >= required_ips {
-    finding::Remediation::Required
-  } else {
+  // Meeting the hard minimum still warrants a recommendation; only falling
+  // below `required_ips` makes remediation required
+  let remediation = if available_ips >= required_ips {
     finding::Remediation::Recommended
+  } else {
+    finding::Remediation::Required
   };
@@ -208,13 +225,18 @@ pub(crate) async fn pod_ips(
     remediation,
   };
 
-  let subnetips = InsufficientSubnetIps {
-    finding,
-    ids: subnet_ips.ids,
-    available_ips: subnet_ips.available_ips,
-  };
-
-  Ok(Some(subnetips))
+  Ok(
+    subnet_ips
+      .iter()
+      // sort so `group_by` sees each AZ as one consecutive run
+      .sorted_by(|a, b| a.availability_zone_id.cmp(&b.availability_zone_id))
+      .group_by(|subnet| subnet.availability_zone_id.clone())
+      .into_iter()
+      .map(|(az, subnets)| InsufficientSubnetIps {
+        finding: finding.clone(),
+        id: az,
+        available_ips: subnets.map(|subnet| subnet.available_ips).sum(),
+      })
+      .collect(),
+  )
 }
 
 /// Details of the addon as viewed from an upgrade perspective
diff --git a/eksup/src/eks/findings.rs b/eksup/src/eks/findings.rs
index 7f1bd6e..c868a2d 100644
--- a/eksup/src/eks/findings.rs
+++ b/eksup/src/eks/findings.rs
@@ -28,10 +28,10 @@ pub async fn get_cluster_findings(cluster: &Cluster) -> Result<ClusterFindings> {
 #[derive(Debug, Serialize, Deserialize)]
 pub struct SubnetFindings {
-  /// The Amazon EKS service requires at least 5 available IPs in order to upgrade a cluster in-place
-  pub control_plane_ips: Option<InsufficientSubnetIps>,
+  /// The Amazon EKS service requires at least 2 availability zones, each with at least 5 available IPs, in order to upgrade a cluster in-place
+  pub control_plane_ips: Vec<InsufficientSubnetIps>,
   /// This is the number of IPs available to pods when custom networking is enabled on the AWS VPC CNI,
   /// pulling the available number of IPs for the subnets listed in the ENIConfig resource(s)
-  pub pod_ips: Option<InsufficientSubnetIps>,
+  pub pod_ips: Vec<InsufficientSubnetIps>,
 }
 
 /// Collects findings related to networking and subnets
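For a sense of how the `Vec`-based `Findings` impl renders: an empty vector short-circuits to the ✅ message, and each under-provisioned availability zone otherwise becomes one table row. A rough sketch against the tabled 0.10 API used above, where `AzFinding` is an illustrative stand-in for `InsufficientSubnetIps`:

```rust
use tabled::{Style, Table, Tabled};

// Illustrative finding row; the rename attributes mirror the headers
// seen in the example playbooks, not the crate's actual type
#[derive(Tabled)]
struct AzFinding {
    #[tabled(rename = "AZ")]
    id: String,
    #[tabled(rename = "AVAILABLE IPS")]
    available_ips: i32,
}

fn main() {
    let findings = vec![
        AzFinding { id: "use1-az1".into(), available_ips: 3 },
        AzFinding { id: "use1-az4".into(), available_ips: 2 },
    ];

    // An empty Vec means "no finding" and renders the ✅ message instead;
    // otherwise every under-provisioned AZ becomes one row of the table
    if findings.is_empty() {
        println!("✅ - There is sufficient IP space in the subnets provided");
    } else {
        let mut table = Table::new(&findings);
        table.with(Style::markdown());
        println!("{table}");
    }
}
```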
diff --git a/eksup/src/eks/resources.rs b/eksup/src/eks/resources.rs
index 966397a..5b361cc 100644
--- a/eksup/src/eks/resources.rs
+++ b/eksup/src/eks/resources.rs
@@ -34,9 +34,10 @@ pub async fn get_cluster(client: &EksClient, name: &str) -> Result<Cluster> {
 
-/// Container for the subnet IDs and their total available IPs
+/// Container for a subnet's ID, its availability zone, and its available IPs
 #[derive(Clone, Debug, Serialize, Deserialize)]
-pub(crate) struct SubnetIPs {
-  pub(crate) ids: Vec<String>,
+pub(crate) struct VpcSubnet {
+  pub(crate) id: String,
   pub(crate) available_ips: i32,
+  pub(crate) availability_zone_id: String,
 }
 
 /// Describe the subnets provided by ID
@@ -45,7 +46,7 @@
 /// IP contention/exhaustion across the various subnets in use
 /// by the control plane ENIs, the nodes, and the pods (when custom
 /// networking is enabled)
-pub(crate) async fn get_subnet_ips(client: &Ec2Client, subnet_ids: Vec<String>) -> Result<SubnetIPs> {
+pub(crate) async fn get_subnet_ips(client: &Ec2Client, subnet_ids: Vec<String>) -> Result<Vec<VpcSubnet>> {
   let subnets = client
     .describe_subnets()
     .set_subnet_ids(Some(subnet_ids))
@@ -54,17 +55,22 @@ pub(crate) async fn get_subnet_ips(client: &Ec2Client, subnet_ids: Vec<String>)
     .subnets
     .context("Subnets not found")?;
 
-  let available_ips = subnets
-    .iter()
-    .map(|subnet| subnet.available_ip_address_count.unwrap_or_default())
-    .sum();
-
-  let ids = subnets
-    .iter()
-    .map(|subnet| subnet.subnet_id().unwrap_or_default().to_string())
-    .collect::<Vec<String>>();
-
-  Ok(SubnetIPs { ids, available_ips })
+  Ok(
+    subnets
+      .iter()
+      .map(|subnet| {
+        let id = subnet.subnet_id().unwrap_or_default().to_string();
+        let available_ips = subnet.available_ip_address_count.unwrap_or_default();
+        let availability_zone_id = subnet.availability_zone_id().unwrap_or_default().to_string();
+
+        VpcSubnet {
+          id,
+          available_ips,
+          availability_zone_id,
+        }
+      })
+      .collect(),
+  )
 }
 
 pub async fn get_addons(client: &EksClient, cluster_name: &str) -> Result<Vec<Addon>> {
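For reference, the reshaped `get_subnet_ips` data can be exercised end-to-end with just an EC2 client. A hedged sketch, assuming `tokio` and an `aws-config` version matching the SDK crates pinned in Cargo.toml, with a placeholder subnet ID:

```rust
use anyhow::{Context, Result};
use aws_sdk_ec2::Client as Ec2Client;

#[tokio::main]
async fn main() -> Result<()> {
    // Credentials/region resolved from the environment, as elsewhere in eksup
    let config = aws_config::load_from_env().await;
    let client = Ec2Client::new(&config);

    // Same call shape as get_subnet_ips(): one DescribeSubnets request,
    // defaulting missing counts/IDs rather than erroring per-field
    let subnets = client
        .describe_subnets()
        .set_subnet_ids(Some(vec!["subnet-0123456789abcdef0".to_string()]))
        .send()
        .await?
        .subnets
        .context("Subnets not found")?;

    for s in &subnets {
        println!(
            "{}: {} free IPs in {}",
            s.subnet_id().unwrap_or_default(),
            s.available_ip_address_count.unwrap_or_default(),
            s.availability_zone_id().unwrap_or_default(),
        );
    }
    Ok(())
}
```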
diff --git a/eksup/src/lib.rs b/eksup/src/lib.rs
index f8626cd..7476303 100644
--- a/eksup/src/lib.rs
+++ b/eksup/src/lib.rs
@@ -131,8 +131,8 @@ pub async fn create(args: &Create) -> Result<()> {
 
   let results = analysis::analyze(&aws_config, &cluster).await?;
 
-  if let Err(_err) = playbook::create(playbook, region, &cluster, results) {
-    // eprintln!("{err}");
+  if let Err(err) = playbook::create(playbook, region, &cluster, results) {
+    eprintln!("{err}");
     process::exit(2);
   }
 }
diff --git a/examples/test-mixed_v1.24_upgrade.md b/examples/test-mixed_v1.24_upgrade.md
index d7491aa..6c01b6e 100644
--- a/examples/test-mixed_v1.24_upgrade.md
+++ b/examples/test-mixed_v1.24_upgrade.md
@@ -76,12 +76,12 @@
   | K8S001 | ❌ | v1.21 | v1.23 | +2 | 2 |
   | K8S001 | ❌ | v1.22 | v1.23 | +1 | 2 |
 
-  |    | NAME                       | NODE  | CONTROL PLANE | SKEW |
-  |----|----------------------------|-------|---------------|------|
-  | ❌ | ip-10-0-10-49.ec2.internal | v1.21 | v1.23         | +2   |
-  | ❌ | ip-10-0-14-22.ec2.internal | v1.22 | v1.23         | +1   |
-  | ❌ | ip-10-0-20-62.ec2.internal | v1.22 | v1.23         | +1   |
-  | ❌ | ip-10-0-7-12.ec2.internal  | v1.21 | v1.23         | +2   |
+  |    | NAME                        | NODE  | CONTROL PLANE | SKEW |
+  |----|-----------------------------|-------|---------------|------|
+  | ❌ | ip-10-0-0-100.ec2.internal  | v1.21 | v1.23         | +2   |
+  | ❌ | ip-10-0-14-188.ec2.internal | v1.22 | v1.23         | +1   |
+  | ❌ | ip-10-0-19-35.ec2.internal  | v1.21 | v1.23         | +2   |
+  | ❌ | ip-10-0-40-93.ec2.internal  | v1.22 | v1.23         | +1   |
 
 3. Verify that there are at least 5 free IPs in the VPC subnets used by the control plane.
    Amazon EKS creates new elastic network interfaces (ENIs) in any of the subnets specified for the control plane.
    If there are not enough available IPs, then the upgrade will fail (your control plane will stay on the prior version).
@@ -337,7 +337,7 @@ The default update strategy for EKS managed nodegroups is a surge, rolling update
 
   Check [[EKS006]](https://clowdhaus.github.io/eksup/process/checks/#eks006)
   |   | MANAGED NODEGROUP                   | LAUNCH TEMP ID       | CURRENT | LATEST |
   |---|-------------------------------------|----------------------|---------|--------|
-  | ⚠️ | standard-20230310135434793800000027 | lt-0d8873f5c893efaa0 | 1       | 2      |
+  | ⚠️ | standard-20230311143408696200000027 | lt-0a9ebcea03f330711 | 1       | 2      |
 
 ##### Upgrade
@@ -413,7 +413,7 @@ A starting point for the instance refresh configuration is to use a value of 70%
 
   Check [[EKS007]](https://clowdhaus.github.io/eksup/process/checks/#eks007)
   |   | AUTOSCALING GROUP                    | LAUNCH TEMP ID       | CURRENT | LATEST |
   |---|--------------------------------------|----------------------|---------|--------|
-  | ⚠️ | different-20230310135435081600000029 | lt-0a880c2680a8cf174 | 1       | 2      |
+  | ⚠️ | different-20230311143408778000000029 | lt-061e6a6f3cc5c1db9 | 1       | 2      |
 
 ##### Upgrade
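The refreshed ⚠️ rows in the EKS006/EKS007 tables come from comparing the launch template version a nodegroup or autoscaling group pins against the template's latest version. A sketch of that comparison; the helper name and signature are hypothetical, not eksup's actual code:

```rust
use anyhow::Result;
use aws_sdk_ec2::Client as Ec2Client;

// Hypothetical helper in the spirit of EKS006/EKS007: report (current, latest)
// when the version in use lags the latest launch template version
async fn launch_template_drift(
    client: &Ec2Client,
    lt_id: &str,
    current: i64,
) -> Result<Option<(i64, i64)>> {
    let templates = client
        .describe_launch_templates()
        .launch_template_ids(lt_id)
        .send()
        .await?
        .launch_templates
        .unwrap_or_default();

    Ok(templates.first().and_then(|lt| {
        let latest = lt.latest_version_number.unwrap_or_default();
        // ⚠️ finding when the nodegroup/ASG pins an older version
        (current < latest).then_some((current, latest))
    }))
}
```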