From 5ec7bfc81763d2533ecb58ddceb5a2ddf9433dca Mon Sep 17 00:00:00 2001 From: Felix Zhe Huang <92051185+felix-zhe-huang@users.noreply.github.com> Date: Mon, 14 Mar 2022 19:54:13 -0400 Subject: [PATCH] v0.7.0 (#1512) --- charts/index.yaml | 93 +- charts/karpenter-0.7.0.tgz | Bin 0 -> 9312 bytes charts/karpenter/Chart.yaml | 4 +- charts/karpenter/README.md | 12 +- charts/karpenter/values.yaml | 4 +- website/config.yaml | 8 +- website/content/en/v0.7.0/AWS/_index.md | 7 + .../content/en/v0.7.0/AWS/launch-templates.md | 237 +++++ website/content/en/v0.7.0/AWS/provisioning.md | 214 ++++ website/content/en/v0.7.0/_index.md | 27 + website/content/en/v0.7.0/concepts/_index.md | 164 +++ .../content/en/v0.7.0/development-guide.md | 114 ++ website/content/en/v0.7.0/faq.md | 140 +++ .../en/v0.7.0/getting-started/_index.md | 9 + .../getting-started-with-eksctl/_index.md | 172 ++++ .../cloudformation.yaml | 60 ++ .../grafana-values.yaml | 27 + .../karpenter-controllers-allocation.json | 330 ++++++ .../karpenter-controllers.json | 446 ++++++++ .../karpenter-node-metrics.json | 791 ++++++++++++++ .../karpenter-pod-metrics.json | 970 ++++++++++++++++++ .../prometheus-values.yaml | 14 + .../scripts/add-monitoring.sh | 22 + .../scripts/add-provisioner.sh | 14 + .../scripts/cleanup.sh | 12 + .../scripts/install.sh | 26 + .../scripts/step01-config.sh | 3 + .../scripts/step02-create-cluster.sh | 22 + .../scripts/step03-iam-cloud-formation.sh | 8 + .../scripts/step04-grant-access.sh | 6 + .../scripts/step05-controller-iam.sh | 8 + .../scripts/step06-add-spot-role.sh | 3 + .../scripts/step07-install-helm-chart.sh | 2 + .../scripts/step08-apply-helm-chart.sh | 8 + .../scripts/step09-add-prometheus-grafana.sh | 11 + .../step10-add-grafana-port-forward.sh | 1 + .../scripts/step11-grafana-get-password.sh | 1 + .../scripts/step12-add-provisioner.sh | 20 + .../step13-automatic-node-provisioning.sh | 25 + .../scripts/step14-deprovisioning.sh | 2 + .../scripts/step15-delete-node.sh | 1 + .../scripts/step16-cleanup.sh | 8 + .../getting-started-with-kops/_index.md | 161 +++ .../getting-started-with-terraform/_index.md | 408 ++++++++ website/content/en/v0.7.0/provisioner.md | 201 ++++ website/content/en/v0.7.0/tasks/_index.md | 7 + .../content/en/v0.7.0/tasks/deprovisioning.md | 86 ++ .../content/en/v0.7.0/tasks/pod-density.md | 59 ++ .../content/en/v0.7.0/tasks/provisioning.md | 80 ++ website/content/en/v0.7.0/tasks/scheduling.md | 300 ++++++ .../en/v0.7.0/tasks/set-resource-limits.md | 62 ++ website/content/en/v0.7.0/troubleshooting.md | 88 ++ website/static/_redirects | 2 +- 53 files changed, 5448 insertions(+), 52 deletions(-) create mode 100644 charts/karpenter-0.7.0.tgz create mode 100644 website/content/en/v0.7.0/AWS/_index.md create mode 100644 website/content/en/v0.7.0/AWS/launch-templates.md create mode 100644 website/content/en/v0.7.0/AWS/provisioning.md create mode 100755 website/content/en/v0.7.0/_index.md create mode 100644 website/content/en/v0.7.0/concepts/_index.md create mode 100644 website/content/en/v0.7.0/development-guide.md create mode 100644 website/content/en/v0.7.0/faq.md create mode 100644 website/content/en/v0.7.0/getting-started/_index.md create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/_index.md create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/cloudformation.yaml create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/grafana-values.yaml create mode 100644 
website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-controllers-allocation.json create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-controllers.json create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-node-metrics.json create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-pod-metrics.json create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/prometheus-values.yaml create mode 100755 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/add-monitoring.sh create mode 100755 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/add-provisioner.sh create mode 100755 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/cleanup.sh create mode 100755 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/install.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step01-config.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step02-create-cluster.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step03-iam-cloud-formation.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step04-grant-access.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step05-controller-iam.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step06-add-spot-role.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step07-install-helm-chart.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step08-apply-helm-chart.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step09-add-prometheus-grafana.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step10-add-grafana-port-forward.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step11-grafana-get-password.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step12-add-provisioner.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step13-automatic-node-provisioning.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step14-deprovisioning.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step15-delete-node.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step16-cleanup.sh create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-kops/_index.md create mode 100644 website/content/en/v0.7.0/getting-started/getting-started-with-terraform/_index.md create mode 100644 website/content/en/v0.7.0/provisioner.md create mode 100755 website/content/en/v0.7.0/tasks/_index.md create mode 100644 website/content/en/v0.7.0/tasks/deprovisioning.md create mode 100644 website/content/en/v0.7.0/tasks/pod-density.md create mode 100644 website/content/en/v0.7.0/tasks/provisioning.md create mode 100755 
website/content/en/v0.7.0/tasks/scheduling.md create mode 100644 website/content/en/v0.7.0/tasks/set-resource-limits.md create mode 100644 website/content/en/v0.7.0/troubleshooting.md diff --git a/charts/index.yaml b/charts/index.yaml index 8821213deb94..1f1d1abbc5d6 100644 --- a/charts/index.yaml +++ b/charts/index.yaml @@ -1,9 +1,30 @@ apiVersion: v1 entries: karpenter: + - apiVersion: v2 + appVersion: 0.7.0 + created: "2022-03-14T18:20:57.242973-04:00" + description: A Helm chart for Karpenter, an open-source node provisioning project + built for Kubernetes. + digest: dbaec3f2aa4000a851156188bfcbcec1c5dec40467cbc1d8dca40c38cdd82bba + home: https://karpenter.sh/ + icon: https://repository-images.githubusercontent.com/278480393/dab059c8-caa1-4b55-aaa7-3d30e47a5616 + keywords: + - cluster + - node + - scheduler + - autoscaling + - lifecycle + name: karpenter + sources: + - https://github.com/aws/karpenter/ + type: application + urls: + - karpenter-0.7.0.tgz + version: 0.7.0 - apiVersion: v2 appVersion: 0.6.5 - created: "2022-03-08T15:40:56.872838-05:00" + created: "2022-03-14T18:20:57.241256-04:00" description: A Helm chart for Karpenter, an open-source node provisioning project built for Kubernetes. digest: d762b94978f95743ebe84d6dd9a6d61da6d31b634bb72ef2bda8b3aa086bc139 @@ -24,7 +45,7 @@ entries: version: 0.6.5 - apiVersion: v2 appVersion: 0.6.4 - created: "2022-03-08T15:40:56.872089-05:00" + created: "2022-03-14T18:20:57.238274-04:00" description: A Helm chart for Karpenter, an open-source node provisioning project built for Kubernetes. digest: e95407c59da890e93bb92f4717341bb7aca93b905254be06c112fbee8057ebbf @@ -45,7 +66,7 @@ entries: version: 0.6.4 - apiVersion: v2 appVersion: 0.6.3 - created: "2022-03-08T15:40:56.870853-05:00" + created: "2022-03-14T18:20:57.235947-04:00" description: A Helm chart for Karpenter, an open-source node provisioning project built for Kubernetes. digest: 9251ebd3c6ae4c5bcaa9e2ef23e357576b4c153ba8821113050400f6eb23a90f @@ -66,7 +87,7 @@ entries: version: 0.6.3 - apiVersion: v2 appVersion: 0.6.2 - created: "2022-03-08T15:40:56.869941-05:00" + created: "2022-03-14T18:20:57.233812-04:00" description: A Helm chart for Karpenter, an open-source node provisioning project built for Kubernetes. digest: 2d1853e5fcd1e2cd502c7c338b94a7789162f44238942b5cb6ff3567b2d0f63f @@ -87,7 +108,7 @@ entries: version: 0.6.2 - apiVersion: v2 appVersion: 0.6.1 - created: "2022-03-08T15:40:56.869012-05:00" + created: "2022-03-14T18:20:57.231825-04:00" description: A Helm chart for https://github.com/aws/karpenter/. digest: 025184e9b0b36529501b3d20d1c87e629964d7cbb50400f486044922b7ace7bf name: karpenter @@ -97,7 +118,7 @@ entries: version: 0.6.1 - apiVersion: v2 appVersion: 0.6.0 - created: "2022-03-08T15:40:56.86799-05:00" + created: "2022-03-14T18:20:57.230212-04:00" description: A Helm chart for https://github.com/aws/karpenter/. digest: 351c364ad0a81dd52ffcc81d456b265e5e04cf355d03cdaf2003befe37a1550a name: karpenter @@ -107,7 +128,7 @@ entries: version: 0.6.0 - apiVersion: v2 appVersion: 0.5.6 - created: "2022-03-08T15:40:56.867187-05:00" + created: "2022-03-14T18:20:57.228339-04:00" description: A Helm chart for https://github.com/aws/karpenter/. 
digest: b52a3bbac517e9130c8bec587c411c81f49b38e4510fa45ce1f212377145705b name: karpenter @@ -117,7 +138,7 @@ entries: version: 0.5.6 - apiVersion: v2 appVersion: 0.5.5 - created: "2022-03-08T15:40:56.865694-05:00" + created: "2022-03-14T18:20:57.226743-04:00" description: A Helm chart for https://github.com/aws/karpenter/. digest: 2bd507dd53f10c82bdfe049f0bb4c0193ecf684d73b44ff1bbb5cf2f16f0fa25 name: karpenter @@ -127,7 +148,7 @@ entries: version: 0.5.5 - apiVersion: v2 appVersion: 0.5.4 - created: "2022-03-08T15:40:56.864535-05:00" + created: "2022-03-14T18:20:57.224801-04:00" description: A Helm chart for https://github.com/aws/karpenter/. digest: 0c613878bc1827f2188a78b5d2a2945c417c29c699f8f9c5fa7f4e2cbdeefebb name: karpenter @@ -136,7 +157,7 @@ entries: - karpenter-0.5.4.tgz version: 0.5.4 - apiVersion: v2 - created: "2022-03-08T15:40:56.863765-05:00" + created: "2022-03-14T18:20:57.223282-04:00" description: A Helm chart for https://github.com/aws/karpenter/. digest: f6bb831f6ceea8b90634f9a6841e77de97ad1e3350437cb66cc50f14b7c3becc name: karpenter @@ -145,7 +166,7 @@ entries: - karpenter-0.5.3.tgz version: 0.5.3 - apiVersion: v2 - created: "2022-03-08T15:40:56.862803-05:00" + created: "2022-03-14T18:20:57.221651-04:00" description: A Helm chart for https://github.com/aws/karpenter/. digest: 45f6262a018c11c0c3068e29dc2e1cf337cba8c390ec269b9b36c6d8bdac581e name: karpenter @@ -154,7 +175,7 @@ entries: - karpenter-0.5.2.tgz version: 0.5.2 - apiVersion: v2 - created: "2022-03-08T15:40:56.861856-05:00" + created: "2022-03-14T18:20:57.220063-04:00" description: A Helm chart for https://github.com/aws/karpenter/. digest: 4247c15ec571f82025c80ceaa9354ce3ee3b209e4ba15a0ca3c36a1f90a859ec name: karpenter @@ -163,7 +184,7 @@ entries: - karpenter-0.5.1.tgz version: 0.5.1 - apiVersion: v2 - created: "2022-03-08T15:40:56.860846-05:00" + created: "2022-03-14T18:20:57.217065-04:00" description: A Helm chart for https://github.com/aws/karpenter/. digest: 6d49a00dca8a6f8d7938e9508228af085d85a2746e2f742c895b954fe71292df name: karpenter @@ -172,7 +193,7 @@ entries: - karpenter-0.5.0.tgz version: 0.5.0 - apiVersion: v2 - created: "2022-03-08T15:40:56.859919-05:00" + created: "2022-03-14T18:20:57.215599-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 70c5bbea1016aa56da5227b7f5d7cf5700c4d38ff1814f8e28072c77d8a02c20 name: karpenter @@ -181,7 +202,7 @@ entries: - karpenter-0.4.3.tgz version: 0.4.3 - apiVersion: v2 - created: "2022-03-08T15:40:56.859011-05:00" + created: "2022-03-14T18:20:57.214118-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 558f26b8786384766407c5f86cff5180066c4fa68fe9e3cf7b782ec8564f04ff name: karpenter @@ -190,7 +211,7 @@ entries: - karpenter-0.4.2.tgz version: 0.4.2 - apiVersion: v2 - created: "2022-03-08T15:40:56.858072-05:00" + created: "2022-03-14T18:20:57.212549-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: ec4bcf2a7f279ad97b8749a7f1a8edd527a09f8c00614d911a773843bd44a58b name: karpenter @@ -199,7 +220,7 @@ entries: - karpenter-0.4.1.tgz version: 0.4.1 - apiVersion: v2 - created: "2022-03-08T15:40:56.857239-05:00" + created: "2022-03-14T18:20:57.210268-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. 
digest: d4ea011979906ad22b2345d0eb4c941445e89c1b0561c6ae44bb71fdb641c161 name: karpenter @@ -208,7 +229,7 @@ entries: - karpenter-0.4.0.tgz version: 0.4.0 - apiVersion: v2 - created: "2022-03-08T15:40:56.85635-05:00" + created: "2022-03-14T18:20:57.207855-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: e8b9b3dd447d0ab48b66156bf88ae924121e5780d741ee18cc9a43a90a6f4290 name: karpenter @@ -217,7 +238,7 @@ entries: - karpenter-0.3.4.tgz version: 0.3.4 - apiVersion: v2 - created: "2022-03-08T15:40:56.855431-05:00" + created: "2022-03-14T18:20:57.206092-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 0d8bc4c74416aeb5233a0a65c4b4fd678b7c961f1dca10604e33337715b585ff name: karpenter @@ -226,7 +247,7 @@ entries: - karpenter-0.3.3.tgz version: 0.3.3 - apiVersion: v2 - created: "2022-03-08T15:40:56.854451-05:00" + created: "2022-03-14T18:20:57.202549-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: e24d1ca8364e92d161f8295b552ec101a72491c62926dd67779d4e4a7e6b5756 name: karpenter @@ -235,7 +256,7 @@ entries: - karpenter-0.3.2.tgz version: 0.3.2 - apiVersion: v2 - created: "2022-03-08T15:40:56.853728-05:00" + created: "2022-03-14T18:20:57.200799-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 86c9a22a01247b2ed564baa12b88ed2df6556c9e8cb6aca9b990212c1e809c45 name: karpenter @@ -244,7 +265,7 @@ entries: - karpenter-0.3.1.tgz version: 0.3.1 - apiVersion: v2 - created: "2022-03-08T15:40:56.852446-05:00" + created: "2022-03-14T18:20:57.199492-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 6d86156704c2c7eac8094dbe57458a3b1334eb38626b6da4ccd60c5cd65e0250 name: karpenter @@ -253,7 +274,7 @@ entries: - karpenter-0.3.0.tgz version: v0.3.0 - apiVersion: v2 - created: "2022-03-08T15:40:56.851763-05:00" + created: "2022-03-14T18:20:57.197972-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 5b19f5da4dd3d3e147f3e93bf168d1cd24106e12eb7fe44ad88850f73ceb82f0 name: karpenter @@ -262,7 +283,7 @@ entries: - karpenter-0.2.9.tgz version: v0.2.9 - apiVersion: v2 - created: "2022-03-08T15:40:56.851037-05:00" + created: "2022-03-14T18:20:57.19643-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 8b704acc1a9c018da774f62c6be824e7389bb482437d318902d01237e3cacfc5 name: karpenter @@ -271,7 +292,7 @@ entries: - karpenter-0.2.8.tgz version: v0.2.8 - apiVersion: v2 - created: "2022-03-08T15:40:56.850366-05:00" + created: "2022-03-14T18:20:57.194921-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 048ec496d46d8ab534bc8af6090699fedd2aaf45c8e1658636c7a44afdb6e898 name: karpenter @@ -280,7 +301,7 @@ entries: - karpenter-0.2.7.tgz version: v0.2.7 - apiVersion: v2 - created: "2022-03-08T15:40:56.84968-05:00" + created: "2022-03-14T18:20:57.193293-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 48008ba427baa5206bd59abe9e25005b0b6673f03e705f2149717bb89156873d name: karpenter @@ -289,7 +310,7 @@ entries: - karpenter-0.2.6.tgz version: v0.2.6 - apiVersion: v2 - created: "2022-03-08T15:40:56.849039-05:00" + created: "2022-03-14T18:20:57.191352-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. 
digest: 208d0c14d2cdbc8c387dc92a473b625f905e829486edd5fd007eb56c9f896682 name: karpenter @@ -298,7 +319,7 @@ entries: - karpenter-0.2.5.tgz version: v0.2.5 - apiVersion: v2 - created: "2022-03-08T15:40:56.848266-05:00" + created: "2022-03-14T18:20:57.190027-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: ed7d1d08bde38f41a6bc8a4bf93b4dd85a3b6e5f526e44324483eadc9faeea6d name: karpenter @@ -307,7 +328,7 @@ entries: - karpenter-0.2.4.tgz version: v0.2.4 - apiVersion: v2 - created: "2022-03-08T15:40:56.847392-05:00" + created: "2022-03-14T18:20:57.188277-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 902bd53be060893d4bfe3c0f57ae831448aa8790930d153666429ea5472d824b name: karpenter @@ -316,7 +337,7 @@ entries: - karpenter-0.2.3.tgz version: v0.2.3 - apiVersion: v2 - created: "2022-03-08T15:40:56.846845-05:00" + created: "2022-03-14T18:20:57.187378-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 90d075cbc08871ffb56b2e530fdf304b6af32c76670fcdd299af87d3810d4651 name: karpenter @@ -325,7 +346,7 @@ entries: - karpenter-0.2.2.tgz version: v0.2.2 - apiVersion: v2 - created: "2022-03-08T15:40:56.84618-05:00" + created: "2022-03-14T18:20:57.186369-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 87e161d40c65dc58a3963f17d70cc165a5bf46155f723e487486f57d209e50d1 name: karpenter @@ -334,7 +355,7 @@ entries: - karpenter-0.2.1.tgz version: v0.2.1 - apiVersion: v2 - created: "2022-03-08T15:40:56.84547-05:00" + created: "2022-03-14T18:20:57.18514-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 552bdc17f5625e4696bb7419284026f4291428877092ec5d984f486a2b812d6f name: karpenter @@ -343,7 +364,7 @@ entries: - karpenter-0.2.0.tgz version: v0.2.0 - apiVersion: v2 - created: "2022-03-08T15:40:56.844663-05:00" + created: "2022-03-14T18:20:57.183752-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 1a597c415201e61576b83ee6ec3e24b99281805b3be8141b0a344c6f014d4e15 name: karpenter @@ -352,7 +373,7 @@ entries: - karpenter-0.1.3.tgz version: v0.1.3 - apiVersion: v2 - created: "2022-03-08T15:40:56.84382-05:00" + created: "2022-03-14T18:20:57.182565-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. digest: 6a6753731aef19db2aae72b6bfc1535917053d87f706579e158cae98c23887b4 name: karpenter @@ -361,7 +382,7 @@ entries: - karpenter-0.1.2.tgz version: v0.1.2 - apiVersion: v2 - created: "2022-03-08T15:40:56.84211-05:00" + created: "2022-03-14T18:20:57.180151-04:00" description: A Helm chart for https://github.com/awslabs/karpenter/. 
digest: 39685c8cbe9a757ca48721aed08b49111fef18bc2a9f67d3223f19d0706f09f7 name: karpenter @@ -369,4 +390,4 @@ entries: urls: - karpenter-0.1.1.tgz version: v0.1.1 -generated: "2022-03-08T15:40:56.840237-05:00" +generated: "2022-03-14T18:20:57.177912-04:00" diff --git a/charts/karpenter-0.7.0.tgz b/charts/karpenter-0.7.0.tgz new file mode 100644 index 0000000000000000000000000000000000000000..954b386f785e7ba7cc3f91e35de8728d046cd47f GIT binary patch literal 9312 [base85-encoded binary payload of the packaged 0.7.0 chart elided] literal 0 HcmV?d00001 diff --git a/charts/karpenter/Chart.yaml index 451911d28e56..07e163f72fee 100644 --- a/charts/karpenter/Chart.yaml +++
b/charts/karpenter/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: karpenter description: A Helm chart for Karpenter, an open-source node provisioning project built for Kubernetes. type: application -version: 0.6.5 -appVersion: 0.6.5 +version: 0.7.0 +appVersion: 0.7.0 keywords: - cluster - node diff --git a/charts/karpenter/README.md b/charts/karpenter/README.md index 2560622a23ba..14215506ce5a 100644 --- a/charts/karpenter/README.md +++ b/charts/karpenter/README.md @@ -2,11 +2,11 @@ A Helm chart for Karpenter, an open-source node provisioning project built for Kubernetes. -![Version: 0.6.5](https://img.shields.io/badge/Version-0.6.5-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.6.5](https://img.shields.io/badge/AppVersion-0.6.5-informational?style=flat-square) +![Version: 0.7.0](https://img.shields.io/badge/Version-0.7.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.7.0](https://img.shields.io/badge/AppVersion-0.7.0-informational?style=flat-square) ## Documentation -For full Karpenter documentation please checkout [https://karpenter.sh](https://karpenter.sh/v0.6.5/). +For full Karpenter documentation please check out [https://karpenter.sh](https://karpenter.sh/v0.7.0/). ## Installing the Chart ```bash helm repo add karpenter https://charts.karpenter.sh/ helm repo update ``` -You can follow the detailed installation instruction in the [documentation](https://karpenter.sh/v0.6.5/getting-started/getting-started-with-eksctl/#install) which covers the Karpenter prerequisites and installation options. The outcome of these instructions should result in something like the following command. +You can follow the detailed installation instructions in the [documentation](https://karpenter.sh/v0.7.0/getting-started/getting-started-with-eksctl/#install), which cover the Karpenter prerequisites and installation options. The outcome of these instructions should be something like the following command. ```bash helm upgrade --install --namespace karpenter --create-namespace \ karpenter karpenter/karpenter \ - --version 0.6.5 \ + --version 0.7.0 \ --set serviceAccount.annotations.eks\.amazonaws\.com/role-arn=${KARPENTER_IAM_ROLE_ARN} \ --set clusterName=${CLUSTER_NAME} \ --set clusterEndpoint=${CLUSTER_ENDPOINT} \ @@ -41,7 +41,7 @@ helm upgrade --install --namespace karpenter --create-namespace \ | clusterEndpoint | string | `""` | Cluster endpoint. | | clusterName | string | `""` | Cluster name. | | controller.env | list | `[]` | Additional environment variables for the controller pod. | -| controller.image | string | `"public.ecr.aws/karpenter/controller:v0.6.5@sha256:f2f64529df549a96b05e0a0d2b73fb9346ed8731b985fbc83335eee1573dcfe6"` | Controller image. | +| controller.image | string | `"public.ecr.aws/karpenter/controller:v0.7.0@sha256:1afafbdab75bfbd93729c6fd61ef5d12b071498413579fd45e85af1c89680717"` | Controller image. | | controller.logLevel | string | `""` | Controller log level, defaults to the global log level | | controller.resources | object | `{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":1,"memory":"1Gi"}}` | Resources for the controller pod. | | controller.securityContext | object | `{}` | SecurityContext for the controller container.
| @@ -67,7 +67,7 @@ helm upgrade --install --namespace karpenter --create-namespace \ | terminationGracePeriodSeconds | string | `nil` | Override the default termination grace period for the pod. | | tolerations | list | `[]` | Tolerations to allow the pod to be scheduled to nodes with taints. | | webhook.env | list | `[]` | Additional environment variables for the webhook pod. | -| webhook.image | string | `"public.ecr.aws/karpenter/webhook:v0.6.5@sha256:d84f495408e0a5f5e576170c7b5aff8291766a42b421419b9f43574b71499cc1"` | Webhook image. | +| webhook.image | string | `"public.ecr.aws/karpenter/webhook:v0.7.0@sha256:3e4128fbf16f055a1a82d7141607255fc6d619e8698f41c1a7863f03cbfd506c"` | Webhook image. | | webhook.logLevel | string | `""` | Webhook log level, defaults to the global log level | | webhook.port | int | `8443` | The container port to use for the webhook. | | webhook.resources | object | `{"limits":{"cpu":"100m","memory":"50Mi"},"requests":{"cpu":"100m","memory":"50Mi"}}` | Resources for the webhook pod. | diff --git a/charts/karpenter/values.yaml b/charts/karpenter/values.yaml index 87f639611e05..e0f78223c0bc 100644 --- a/charts/karpenter/values.yaml +++ b/charts/karpenter/values.yaml @@ -61,7 +61,7 @@ affinity: tolerations: [] controller: # -- Controller image. - image: "public.ecr.aws/karpenter/controller:v0.6.5@sha256:f2f64529df549a96b05e0a0d2b73fb9346ed8731b985fbc83335eee1573dcfe6" + image: "public.ecr.aws/karpenter/controller:v0.7.0@sha256:1afafbdab75bfbd93729c6fd61ef5d12b071498413579fd45e85af1c89680717" # -- SecurityContext for the controller container. securityContext: {} # -- Additional environment variables for the controller pod. @@ -81,7 +81,7 @@ controller: logLevel: "" webhook: # -- Webhook image. - image: "public.ecr.aws/karpenter/webhook:v0.6.5@sha256:d84f495408e0a5f5e576170c7b5aff8291766a42b421419b9f43574b71499cc1" + image: "public.ecr.aws/karpenter/webhook:v0.7.0@sha256:3e4128fbf16f055a1a82d7141607255fc6d619e8698f41c1a7863f03cbfd506c" # -- SecurityContext for the webhook container. securityContext: {} # -- The container port to use for the webhook. diff --git a/website/config.yaml b/website/config.yaml index d43eb2700f31..4d73082d1ded 100644 --- a/website/config.yaml +++ b/website/config.yaml @@ -65,12 +65,10 @@ params: url: 'https://slack.k8s.io/' icon: fab fa-slack desc: 'Chat with us on Slack in the #aws-provider channel' - latest_release_version: v0.6.5 + latest_release_version: v0.7.0 versions: + - "v0.7.0" - "v0.6.5" - - "v0.6.4" - - "v0.6.3" - - "v0.6.0" - "v0.5.6" - "v0.4.3" - "preview" @@ -82,5 +80,5 @@ menu: pre: - name: Docs weight: 20 - url: '/v0.6.5/' + url: '/v0.7.0/' pre: diff --git a/website/content/en/v0.7.0/AWS/_index.md b/website/content/en/v0.7.0/AWS/_index.md new file mode 100644 index 000000000000..8c6bd01646fe --- /dev/null +++ b/website/content/en/v0.7.0/AWS/_index.md @@ -0,0 +1,7 @@ +--- +title: "AWS" +linkTitle: "AWS" +weight: 70 +--- + +Check out the [Karpenter EKS Best Practices](https://aws.github.io/aws-eks-best-practices/karpenter/) guide. 
\ No newline at end of file
diff --git a/website/content/en/v0.7.0/AWS/launch-templates.md b/website/content/en/v0.7.0/AWS/launch-templates.md
new file mode 100644
index 000000000000..b3546ecc22d3
--- /dev/null
+++ b/website/content/en/v0.7.0/AWS/launch-templates.md
@@ -0,0 +1,237 @@
+---
+title: "Launch Templates and Custom Images"
+linkTitle: "Launch Templates"
+weight: 80
+---
+
+By default, Karpenter generates launch templates with the following features:
+- [EKS Optimized AMI](https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html) for nodes.
+- Encrypted EBS root volumes with the default (AWS managed) KMS key for nodes.
+
+If these features are not sufficient for your use case (for example, customizing the node image or the EBS KMS key), you need a custom launch template. Karpenter supports using custom launch templates.
+
+Note: When using a custom launch template, **you are taking responsibility** for maintaining the launch template, including updating which AMI is used (i.e., for security updates). In the default configuration, Karpenter uses the latest version of the EKS optimized AMI, which is maintained by AWS.
+
+## Introduction
+
+Karpenter follows existing AWS patterns for customizing the base image of instances. More specifically, Karpenter uses [EC2 launch templates](https://docs.aws.amazon.com/autoscaling/ec2/userguide/LaunchTemplates.html). Launch templates may specify many values; the pivotal one is the base image (AMI). Launch templates also specify parameters related to networking, authorization, instance type, and more.
+
+Launch Templates and AMIs are unique to AWS regions, similar to EKS clusters. IAM resources are global.
+
+**Karpenter only implements a subset of launch template fields, and some fields should not be set.**
+
+This guide describes the requirements for using launch templates with Karpenter, followed by an example procedure.
+
+## Launch Template Requirements
+
+The Launch Template resource includes a large number of fields. AWS accepts launch templates with any subset of these fields defined.
+
+Certain fields are critical, such as the AMI and User Data. Some fields are useful for particular workloads, such as storage and the IAM Instance Profile.
+
+Finally, **the majority of Launch Template fields should not be set** (or will have no effect), such as network interfaces and instance type.
+
+## Important Fields
+
+When creating a custom launch template, the AMI and User Data are the defining characteristics. The Instance Profile (IAM role) and Security Group (firewall rules) are also important for Karpenter.
+
+### AMI
+
+An AMI (Amazon Machine Image) is the base image/VM for a launch template.
+
+[Review the instructions for importing a VM to AWS.](https://docs.aws.amazon.com/vm-import/latest/userguide/vmimport-image-import.html) Note the AMI id generated by this process, such as `ami-074cce78125f09d61`.
+
+### User Data - Autoconfigure
+
+Importantly, the AMI must support automatically connecting to a cluster based on "user data", a base64-encoded string passed to the instance at startup. The syntax and purpose of the user data vary between images. The Karpenter default OS, Amazon Linux 2 (AL2), accepts shell scripts (bash commands).
+
+[AWS calls data passed to an instance at launch time "user data".](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html#user-data-shell-scripts)
+
+In the default configuration, Karpenter uses an EKS optimized version of AL2 and passes the hostname of the Kubernetes API server and a certificate. The EKS Optimized AMI includes a `bootstrap.sh` script which connects the instance to the cluster, based on the passed data.
+
+Alternatively, you may reference AWS's [`bootstrap.sh` file](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) when building a custom base image.
+
+```
+#!/bin/bash
+/etc/eks/bootstrap.sh <my-cluster-name> \
+--kubelet-extra-args <'--max-pods=40'> \
+--b64-cluster-ca <base64-encoded-cluster-ca> \
+--apiserver-endpoint <api-server-endpoint> \
+--dns-cluster-ip <dns-cluster-ip> \
+--use-max-pods false
+```
+
+Note: you must populate this command with live values; the angle-bracketed placeholders are illustrative. Karpenter will not change the user data in the launch template.
+
+Encode the user data with the CloudFormation `!Base64` YAML function or the `cat userdata.sh | base64 > userdata-encoded.txt` shell command.
+
+**Bootstrap Script Parameters**
+
+The sample bootstrap script requires information to join the cluster.
+
+These values may be found using:
+```
+aws eks describe-cluster --name MyKarpenterCluster
+```
+
+**Kubelet Arguments**
+
+Specifying max-pods can break Karpenter's binpacking logic (it has no way to know what this setting is). If Karpenter attempts to pack more than this number of pods, the instance may be oversized, and additional pods will reschedule.
+
+## Situational Fields
+
+Configure these values in response to a particular use case, such as nodes interacting with another AWS service, or using EBS storage on the node.
+
+### Instance Profile - IAM
+
+The launch template must include an "instance profile" -- an IAM role.
+
+The instance profile must include *at least* the permissions of the default Karpenter node instance profile. See the default role, `KarpenterNodeRole`, in the full example below for more information.
+
+See also [the managed policy "AmazonEKSWorkerNodePolicy"](https://docs.aws.amazon.com/eks/latest/userguide/security-iam-awsmanpol.html#security-iam-awsmanpol-AmazonEKSWorkerNodePolicy), which includes permission to describe clusters and subnets.
+
+### Storage
+
+Karpenter expects nothing of node storage. Configure as needed for your base image.
+
+### Security Groups - Firewall
+
+The launch template may include a security group (i.e., instance firewall rules), and the security group must be associated with the virtual private cloud (VPC) of the EKS cluster. If none is specified, the default security group of the cluster VPC is used.
+
+The security group must permit communication with the EKS control plane. Outbound access should be permitted for at least HTTPS on port 443 and DNS (UDP and TCP) on port 53, and must also be allowed by your subnet's network access control list (network ACL).
+
+## Fields with Undefined Behavior
+
+Resources referenced by these fields are controlled by EKS/Karpenter, and not the launch template.
+
+### Instance Type
+
+The instance type should not be specified in the launch template. Karpenter will determine the instance type at run time.
+
+### Network Interfaces
+
+The [AWS CNI](https://docs.aws.amazon.com/eks/latest/userguide/pod-networking.html) will configure the network interfaces. Do not configure network interfaces in the launch template.
+
+## Creating the Launch Template
+
+Launch Templates may be created via the web console, the AWS CLI, or CloudFormation.
+
+### CloudFormation
+
+The procedure, in summary, is to:
+1. [Create an AMI as described in the EC2 documentation.](https://docs.aws.amazon.com/vm-import/latest/userguide/vmimport-image-import.html)
+2. Write an EC2 Launch Template specification including the AMI.
+3. Push the specification to AWS with CloudFormation.
+4. Update the Provisioner CRD to specify the new Launch Template.
+
+An example CloudFormation YAML definition of a launch template for Karpenter is provided below.
+
+CloudFormation YAML is well suited to the relatively high configuration density of launch templates, and to creating the unusual InstanceProfile resource.
+
+You must manually replace these values in the template:
+- SecurityGroupID
+  - List all security groups with `aws ec2 describe-security-groups`
+- Parameters in UserData
+- AMI
+
+```yaml
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+spec:
+  provider:
+    launchTemplate: CustomKarpenterLaunchTemplateDemo
+```
diff --git a/website/content/en/v0.7.0/AWS/provisioning.md b/website/content/en/v0.7.0/AWS/provisioning.md
new file mode 100644
index 000000000000..cc279e411bde
--- /dev/null
+++ b/website/content/en/v0.7.0/AWS/provisioning.md
@@ -0,0 +1,214 @@
+---
+title: "Provisioning Configuration"
+linkTitle: "Provisioning"
+weight: 10
+---
+
+## spec.provider
+
+This section covers parameters of the AWS Cloud Provider.
+
+[Review these fields in the code.](https://github.com/aws/karpenter/blob{{< githubRelRef >}}pkg/cloudprovider/aws/apis/v1alpha1/provider.go)
+
+### InstanceProfile
+
+An `InstanceProfile` is a way to pass a single IAM role to an EC2 instance. Karpenter will not create one automatically. A default profile may be specified on the controller, allowing this field to be omitted here. If no default is configured on the controller and no profile is specified here, node provisioning will fail.
+
+```
+spec:
+  provider:
+    instanceProfile: MyInstanceProfile
+```
+
+### LaunchTemplate
+
+A launch template is a set of configuration values sufficient for launching an EC2 instance (e.g., AMI, storage spec).
+
+A custom launch template is specified by name. If none is specified, Karpenter will automatically create a launch template.
+
+Review the [Launch Template documentation](../launch-templates/) to learn how to create a custom one.
+
+```
+spec:
+  provider:
+    launchTemplate: MyLaunchTemplate
+```
+
+### SubnetSelector
+
+Karpenter discovers subnets using [AWS tags](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html).
+
+Subnets may be specified by any AWS tag, including `Name`. Selecting tag values using wildcards ("\*") is supported.
+
+When launching nodes, Karpenter automatically chooses a subnet that matches the desired zone. If multiple subnets exist for a zone, the one with the most available IP addresses will be used.
+
+**Examples**
+
+Select all subnets with a specified tag:
+```
+  subnetSelector:
+    karpenter.sh/discovery/MyClusterName: '*'
+```
+
+Select subnets by name:
+```
+  subnetSelector:
+    Name: my-subnet
+```
+
+Select subnets by an arbitrary AWS tag key/value pair:
+```
+  subnetSelector:
+    MySubnetTag: value
+```
+
+Select subnets using wildcards:
+```
+  subnetSelector:
+    Name: "*Public*"
+```
+
+### SecurityGroupSelector
+
+The security group of an instance is comparable to a set of firewall rules.
+
+EKS creates at least two security groups by default; [review the documentation](https://docs.aws.amazon.com/eks/latest/userguide/sec-group-reqs.html) for more info.
+
+Security groups may be specified by any AWS tag, including "Name". Selecting tags using wildcards ("*") is supported.
+
+‼️ When launching nodes, Karpenter uses all of the security groups that match the selector. If multiple security groups with the tag `karpenter.sh/discovery/MyClusterName` match the selector, this may result in failures using the AWS Load Balancer controller. The Load Balancer controller only supports a single security group having that tag key. See this [issue](https://github.com/kubernetes-sigs/aws-load-balancer-controller/issues/2367) for more details.
+
+To verify whether this restriction affects you, run the following commands.
+```bash
+CLUSTER_VPC_ID="$(aws eks describe-cluster --name $CLUSTER_NAME --query cluster.resourcesVpcConfig.vpcId --output text)"
+
+aws ec2 describe-security-groups --filters Name=vpc-id,Values=$CLUSTER_VPC_ID Name=tag-key,Values=karpenter.sh/discovery/$CLUSTER_NAME --query 'SecurityGroups[].[GroupName]' --output text
+```
+
+If multiple securityGroups are printed, you will need a more targeted securityGroupSelector.
+
+**Examples**
+
+Select all security groups with a specified tag:
+```
+spec:
+  provider:
+    securityGroupSelector:
+      karpenter.sh/discovery/MyClusterName: '*'
+```
+
+Select security groups by name, or another tag (all criteria must match):
+```
+  securityGroupSelector:
+    Name: my-security-group
+    MySecurityTag: '' # matches all resources with the tag
+```
+
+Select security groups by name using a wildcard:
+```
+  securityGroupSelector:
+    Name: "*Public*"
+```
+
+### Tags
+
+Karpenter adds tags to all resources it creates, including EC2 Instances, EBS volumes, and Launch Templates. The default set of AWS tags is listed below.
+
+```
+Name: karpenter.sh/cluster/<cluster-name>/provisioner/<provisioner-name>
+karpenter.sh/cluster/<cluster-name>: owned
+kubernetes.io/cluster/<cluster-name>: owned
+```
+
+Additional tags can be added in the provider tags section; they are merged with, and can override, the default tag values.
+```
+spec:
+  provider:
+    tags:
+      InternalAccountingTag: 1234
+      dev.corp.net/app: Calculator
+      dev.corp.net/team: MyTeam
+```
+
+### Metadata Options
+
+Control the exposure of the [Instance Metadata Service](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) on EC2 Instances launched by this provisioner using a generated launch template.
+
+Refer to [recommended security best practices](https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) for limiting exposure of Instance Metadata and User Data to pods.
+
+If metadataOptions are omitted from this provisioner, the following default settings will be used.
+
+```
+spec:
+  provider:
+    metadataOptions:
+      httpEndpoint: enabled
+      httpProtocolIPv6: disabled
+      httpPutResponseHopLimit: 2
+      httpTokens: required
+```
+
+### Amazon Machine Image (AMI) Family
+
+The AMI used when provisioning nodes can be controlled by the `amiFamily` field. Based on the value set for `amiFamily`, Karpenter will automatically query for the appropriate [EKS optimized AMI](https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-amis.html) via AWS Systems Manager (SSM).
+
+Currently, Karpenter supports `amiFamily` values `AL2`, `Bottlerocket`, and `Ubuntu`. GPUs are only supported with `AL2` and `Bottlerocket`.
+
+Note: If a custom launch template is specified, then the AMI value in the launch template is used rather than the `amiFamily` value.
+
+```
+spec:
+  provider:
+    amiFamily: Bottlerocket
+```
+
+### Block Device Mappings
+
+The `blockDeviceMappings` field in a Provisioner can be used to control the Elastic Block Store (EBS) volumes that Karpenter attaches to provisioned nodes. Karpenter uses default block device mappings for the AMI Family specified. For example, the `Bottlerocket` AMI Family defaults to two block device mappings, one for Bottlerocket's control volume and the other for container resources such as images and logs.
+
+Learn more about [block device mappings](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/block-device-mapping-concepts.html).
+
+Note: If a custom launch template is specified, then the `BlockDeviceMappings` field in the launch template is used rather than the provisioner's `blockDeviceMappings`.
+
+```
+spec:
+  provider:
+    blockDeviceMappings:
+      - deviceName: /dev/xvda
+        volumeSize: 100Gi
+        volumeType: gp3
+        iops: 10000
+        encrypted: true
+        kmsKeyID: "arn:aws:kms:us-west-2:111122223333:key/1234abcd-12ab-34cd-56ef-1234567890ab"
+        deleteOnTermination: true
+        throughput: 125
+```
+
+## Other Resources
+
+### Accelerators, GPU
+
+Karpenter supports accelerators, such as GPUs. Accelerator (e.g., GPU) resource values include:
+- `nvidia.com/gpu`
+- `amd.com/gpu`
+- `aws.amazon.com/neuron`
+
+Additionally, include a resource requirement in the workload manifest. This will cause the GPU-dependent pod to be scheduled onto an appropriate node.
+
+*Accelerator resource in workload manifest (e.g., pod)*
+
+```yaml
+spec:
+  template:
+    spec:
+      containers:
+      - resources:
+          limits:
+            nvidia.com/gpu: "1"
+```
diff --git a/website/content/en/v0.7.0/_index.md b/website/content/en/v0.7.0/_index.md
new file mode 100755
index 000000000000..fd0addc59a24
--- /dev/null
+++ b/website/content/en/v0.7.0/_index.md
@@ -0,0 +1,27 @@
+
+---
+title: "Documentation"
+linkTitle: "Docs"
+weight: 20
+cascade:
+  type: docs
+---
+Karpenter is an open-source node provisioning project built for Kubernetes. Adding Karpenter to a Kubernetes cluster can dramatically improve the efficiency and cost of running workloads on that cluster. Karpenter is tightly integrated with Kubernetes features to make sure that the right types and amounts of compute resources are available to pods as they are needed. Karpenter works by:
+
+* **Watching** for pods that the Kubernetes scheduler has marked as unschedulable
+* **Evaluating** scheduling constraints (resource requests, node selectors, affinities, tolerations, and topology spread constraints) requested by the pods
+* **Provisioning** nodes that meet the requirements of the pods
+* **Scheduling** the pods to run on the new nodes
+* **Removing** the nodes when the nodes are no longer needed
+
+As a cluster administrator, you can configure an unconstrained Karpenter provisioner when it is first installed and not change it again. Other times, you might continue to tweak the provisioner or create multiple provisioners for a cluster used by different teams. Ongoing cluster administrator tasks include upgrading and decommissioning nodes.
+
+As an application developer, you can make specific requests for capacity and features you want from the nodes running your pods. Karpenter is designed to quickly create the best possible nodes to meet those needs and schedule the pods to run on them.
+
+Learn more about Karpenter and how to get started below.
diff --git a/website/content/en/v0.7.0/concepts/_index.md b/website/content/en/v0.7.0/concepts/_index.md
new file mode 100644
index 000000000000..8fb252e5ad29
--- /dev/null
+++ b/website/content/en/v0.7.0/concepts/_index.md
@@ -0,0 +1,164 @@
+---
+title: "Concepts"
+linkTitle: "Concepts"
+weight: 35
+---
+
+Users fall under two basic roles: Kubernetes cluster administrators and application developers. This document describes Karpenter concepts through the lens of those two types of users.
+
+## Cluster administrator
+
+As a Kubernetes cluster administrator, you can engage with Karpenter to:
+
+* Install Karpenter
+* Configure provisioners to set constraints and other features for managing nodes
+* Deprovision nodes
+* Upgrade nodes
+
+Concepts associated with this role are described below.
+
+### Installing Karpenter
+
+Karpenter is designed to run on a node in your Kubernetes cluster. As part of the installation process, you need credentials from the underlying cloud provider to allow nodes to be started up and added to the cluster as they are needed.
+
+[Getting Started with Karpenter on AWS](../getting-started) describes the process of installing Karpenter on an AWS cloud provider. Because requests to add and delete nodes and schedule pods are made through Kubernetes, AWS IAM Roles for Service Accounts (IRSA) are needed by your Kubernetes cluster to make privileged requests to AWS. For example, Karpenter uses AWS IRSA roles to grant the permissions needed to describe EC2 instance types and create EC2 instances.
+
+Once privileges are in place, Karpenter is deployed with a Helm chart.
+
+### Configuring provisioners
+
+Karpenter's job is to add nodes to handle unschedulable pods, schedule pods on those nodes, and remove the nodes when they are not needed. To configure Karpenter, you create *provisioners* that define how Karpenter manages unschedulable pods and expires nodes. Here are some things to know about the Karpenter provisioner:
+
+* **Unschedulable pods**: Karpenter only attempts to schedule pods that have a status condition `Unschedulable=True`, which the kube scheduler sets when it fails to schedule the pod to existing capacity.
+
+* **Provisioner CR**: Karpenter defines a Custom Resource called a Provisioner to specify provisioning configuration. Each provisioner manages a distinct set of nodes, but pods can be scheduled to any provisioner that supports their scheduling constraints. A provisioner contains constraints that impact the nodes that can be provisioned and attributes of those nodes (such as timers for removing nodes). See [Provisioner API](../provisioner) for a description of settings and the [Provisioning](../tasks/provisioning) task for provisioner examples.
+
+* **Well-known labels**: The provisioner can use well-known Kubernetes labels to allow pods to request only certain instance types, architectures, operating systems, or other attributes when creating nodes. See [Well-Known Labels, Annotations and Taints](https://kubernetes.io/docs/reference/labels-annotations-taints/) for details. Keep in mind that only a subset of these labels is supported in Karpenter, as described later.
+
+* **Deprovisioning nodes**: A provisioner can also include time-to-live values to indicate when nodes should be deprovisioned after a set amount of time from when they were created or after they become empty of deployed pods.
+
+* **Multiple provisioners**: Multiple provisioners can be configured on the same cluster. For example, you might want to configure different teams on the same cluster to run on completely separate capacity. One team could run on nodes using Bottlerocket, while another uses the EKS optimized AMI.
+
+Although most use cases are addressed with a single provisioner for multiple teams, multiple provisioners are useful to isolate nodes for billing, use different node constraints (such as no GPUs for a team), or use different deprovisioning settings. A minimal provisioner example is sketched below.
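+
+The sketch below is illustrative, not a recommendation; the field values are example choices, and the time-to-live fields it sets are explained in the next two sections:
+
+```yaml
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: default
+spec:
+  # Constrain provisioned nodes to a subset of what the cloud provider offers.
+  requirements:
+    - key: kubernetes.io/arch
+      operator: In
+      values: ["amd64"]
+  # Delete nodes that have been empty of workload pods for 30 seconds.
+  ttlSecondsAfterEmpty: 30
+  # Replace nodes after roughly 30 days, regardless of utilization.
+  ttlSecondsUntilExpired: 2592000
+```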
+
+### Deprovisioning nodes
+
+Karpenter deletes nodes when they are no longer needed.
+
+* **Finalizer**: Karpenter places a finalizer bit on each node it creates. When a request comes in to delete one of those nodes (such as a TTL or a manual `kubectl delete node`), Karpenter will cordon the node, drain all the pods, terminate the EC2 instance, and delete the node object. Karpenter handles all clean-up work needed to properly delete the node.
+* **Node Expiry**: If a node expiry time-to-live value (`ttlSecondsUntilExpired`) is reached, that node is drained of pods and deleted (even if it is still running workloads).
+* **Empty nodes**: When the last workload pod running on a Karpenter-managed node is gone, the node is annotated with an emptiness timestamp. Once that "node empty" time-to-live (`ttlSecondsAfterEmpty`) is reached, finalization is triggered.
+
+For more details on how Karpenter deletes nodes, see [Deprovisioning nodes](../tasks/deprovisioning).
+
+### Upgrading nodes
+
+A straightforward way to upgrade nodes is to set `ttlSecondsUntilExpired`. Nodes will be terminated after a set period of time and will be replaced with newer nodes using the latest [EKS Optimized AMI](https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-amis.html) or the AMI specified in the `$LATEST` version of your launch template.
+
+Understanding the following concepts will help you carry out the tasks just described.
+
+### Constraints
+
+The concept of layered constraints is key to using Karpenter. With no constraints defined in provisioners and none requested from pods being deployed, Karpenter chooses from the entire universe of features available to your cloud provider. Nodes can be created using any instance type and run in any zone.
+
+An application developer can tighten the constraints a cluster administrator defines in a provisioner by adding scheduling constraints to their pod spec. Refer to the description of Karpenter constraints in the Application Developer section below for details.
+
+### Scheduling
+
+Karpenter schedules pods that the Kubernetes scheduler has marked unschedulable. After solving scheduling constraints and launching capacity, Karpenter optimistically creates the Node object and binds the pod. This stateless approach helps to avoid race conditions and improves performance. If something is wrong with the launched node, Kubernetes will automatically migrate the pods to a new node.
+
+Once Karpenter brings up a node, that node is available for the Kubernetes scheduler to schedule pods on it as well. This is useful if there is additional room in the node due to imperfect packing shape or because workloads finish over time.
+
+### Cloud provider
+
+Karpenter makes requests to provision new nodes to the associated cloud provider. The first supported cloud provider is AWS, although Karpenter is designed to work with other cloud providers. Separating Kubernetes and AWS-specific settings allows Karpenter a clean path to integrating with other cloud providers.
+
+While using Kubernetes well-known labels, the provisioner can set some values that are specific to the cloud provider. So, for example, to include a certain instance type, you could use the Kubernetes label `node.kubernetes.io/instance-type`, but set its value to an AWS instance type (such as `m5.large` or `m5.2xlarge`).
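+
+As a sketch of that idea, a provisioner requirement might constrain instance types like this (using the example instance types above; only the relevant part of the spec is shown):
+
+```yaml
+spec:
+  requirements:
+    # Well-known Kubernetes label, with cloud-provider-specific values.
+    - key: node.kubernetes.io/instance-type
+      operator: In
+      values: ["m5.large", "m5.2xlarge"]
+```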
+
+### Kubernetes cluster autoscaler
+Like Karpenter, [Kubernetes Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) is
+designed to add nodes when requests come in to run pods that cannot be met by current capacity.
+Cluster autoscaler is part of the Kubernetes project, with implementations by most major Kubernetes cloud providers.
+By taking a fresh look at provisioning, Karpenter offers the following improvements:
+
+* **Designed to handle the full flexibility of the cloud**:
+Karpenter has the ability to efficiently address the full range of instance types available through AWS.
+Cluster autoscaler was not originally built with the flexibility to handle hundreds of instance types, zones, and purchase options.
+
+* **Group-less node provisioning**: Karpenter manages each instance directly, without using additional orchestration mechanisms like node groups.
+This enables it to retry in milliseconds instead of minutes when capacity is unavailable.
+It also allows Karpenter to leverage diverse instance types, availability zones, and purchase options without creating hundreds of node groups.
+
+* **Scheduling enforcement**: Cluster autoscaler doesn't bind pods to the nodes it creates.
+Instead, it relies on the kube-scheduler to make the same scheduling decision after the node has come online.
+A node that Karpenter launches has its pods bound immediately.
+The kubelet doesn't have to wait for the scheduler or for the node to become ready.
+It can start preparing the container runtime immediately, including pre-pulling the image.
+This can save seconds off of node startup latency.
+
+## Application developer
+
+As someone deploying pods that might be evaluated by Karpenter, you should know how to request the properties that your pods need from their compute resources.
+Karpenter's job is to efficiently assess and choose compute assets based on requests from pod deployments.
+These can include basic Kubernetes features or features that are specific to the cloud provider (such as AWS).
+
+Layered *constraints* are applied when a pod makes requests for compute resources that cannot be met by current capacity.
+A pod can specify `nodeAffinity` (to run in a particular zone or on a particular instance type) or `topologySpreadConstraints` (to balance a set of pods across multiple nodes).
+The pod can specify a `nodeSelector` to run only on nodes with a particular label and `resource.requests` to ensure that the node has enough available memory.
+
+The Kubernetes scheduler tries to match those constraints with available nodes.
+If the pod is unschedulable, Karpenter creates compute resources that match its needs.
+When Karpenter tries to provision a node, it analyzes scheduling constraints before choosing the node to create.
+
+As long as the requests are not outside of the provisioner's constraints,
+Karpenter will look to best match the request, comparing the same well-known labels defined by the pod's scheduling constraints.
+Note that if the constraints are such that a match is not possible, the pod will remain unscheduled.
+
+So, what constraints can you use as an application developer deploying pods that could be managed by Karpenter?
+
+Kubernetes features that Karpenter supports for scheduling pods include nodeAffinity and [nodeSelector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector).
+It also supports [PodDisruptionBudget](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) and [topologySpreadConstraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/).
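+
+As a sketch, a pod might combine several of these features as follows (the name, label, and zone values are illustrative, not recommendations):
+
+```bash
+cat <<EOF | kubectl apply -f -
+apiVersion: v1
+kind: Pod
+metadata:
+  name: constrained-pod # hypothetical example
+  labels:
+    app: myapp
+spec:
+  nodeSelector:
+    topology.kubernetes.io/zone: us-east-1c # run only in this zone
+  topologySpreadConstraints:
+    # Balance pods carrying the app=myapp label across zones
+    - maxSkew: 1
+      topologyKey: topology.kubernetes.io/zone
+      whenUnsatisfiable: DoNotSchedule
+      labelSelector:
+        matchLabels:
+          app: myapp
+  containers:
+    - name: app
+      image: public.ecr.aws/eks-distro/kubernetes/pause:3.2
+      resources:
+        requests:
+          memory: 512Mi # ensure the node has enough available memory
+EOF
+```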
+
+From the Kubernetes [Well-Known Labels, Annotations and Taints](https://kubernetes.io/docs/reference/labels-annotations-taints/) page,
+you can see a full list of Kubernetes labels, annotations and taints that determine scheduling.
+Those that are implemented in Karpenter include:
+
+* **kubernetes.io/arch**: For example, kubernetes.io/arch=amd64
+* **node.kubernetes.io/instance-type**: For example, node.kubernetes.io/instance-type=m3.medium
+* **topology.kubernetes.io/zone**: For example, topology.kubernetes.io/zone=us-east-1c
+
+{{% alert title="Note" color="primary" %}}
+Don't use `podAffinity` and `podAntiAffinity` to schedule pods on the same or different nodes as other pods.
+Kubernetes SIG Scalability recommends against these features due to their negative performance impact on the Kubernetes scheduler (see [KEP 895](https://github.com/kubernetes/enhancements/tree/master/keps/sig-scheduling/895-pod-topology-spread#impact-to-other-features)), and Karpenter doesn't support them for the moment (you can follow the discussion by subscribing to the [issue](https://github.com/aws/karpenter/issues/942)).
+Instead, the Karpenter project recommends `topologySpreadConstraints` to reduce blast radius and `nodeSelectors` and `taints` to implement colocation.
+{{% /alert %}}
+
+For more on how, as a developer, you can add constraints to your pod deployment, see [Scheduling](../tasks/scheduling/).
diff --git a/website/content/en/v0.7.0/development-guide.md b/website/content/en/v0.7.0/development-guide.md
new file mode 100644
index 000000000000..ff1f78d4cdad
--- /dev/null
+++ b/website/content/en/v0.7.0/development-guide.md
@@ -0,0 +1,114 @@
+---
+title: "Development Guide"
+linkTitle: "Development Guide"
+weight: 80
+---
+
+## Dependencies
+
+The following tools are required for contributing to the Karpenter project.
+
+| Package                                                            | Version  | Install                                        |
+| ------------------------------------------------------------------ | -------- | ---------------------------------------------- |
+| [go](https://golang.org/dl/)                                       | v1.15.3+ | [Instructions](https://golang.org/doc/install) |
+| [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) |          | `brew install kubectl`                         |
+| [helm](https://helm.sh/docs/intro/install/)                        |          | `brew install helm`                            |
+| Other tools                                                        |          | `make toolchain`                               |
+
+## Developing
+
+### Setup / Teardown
+
+Based on how you are running your Kubernetes cluster, follow the [Environment specific setup](#environment-specific-setup) to configure your environment before you continue. Once your environment is set up, run the following commands to install Karpenter in the Kubernetes cluster specified in your `~/.kube/config`.
+
+```bash
+CLOUD_PROVIDER="" make apply # Install Karpenter
+make delete # Uninstall Karpenter
+```
+
+### Developer Loop
+
+* Make sure dependencies are installed
+  * Run `make codegen` to make sure yaml manifests are generated
+  * Run `make toolchain` to install cli tools for building and testing the project
+* You will need a personal development image repository (e.g. ECR)
+  * Make sure you have valid credentials to your development repository.
+  * `$KO_DOCKER_REPO` must point to your development repository
+  * Your cluster must have permissions to read from the repository
+* It's also a good idea to persist `${CLOUD_PROVIDER}` in your environment variables to simplify the `make apply` command, as in the sketch below.
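+
+For example (a sketch that assumes AWS and the ECR repository described in [Environment specific setup](#environment-specific-setup) below, with `AWS_ACCOUNT_ID` and `AWS_DEFAULT_REGION` already set in your shell):
+
+```bash
+# Persist these in your shell profile so `make apply` picks them up automatically
+export CLOUD_PROVIDER="aws"
+export KO_DOCKER_REPO="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/karpenter"
+make apply # no inline CLOUD_PROVIDER override needed
+```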
+
+### Build and Deploy
+
+*Note: these commands do not rely on each other and may be executed independently.*
+
+```bash
+make apply # quickly deploy changes to your cluster
+make dev # run codegen, lint, and tests
+```
+
+### Testing
+
+```bash
+make test # E2E correctness tests
+make battletest # More rigorous tests run in CI environment
+```
+
+### Change Log Level
+
+```bash
+kubectl patch configmap config-logging -n karpenter --patch '{"data":{"loglevel.controller":"debug"}}' # Debug Level
+kubectl patch configmap config-logging -n karpenter --patch '{"data":{"loglevel.controller":"info"}}' # Info Level
+```
+
+### Debugging Metrics
+
+OSX:
+
+```bash
+open http://localhost:8080/metrics && kubectl port-forward service/karpenter-metrics -n karpenter 8080
+```
+
+Linux:
+
+```bash
+gio open http://localhost:8080/metrics && kubectl port-forward service/karpenter-metrics -n karpenter 8080
+```
+
+### Tailing Logs
+
+While you can tail Karpenter's logs with kubectl, there are a number of tools out there that enhance the experience. We recommend [Stern](https://pkg.go.dev/github.com/planetscale/stern#section-readme):
+
+```bash
+stern -n karpenter -l app.kubernetes.io/name=karpenter
+```
+
+## Environment specific setup
+
+### AWS
+
+Set the `CLOUD_PROVIDER` environment variable to build cloud-provider-specific packages of Karpenter.
+
+```sh
+export CLOUD_PROVIDER="aws"
+```
+
+For local development on Karpenter, you will need a Docker repo which can manage your images for Karpenter components.
+You can use the following command to provision an ECR repository.
+
+```bash
+aws ecr create-repository \
+    --repository-name karpenter/controller \
+    --image-scanning-configuration scanOnPush=true \
+    --region "${AWS_DEFAULT_REGION}"
+aws ecr create-repository \
+    --repository-name karpenter/webhook \
+    --image-scanning-configuration scanOnPush=true \
+    --region "${AWS_DEFAULT_REGION}"
+```
+
+Once you have your ECR repository provisioned, configure your Docker daemon to authenticate with your newly created repository.
+
+```bash
+export KO_DOCKER_REPO="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/karpenter"
+aws ecr get-login-password --region "${AWS_DEFAULT_REGION}" | docker login --username AWS --password-stdin "${KO_DOCKER_REPO}"
+```
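+
+Optionally, as a sanity check (a sketch that assumes the two repositories above were created in `${AWS_DEFAULT_REGION}`), you can confirm the repositories exist before pushing images:
+
+```bash
+# List the two ECR repositories created above to confirm they exist
+aws ecr describe-repositories \
+    --repository-names karpenter/controller karpenter/webhook \
+    --region "${AWS_DEFAULT_REGION}"
+```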
diff --git a/website/content/en/v0.7.0/faq.md b/website/content/en/v0.7.0/faq.md
new file mode 100644
index 000000000000..cf3af129fe94
--- /dev/null
+++ b/website/content/en/v0.7.0/faq.md
@@ -0,0 +1,140 @@
+---
+title: "FAQs"
+linkTitle: "FAQs"
+weight: 90
+---
+## General
+
+### How does a provisioner decide to manage a particular node?
+See [Configuring provisioners]({{< ref "./concepts/#configuring-provisioners" >}}) for information on how Karpenter provisions and manages nodes.
+
+### What cloud providers are supported?
+AWS is the first cloud provider supported by Karpenter, although it is designed to be used with other cloud providers as well.
+See [Cloud provider]({{< ref "./concepts/#cloud-provider" >}}) for details.
+
+### Can I write my own cloud provider for Karpenter?
+Yes, but there is no documentation for it yet.
+Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter/tree{{< githubRelRef >}}pkg/cloudprovider) documentation to see how the AWS provider is built, but other sections of the code will require changes too.
+
+### What operating system nodes does Karpenter deploy?
+By default, Karpenter uses Amazon Linux 2 images.
+
+### Can I provide my own custom operating system images?
+Karpenter allows you to create your own AWS AMIs using custom launch templates.
+See [Launch Templates and Custom Images]({{< ref "./aws/launch-templates/" >}}) for details.
+
+### Can Karpenter deal with workloads for mixed-architecture clusters (arm vs. amd)?
+Yes. Build and prepare custom arm images as described in [Launch Templates and Custom Images]({{< ref "./aws/launch-templates/" >}}).
+Specify the desired architecture when you deploy workloads.
+
+### What RBAC access is required?
+All of the required RBAC rules can be found in the Helm chart templates.
+See the [clusterrolebinding.yaml](https://github.com/aws/karpenter/blob{{< githubRelRef >}}charts/karpenter/templates/clusterrolebinding.yaml), [clusterrole.yaml](https://github.com/aws/karpenter/blob{{< githubRelRef >}}charts/karpenter/templates/clusterrole.yaml), [rolebinding.yaml](https://github.com/aws/karpenter/blob{{< githubRelRef >}}charts/karpenter/templates/rolebinding.yaml), and [role.yaml](https://github.com/aws/karpenter/blob{{< githubRelRef >}}charts/karpenter/templates/role.yaml) files for details.
+
+### Can I run Karpenter outside of a Kubernetes cluster?
+Yes, as long as the controller has network and IAM/RBAC access to the Kubernetes API and your provider API.
+
+## Compatibility
+
+### Which versions of Kubernetes does Karpenter support?
+Karpenter is tested with Kubernetes v1.19 and later.
+
+### What Kubernetes distributions are supported?
+Karpenter documents integration with a fresh install of the latest AWS Elastic Kubernetes Service (EKS).
+Existing EKS distributions can be used, but this use case has not yet been documented.
+Other Kubernetes distributions (kOps, etc.) can be used, but setting up cloud provider permissions for those distributions has not been documented.
+
+### How does Karpenter interact with AWS node group features?
+Provisioners are designed to work alongside static capacity management solutions like EKS Managed Node Groups and EC2 Auto Scaling Groups.
+You can manage all capacity using provisioners, use a mixed model with dynamic and statically managed capacity, or use a fully static approach.
+We expect most users will use a mixed approach in the near term and provisioner-managed capacity in the long term.
+
+
+### How does Karpenter interact with Kubernetes features?
+* Kubernetes Cluster Autoscaler: Karpenter can work alongside Cluster Autoscaler.
+See [Kubernetes cluster autoscaler]({{< ref "./concepts/#kubernetes-cluster-autoscaler" >}}) for details.
+* Kubernetes Scheduler: Karpenter focuses on scheduling pods that the Kubernetes scheduler has marked as unschedulable.
+See [Scheduling]({{< ref "./concepts/#scheduling" >}}) for details on how Karpenter interacts with the Kubernetes scheduler.
+
+## Provisioning
+
+### What features does the Karpenter provisioner support?
+See [Provisioner API]({{< ref "./provisioner" >}}) for provisioner examples and descriptions of features.
+
+### Can I create multiple (team-based) provisioners on a cluster?
+Yes, provisioners can identify multiple teams based on labels.
+See [Provisioner API]({{< ref "./provisioner" >}}) for details.
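+
+For illustration, a team-specific provisioner might label and taint its nodes as in the sketch below (the team name is hypothetical, and cloud-provider fields are omitted):
+
+```bash
+cat <<EOF | kubectl apply -f -
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: team-a # hypothetical team-specific provisioner
+spec:
+  labels:
+    team: team-a # nodes from this provisioner carry this label
+  taints:
+    # Only pods that tolerate this taint will land on team-a capacity
+    - key: team-a
+      effect: NoSchedule
+EOF
+```
+
+Pods can then target this capacity with the node selector `karpenter.sh/provisioner-name: team-a` and a matching toleration.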
+
+### If multiple provisioners are defined, which will my pod use?
+
+By default, pods will use the rules defined by a provisioner named `default`.
+This is analogous to the default scheduler.
+To select an alternative provisioner, use the node selector `karpenter.sh/provisioner-name: alternative-provisioner`.
+You must either define a default provisioner or explicitly specify the `karpenter.sh/provisioner-name` node selector.
+
+### Can I set total limits of CPU and memory for a provisioner?
+Yes, the setting is provider-specific.
+See the examples in the [Accelerators, GPU]({{< ref "./aws/provisioning/#accelerators-gpu" >}}) Karpenter documentation.
+
+### Can I mix spot and on-demand EC2 run types?
+Yes, see [Example Provisioner Resource]({{< ref "./provisioner/#example-provisioner-resource" >}}) for an example.
+
+### Can I restrict EC2 instance types?
+
+* Attribute-based requests are currently not possible.
+* You can select instances with special hardware, such as GPUs.
+
+### How does Karpenter dynamically select instance types?
+
+Karpenter batches pending pods and then binpacks them based on CPU, memory, and GPUs required, taking into account node overhead, VPC CNI resources required, and daemon sets that will be packed when bringing up a new node.
+By default Karpenter uses all available instance types, but it can be constrained in the provisioner spec with the [instance-type](https://kubernetes.io/docs/reference/labels-annotations-taints/#nodekubernetesioinstance-type) well-known label in the requirements section.
+After the pods are binpacked on the most efficient instance type (i.e. the smallest instance type that can fit the pod batch), Karpenter takes 19 other instance types that are larger than the most efficient packing, and passes all 20 instance type options to an API called Amazon EC2 Fleet.
+The EC2 Fleet API attempts to provision the instance type based on a user-defined allocation strategy.
+If you are using the on-demand capacity type, then Karpenter uses the `lowest-price` allocation strategy, so Fleet will provision the lowest-priced instance type it can get from the 20 that Karpenter passed it.
+If the instance type is unavailable for some reason, then Fleet will move on to the next cheapest instance type.
+If you are using the spot capacity type, Karpenter uses the `capacity-optimized-prioritized` allocation strategy, which tells Fleet to find the instance type that EC2 has the most capacity of; this decreases the probability of a spot interruption in the near term.
+See [Choose the appropriate allocation strategy](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-fleet-allocation-strategy.html#ec2-fleet-allocation-use-cases) for information on Fleet optimization.
+
+### What if there is no spot capacity? Will Karpenter fall back to on-demand?
+
+Karpenter will fall back to on-demand if your provisioner specifies both spot and on-demand.
+
+More specifically, Karpenter maintains a concept of "offerings" for each instance type, which is a combination of zone and capacity type (equivalent in the AWS cloud provider to an EC2 purchase option). Spot offerings are prioritized, if they're available. Whenever the Fleet API returns an insufficient capacity error for spot instances, those particular offerings are temporarily removed from consideration (across the entire provisioner) so that Karpenter can make forward progress through fallback. The retry will happen immediately, within milliseconds.
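+
+A minimal sketch of a provisioner that allows both capacity types follows (spot is prioritized when available, as described above; other provisioner fields are omitted):
+
+```bash
+cat <<EOF | kubectl apply -f -
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: spot-with-fallback # hypothetical name, for illustration only
+spec:
+  requirements:
+    # Spot is used when available; on-demand is the fallback
+    - key: karpenter.sh/capacity-type
+      operator: In
+      values: ["spot", "on-demand"]
+EOF
+```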
+
+## Workloads
+
+### How can someone deploying pods take advantage of Karpenter?
+
+See [Application developer]({{< ref "./concepts/#application-developer" >}}) for descriptions of how Karpenter matches nodes with pod requests.
+
+### How do I use Karpenter with the AWS load balancer controller?
+
+* Set the [ALB target type](https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.3/guide/ingress/annotations/#target-type) to IP mode for the pods.
+Use IP targeting if you want the pods to receive equal weight.
+Instance balancing could greatly skew the traffic being sent to a node without also managing host spread of the workload.
+* Set the [readiness gate](https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.3/deploy/pod_readiness_gate/) on the namespace.
+The default is round robin at the node level.
+For Karpenter, not all nodes are equal.
+For example, each node will have different performance characteristics and a different number of pods running on it.
+A `t3.small` with three pods should not receive the same amount of traffic as an `m5.4xlarge` with dozens of pods.
+If you don't specify a spread at the workload level, or limit which instances should be picked, you could get the same amount of traffic sent to the `t3` and the `m5`.
+
+### Can I use Karpenter with EBS disks per availability zone?
+Yes. See [Persistent Volume Topology]({{< ref "./tasks/scheduling#persistent-volume-topology" >}}) for details.
+
+### Can I set `--max-pods` on my nodes?
+Not yet.
+
+## Deprovisioning
+
+### How does Karpenter deprovision nodes?
+See [Deprovisioning nodes]({{< ref "./tasks/deprovisioning" >}}) for information on how Karpenter deprovisions nodes.
+
+## Upgrading
+
+### How do I upgrade Karpenter?
+Karpenter is a controller that runs in your cluster, but it is not tied to a specific Kubernetes version, as the Cluster Autoscaler is.
+Use your existing upgrade mechanisms to upgrade your core add-ons in Kubernetes and keep Karpenter up to date on bug fixes and new features.
+
+Karpenter requires proper permissions in the `KarpenterNode IAM Role` and the `KarpenterController IAM Role`.
+To upgrade Karpenter to version `$VERSION`, make sure that the `KarpenterNode IAM Role` and the `KarpenterController IAM Role` have the right permissions described in `https://karpenter.sh/$VERSION/getting-started/getting-started-with-eksctl/cloudformation.yaml`.
+Next, locate the `KarpenterController IAM Role` ARN (i.e., the ARN of the resource created in [Create the KarpenterController IAM Role](../getting-started/getting-started-with-eksctl/#create-the-karpentercontroller-iam-role)) and the cluster endpoint, and pass them to the Helm upgrade command:
+{{% script file="./content/en/preview/getting-started/getting-started-with-eksctl/scripts/step08-apply-helm-chart.sh" language="bash"%}}
diff --git a/website/content/en/v0.7.0/getting-started/_index.md b/website/content/en/v0.7.0/getting-started/_index.md
new file mode 100644
index 000000000000..679bad728dd7
--- /dev/null
+++ b/website/content/en/v0.7.0/getting-started/_index.md
@@ -0,0 +1,9 @@
+---
+title: "Getting Started"
+linkTitle: "Getting Started"
+weight: 10
+cascade:
+  type: docs
+---
+
+ Learn more about Karpenter and how to get started below.
diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/_index.md b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/_index.md
new file mode 100644
index 000000000000..49908dbc9552
--- /dev/null
+++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/_index.md
@@ -0,0 +1,172 @@
+
+---
+title: "Getting started with eksctl"
+linkTitle: "Getting started with eksctl"
+weight: 10
+---
+
+Karpenter automatically provisions new nodes in response to unschedulable
+pods. Karpenter does this by observing events within the Kubernetes cluster,
+and then sending commands to the underlying cloud provider.
+
+In this example, the cluster is running on Amazon Web Services (AWS) Elastic
+Kubernetes Service (EKS). Karpenter is designed to be cloud provider agnostic,
+but currently only supports AWS. Contributions are welcome.
+
+This guide should take less than 1 hour to complete and cost less than $0.25.
+Follow the clean-up instructions to avoid any ongoing charges.
+
+## Install
+
+Karpenter is installed in clusters with a Helm chart.
+
+Karpenter requires cloud provider permissions to provision nodes; for AWS, IAM
+Roles for Service Accounts (IRSA) should be used. IRSA permits Karpenter
+(within the cluster) to make privileged requests to AWS (as the cloud provider)
+via a ServiceAccount.
+
+### Required Utilities
+
+Install these tools before proceeding:
+
+1. [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2-linux.html)
+2. `kubectl` - [the Kubernetes CLI](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/)
+3. `eksctl` - [the CLI for AWS EKS](https://docs.aws.amazon.com/eks/latest/userguide/eksctl.html)
+4. `helm` - [the package manager for Kubernetes](https://helm.sh/docs/intro/install/)
+
+[Configure the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html)
+with a user that has sufficient privileges to create an EKS cluster. Verify that the CLI can
+authenticate properly by running `aws sts get-caller-identity`.
+
+### Environment Variables
+
+After setting up the tools, set the following environment variable to the Karpenter version you
+would like to install.
+
+```bash
+export KARPENTER_VERSION=v0.7.0
+```
+
+Also set the following environment variables to store commonly used values.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step01-config.sh" language="bash"%}}
+
+### Create a Cluster
+
+Create a cluster with `eksctl`. This example configuration file specifies a basic cluster with one initial node and sets up an IAM OIDC provider for the cluster to enable IAM roles for pods:
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step02-create-cluster.sh" language="bash"%}}
+
+This guide uses [AWS EKS managed node groups](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html) to host Karpenter.
+
+Karpenter itself can run anywhere, including on [self-managed node groups](https://docs.aws.amazon.com/eks/latest/userguide/worker.html), [managed node groups](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html), or [AWS Fargate](https://aws.amazon.com/fargate/).
+
+Karpenter will provision EC2 instances in your account.
+
+### Create the KarpenterNode IAM Role
+
+Instances launched by Karpenter must run with an InstanceProfile that grants permissions necessary to run containers and configure networking.
Karpenter discovers the InstanceProfile using the name `KarpenterNodeRole-${ClusterName}`.
+
+First, create the IAM resources using AWS CloudFormation.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step03-iam-cloud-formation.sh" language="bash"%}}
+
+Second, grant access to instances using the profile to connect to the cluster. This command adds the Karpenter node role to your aws-auth configmap, allowing nodes with this role to connect to the cluster.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step04-grant-access.sh" language="bash"%}}
+
+Now, Karpenter can launch new EC2 instances and those instances can connect to your cluster.
+
+### Create the KarpenterController IAM Role
+
+Karpenter requires permissions such as launching instances. The following will create an AWS IAM Role and a Kubernetes service account, and associate them using [IRSA](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/setting-up-enable-IAM.html).
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step05-controller-iam.sh" language="bash"%}}
+
+### Create the EC2 Spot Service Linked Role
+
+This step is only necessary if this is the first time you're using EC2 Spot in this account. More details are available [here](https://docs.aws.amazon.com/batch/latest/userguide/spot_fleet_IAM_role.html).
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step06-add-spot-role.sh" language="bash"%}}
+
+### Install Karpenter Helm Chart
+
+Use Helm to deploy Karpenter to the cluster.
+
+Before the chart can be installed, the repo needs to be added to Helm. Run the following commands to add the repo.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step07-install-helm-chart.sh" language="bash"%}}
+
+Install the chart, passing in the cluster details and the Karpenter role ARN.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step08-apply-helm-chart.sh" language="bash"%}}
+
+#### Deploy a temporary Prometheus and Grafana stack (optional)
+
+The following commands will deploy a Prometheus and Grafana stack that is suitable for this guide but does not include persistent storage or other configurations that would be necessary for monitoring a production deployment of Karpenter. This deployment includes two Karpenter dashboards that are automatically onboarded to Grafana. They provide a variety of visualization examples of Karpenter metrics.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step09-add-prometheus-grafana.sh" language="bash"%}}
+
+The Grafana instance may be accessed using port forwarding.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step10-add-grafana-port-forward.sh" language="bash"%}}
+
+The new stack has only one user, `admin`, and the password is stored in a secret. The following command will retrieve the password.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step11-grafana-get-password.sh" language="bash"%}}
+
+### Provisioner
+
+A single Karpenter provisioner is capable of handling many different pod
+shapes. Karpenter makes scheduling and provisioning decisions based on pod
+attributes such as labels and affinity. In other words, Karpenter eliminates
+the need to manage many different node groups.
+
+Create a default provisioner using the command below.
+This provisioner uses `securityGroupSelector` and `subnetSelector` to discover resources used to launch nodes.
+We applied the tag `karpenter.sh/discovery` in the `eksctl` command above.
+Depending on how these resources are shared between clusters, you may need to use different tagging schemes.
+
+The `ttlSecondsAfterEmpty` value configures Karpenter to terminate empty nodes.
+This behavior can be disabled by leaving the value undefined.
+
+Review the [provisioner CRD]({{}}) for more information. For example,
+`ttlSecondsUntilExpired` configures Karpenter to terminate nodes when a maximum age is reached.
+
+Note: This provisioner will create capacity as long as the sum of all created capacity is less than the specified limit.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step12-add-provisioner.sh" language="bash"%}}
+
+## First Use
+
+Karpenter is now active and ready to begin provisioning nodes.
+Create some pods using a deployment, and watch Karpenter provision nodes in response.
+
+### Automatic Node Provisioning
+
+This deployment uses the [pause image](https://www.ianlewis.org/en/almighty-pause-container) and starts with zero replicas.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step13-automatic-node-provisioning.sh" language="bash"%}}
+
+### Automatic Node Termination
+
+Now, delete the deployment. After 30 seconds (`ttlSecondsAfterEmpty`),
+Karpenter should terminate the now-empty nodes.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step14-deprovisioning.sh" language="bash"%}}
+
+### Manual Node Termination
+
+If you delete a node with `kubectl`, Karpenter will gracefully cordon, drain,
+and shut down the corresponding instance. Under the hood, Karpenter adds a
+finalizer to the node object, which blocks deletion until all pods are
+drained and the instance is terminated. Keep in mind, this only works for
+nodes provisioned by Karpenter.
+
+{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step15-delete-node.sh" language="bash"%}}
+
+## Cleanup
+
+To avoid additional charges, remove the demo infrastructure from your AWS account.
+ +{{% script file="./content/en/{VERSION}/getting-started/getting-started-with-eksctl/scripts/step16-cleanup.sh" language="bash"%}} diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/cloudformation.yaml b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/cloudformation.yaml new file mode 100644 index 000000000000..74af86867cc6 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/cloudformation.yaml @@ -0,0 +1,60 @@ +AWSTemplateFormatVersion: "2010-09-09" +Description: Resources used by https://github.com/aws/karpenter +Parameters: + ClusterName: + Type: String + Description: "EKS cluster name" +Resources: + KarpenterNodeInstanceProfile: + Type: "AWS::IAM::InstanceProfile" + Properties: + InstanceProfileName: !Sub "KarpenterNodeInstanceProfile-${ClusterName}" + Path: "/" + Roles: + - Ref: "KarpenterNodeRole" + KarpenterNodeRole: + Type: "AWS::IAM::Role" + Properties: + RoleName: !Sub "KarpenterNodeRole-${ClusterName}" + Path: / + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: + Service: + !Sub "ec2.${AWS::URLSuffix}" + Action: + - "sts:AssumeRole" + ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEKS_CNI_Policy" + - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEKSWorkerNodePolicy" + - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonSSMManagedInstanceCore" + KarpenterControllerPolicy: + Type: AWS::IAM::ManagedPolicy + Properties: + ManagedPolicyName: !Sub "KarpenterControllerPolicy-${ClusterName}" + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Resource: "*" + Action: + # Write Operations + - ec2:CreateLaunchTemplate + - ec2:CreateFleet + - ec2:RunInstances + - ec2:CreateTags + - iam:PassRole + - ec2:TerminateInstances + - ec2:DeleteLaunchTemplate + # Read Operations + - ec2:DescribeLaunchTemplates + - ec2:DescribeInstances + - ec2:DescribeSecurityGroups + - ec2:DescribeSubnets + - ec2:DescribeInstanceTypes + - ec2:DescribeInstanceTypeOfferings + - ec2:DescribeAvailabilityZones + - ssm:GetParameter diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/grafana-values.yaml b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/grafana-values.yaml new file mode 100644 index 000000000000..b69241a99c90 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/grafana-values.yaml @@ -0,0 +1,27 @@ +datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + version: 1 + url: http://prometheus-server:80 + access: proxy +dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/default +dashboards: + default: + pod-dashboard: + url: https://karpenter.sh/preview/getting-started/getting-started-with-eksctl/karpenter-pod-metrics.json + node-dashboard: + url: https://karpenter.sh/preview/getting-started/getting-started-with-eksctl/karpenter-node-metrics.json diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-controllers-allocation.json b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-controllers-allocation.json new file mode 100644 index 000000000000..a2fbaaa6e3ed 
--- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-controllers-allocation.json @@ -0,0 +1,330 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.1.6" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1636732403925, + "links": [], + "panels": [ + { + "datasource": null, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 5, + "options": { + "content": "Displays information about Allocation controller processes.\n\nSee the information icon of each panel for a description.\n\nSuggest improvements and additions [here](https://github.com/aws/karpenter/issues/new?labels=dashboard).", + "mode": "markdown" + }, + "pluginVersion": "8.1.6", + "targets": [ + { + "queryType": "randomWalk", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "About this dashboard", + "type": "text" + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateMagma", + "exponent": 0.5, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_PROMETHEUS}", + "description": "Aggregates the duration of all bind operations of the Allocation controller.\n\nThe color of each \"bucket\" is a visual clue to the number of bind operations that completed within that duration range.\n\nMouse-over a bucket to display exact values.", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 4 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 2, + "legend": { + "show": true + }, + "maxDataPoints": 25, + "pluginVersion": "7.5.0", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(karpenter_allocation_controller_bind_duration_seconds_bucket[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "All Binds" + } + ], + "title": "Bind duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateMagma", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_PROMETHEUS}", + 
"description": "Aggregates the duration of all binpack operations of the Allocation controller.\n\nThe color of each \"bucket\" is a visual clue to the number of binpack operations that completed within that duration range.\n\nMouse-over a bucket to display exact values.", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 12 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 7, + "legend": { + "show": true + }, + "maxDataPoints": 25, + "pluginVersion": "7.5.0", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(karpenter_allocation_controller_binpacking_duration_seconds_bucket[$__interval])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "Binpacking Duration" + } + ], + "title": "Binpack duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_PROMETHEUS}", + "description": "Aggregates the duration of all scheduling operations of the Allocation controller for provisioner $provisioner.\n\nThe color of each \"bucket\" is a visual clue to the number of scheduling operations that completed within that duration range.\n\nMouse-over a bucket to display exact values.", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 20 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 9, + "legend": { + "show": true + }, + "maxDataPoints": 25, + "pluginVersion": "7.5.0", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(karpenter_allocation_controller_scheduling_duration_seconds_bucket{provisioner=\"$provisioner\"}[$__interval])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "Scheduling Duration" + } + ], + "title": "Scheduling duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "refresh": "1m", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(karpenter_allocation_controller_scheduling_duration_seconds_bucket, provisioner)", + "description": "Karpenter provisioner", + "error": null, + "hide": 0, + "includeAll": false, + "label": "Provisioner", + "multi": false, + "name": "provisioner", + "options": [], + "query": { + "query": "label_values(karpenter_allocation_controller_scheduling_duration_seconds_bucket, provisioner)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + 
"time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Controllers / Allocation", + "uid": "fs47R-Dnz", + "version": 9 +} diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-controllers.json b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-controllers.json new file mode 100644 index 000000000000..ca7c44862ecf --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-controllers.json @@ -0,0 +1,446 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.1.6" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph (old)", + "version": "" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1636733515850, + "links": [], + "panels": [ + { + "datasource": null, + "description": "", + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 99, + "options": { + "content": "Displays information collected by the Kubernetes controller-runtime.\n\nSee the information icon of each panel for a description.\n\nSuggest improvements and additions [here](https://github.com/aws/karpenter/issues/new?labels=dashboard).", + "mode": "markdown" + }, + "pluginVersion": "8.1.6", + "title": "About this dashboard", + "type": "text" + }, + { + "aliasColors": { + "items": "light-blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fill": 1, + "fillGradient": 2, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "workqueue_depth{name=\"$controller\"}", + "interval": "", + "legendFormat": "items", + "queryType": "randomWalk", + "refId": "Work Queue Depth" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Items in Work Queue", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": 
true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "error": "red", + "requeue": "orange", + "requeue_after": "semi-dark-purple", + "success": "green" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "The rate of completed reconciliations per minute broken out by result status.", + "fill": 1, + "fillGradient": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 4 + }, + "hiddenSeries": false, + "id": 76, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(controller_runtime_reconcile_total{controller=\"$controller\"}[$trailing]) * 60", + "interval": "", + "legendFormat": "{{result}}", + "queryType": "randomWalk", + "refId": "Reconciliations per minute" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Reconciliations per minute", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:566", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:567", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateMagma", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_PROMETHEUS}", + "description": "Aggregates the duration of the reconciliation process.\n\nThe color of each \"bucket\" is a visual clue to the number of reconciliations that completed within that time range.\n\nMouse-over a bucket to display exact values.", + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 4 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 126, + "legend": { + "show": true + }, + "maxDataPoints": 25, + "pluginVersion": "8.1.6", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(controller_runtime_reconcile_time_seconds_bucket{controller=\"$controller\"}[$__interval])) by (le)", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "p100" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Reconciliation duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + } 
+ ], + "refresh": "1m", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_errors_total, controller)", + "description": "Kubernetes controller", + "error": null, + "hide": 0, + "includeAll": false, + "label": "Controller", + "multi": false, + "name": "controller", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_errors_total, controller)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "10m", + "value": "10m" + }, + "description": "Trailing aggregation window", + "error": null, + "hide": 0, + "includeAll": false, + "label": "Trailing", + "multi": false, + "name": "trailing", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": true, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,1m,5m,10m,30m,1h", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Controllers", + "uid": "-Yw9ShDnz", + "version": 15 +} diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-node-metrics.json b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-node-metrics.json new file mode 100644 index 000000000000..11783d5a9efc --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-node-metrics.json @@ -0,0 +1,791 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "id": 5, + "iteration": 1640028599664, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + 
"mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "((karpenter_nodes_total_pod_requests{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable*0 + karpenter_nodes_total_daemon_requests{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable*0) *100) / karpenter_nodes_allocatable{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "interval": "", + "legendFormat": "{{node_name}}", + "refId": "A" + } + ], + "title": "Node Utilization", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "((karpenter_nodes_total_pod_limits{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable*0 + karpenter_nodes_total_daemon_limits{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable*0 ) *100) / karpenter_nodes_allocatable{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "interval": "", + "legendFormat": "{{node_name}}", + "refId": "A" + } + ], + "title": "Node Overcommitment", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 3, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 5, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + 
}, + "pluginVersion": "8.2.5", + "targets": [ + { + "exemplar": true, + "expr": "sum by (resource_type)(karpenter_nodes_allocatable{resource_type!=\"pods\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_total_overhead{resource_type!=\"pods\"}*0 or karpenter_nodes_total_requests{resource_type!=\"pods\"}*0)- sum by (resource_type)(karpenter_nodes_total_overhead{resource_type!=\"pods\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable{resource_type!=\"pods\"}*0 or karpenter_nodes_total_requests{resource_type!=\"pods\"}*0) - sum by (resource_type)(karpenter_nodes_total_requests{resource_type!=\"pods\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable{resource_type!=\"pods\"}*0 or karpenter_nodes_total_overhead{resource_type!=\"pods\"}*0)", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{resource_type}}", + "refId": "Residual" + }, + { + "exemplar": true, + "expr": "sum (karpenter_nodes_allocatable{resource_type=\"pods\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}) - sum (karpenter_pods_state{})", + "hide": false, + "interval": "", + "legendFormat": "pods", + "refId": "Number of Pods" + } + ], + "title": "Cluster Residual Capacity", + "type": "gauge" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "custom.width", + "value": 323 + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 2, + "options": { + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.2.5", + "targets": [ + { + "exemplar": true, + "expr": "karpenter_nodes_allocatable{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Allocatable CPU", + "refId": "Allocatable" + }, + { + "exemplar": true, + "expr": "karpenter_nodes_total_pod_requests{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Requests CPU", + "refId": "Pod Requests" + }, + { + "exemplar": true, + "expr": "karpenter_nodes_total_daemon_requests{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "format": "table", + "hide": 
false, + "instant": true, + "interval": "", + "legendFormat": "Requests CPU", + "refId": "Daemon Requests" + }, + { + "exemplar": true, + "expr": "karpenter_nodes_total_pod_limits{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Limits CPU", + "refId": "Pod Limits" + }, + { + "exemplar": true, + "expr": "karpenter_nodes_total_daemon_limits{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Limits CPU", + "refId": "Daemon Limits" + }, + { + "exemplar": false, + "expr": "karpenter_nodes_system_overhead{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Overhead CPU", + "refId": "Overhead" + }, + { + "exemplar": true, + "expr": "((karpenter_nodes_total_pod_requests{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable*0 + karpenter_nodes_total_daemon_requests{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable*0) *100) / karpenter_nodes_allocatable{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Utilization CPU", + "refId": "Utilization" + }, + { + "exemplar": true, + "expr": "((karpenter_nodes_total_pod_limits{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable*0 + karpenter_nodes_total_daemon_limits{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"} or karpenter_nodes_allocatable*0 ) *100) / karpenter_nodes_allocatable{resource_type=\"$resource_type\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", instance_type=~\"$instance_type\", provisioner=~\"$provisioner\", zone=~\"$zone\", node_name=~\"$node_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Overcommitment CPU", + "refId": "Overcommitment" + } + ], + "title": "Current Node Metrics", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "node_name" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + 
"__name__ 1": true, + "__name__ 2": true, + "__name__ 3": true, + "__name__ 4": true, + "__name__ 5": true, + "arch 1": true, + "arch 2": true, + "arch 3": true, + "arch 4": true, + "arch 5": true, + "arch 6": true, + "arch 7": true, + "capacity_type 1": true, + "capacity_type 2": true, + "capacity_type 3": true, + "capacity_type 4": true, + "capacity_type 5": true, + "capacity_type 6": true, + "capacity_type 7": true, + "instance 1": true, + "instance 2": true, + "instance 3": true, + "instance 4": true, + "instance 5": true, + "instance 6": true, + "instance 7": true, + "instance_type 1": true, + "instance_type 2": true, + "instance_type 3": true, + "instance_type 4": true, + "instance_type 5": true, + "instance_type 6": true, + "instance_type 7": true, + "job 1": true, + "job 2": true, + "job 3": true, + "job 4": true, + "job 5": true, + "job 6": true, + "job 7": true, + "node_name": false, + "provisioner 1": true, + "provisioner 2": true, + "provisioner 3": true, + "provisioner 4": true, + "provisioner 5": true, + "provisioner 6": true, + "provisioner 7": true, + "resource_type 1": true, + "resource_type 2": true, + "resource_type 3": true, + "resource_type 4": true, + "resource_type 5": true, + "resource_type 6": true, + "resource_type 7": true, + "zone 1": true, + "zone 2": true, + "zone 3": true, + "zone 4": true, + "zone 5": true, + "zone 6": true, + "zone 7": true + }, + "indexByName": { + "Time 1": 7, + "Time 2": 17, + "Time 3": 27, + "Time 4": 37, + "Time 5": 47, + "Time 6": 56, + "Value #Allocatable": 1, + "Value #Overcommitment": 6, + "Value #Overhead": 4, + "Value #Pod Limits": 3, + "Value #Pod Requests": 2, + "Value #Utilization": 5, + "__name__ 1": 8, + "__name__ 2": 18, + "__name__ 3": 28, + "__name__ 4": 38, + "arch 1": 9, + "arch 2": 19, + "arch 3": 29, + "arch 4": 39, + "arch 5": 48, + "arch 6": 57, + "capacity_type 1": 10, + "capacity_type 2": 20, + "capacity_type 3": 30, + "capacity_type 4": 40, + "capacity_type 5": 49, + "capacity_type 6": 58, + "instance 1": 11, + "instance 2": 21, + "instance 3": 31, + "instance 4": 41, + "instance 5": 50, + "instance 6": 59, + "instance_type 1": 12, + "instance_type 2": 22, + "instance_type 3": 32, + "instance_type 4": 42, + "instance_type 5": 51, + "instance_type 6": 60, + "job 1": 13, + "job 2": 23, + "job 3": 33, + "job 4": 43, + "job 5": 52, + "job 6": 61, + "node_name": 0, + "provisioner 1": 14, + "provisioner 2": 24, + "provisioner 3": 34, + "provisioner 4": 44, + "provisioner 5": 53, + "provisioner 6": 62, + "resource_type 1": 15, + "resource_type 2": 25, + "resource_type 3": 35, + "resource_type 4": 45, + "resource_type 5": 54, + "resource_type 6": 63, + "zone 1": 16, + "zone 2": 26, + "zone 3": 36, + "zone 4": 46, + "zone 5": 55, + "zone 6": 64 + }, + "renameByName": { + "Value #Allocatable": "Allocatable", + "Value #Daemon Requests": "Requests from Daemon Set", + "Value #Overcommitment": "Overcommitment Rate %", + "Value #Overhead": "System Overhead", + "Value #Pod Limits": "Limits from Pods", + "Value #Pod Requests": "Requests from Pods", + "Value #Utilization": "Utilization Rate %", + "name": "Node Name" + } + } + } + ], + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 32, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "amd64" + ], + "value": [ + "amd64" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(arch)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": 
null, + "multi": true, + "name": "arch", + "options": [], + "query": { + "query": "label_values(arch)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(capacity_type)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "capacity_type", + "options": [], + "query": { + "query": "label_values(capacity_type)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(instance_type)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "instance_type", + "options": [], + "query": { + "query": "label_values(instance_type)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(provisioner)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "provisioner", + "options": [], + "query": { + "query": "label_values(provisioner)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(zone)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "zone", + "options": [], + "query": { + "query": "label_values(zone)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "cpu", + "value": "cpu" + }, + "datasource": "Prometheus", + "definition": "label_values(resource_type)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resource_type", + "options": [], + "query": { + "query": "label_values(resource_type)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(node_name)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "node_name", + "options": [], + "query": { + "query": "label_values(node_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Cluster Capacity", + "uid": "GwdOTionz", + "version": 44 +} diff --git 
a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-pod-metrics.json b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-pod-metrics.json new file mode 100644 index 000000000000..5469a67fd79b --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/karpenter-pod-metrics.json @@ -0,0 +1,970 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "id": 4, + "iteration": 1640029833603, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "Prometheus", + "description": "Total number of Pods", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum (karpenter_pods_state{namespace=~\"$namespace\", node=~\"$node\", zone=~\"$zone\", instance_type=~\"$instance_type\", owner=~\"$owner\", phase=~\"$phase\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", provisioner=~\"$provisioner\"})", + "interval": "", + "legendFormat": "Number of Pods", + "refId": "A" + } + ], + "title": "Total number of Pods", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Pod Distribution by AZ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 47, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (zone)(karpenter_pods_state{namespace=~\"$namespace\", 
node=~\"$node\", zone=~\"$zone\", instance_type=~\"$instance_type\", owner=~\"$owner\", phase=~\"$phase\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", provisioner=~\"$provisioner\"})", + "interval": "", + "legendFormat": "{{zone}}", + "refId": "A" + } + ], + "title": "Pod Distribution by AZ", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Pod Distribution by Namespaces", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 47, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (namespace)(karpenter_pods_state{namespace=~\"$namespace\", node=~\"$node\", zone=~\"$zone\", instance_type=~\"$instance_type\", owner=~\"$owner\", phase=~\"$phase\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", provisioner=~\"$provisioner\"})", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "title": "Pod Distribution by Namespaces", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Pod Distribution by Node", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 47, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (node)(karpenter_pods_state{namespace=~\"$namespace\", node=~\"$node\", zone=~\"$zone\", instance_type=~\"$instance_type\", owner=~\"$owner\", phase=~\"$phase\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", provisioner=~\"$provisioner\"})", + "interval": "", + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "title": "Pod Distribution by Node", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Pod Distribution by Instance Type", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 47, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (instance_type)(karpenter_pods_state{namespace=~\"$namespace\", node=~\"$node\", zone=~\"$zone\", instance_type=~\"$instance_type\", owner=~\"$owner\", phase=~\"$phase\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", provisioner=~\"$provisioner\"})", + "interval": "", + "legendFormat": "{{instance_type}}", + "refId": "A" + } + ], + "title": "Pod Distribution by Instance Type", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Pod Distribution by Phase", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 47, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (phase)(karpenter_pods_state{namespace=~\"$namespace\", node=~\"$node\", zone=~\"$zone\", instance_type=~\"$instance_type\", owner=~\"$owner\", phase=~\"$phase\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", provisioner=~\"$provisioner\"})", + "interval": "", + "legendFormat": "{{phase}}", + "refId": "A" + } + ], + "title": "Pod Distribution by Phase", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Pod Distribution by Capacity Type", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 47, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (capacity_type)(karpenter_pods_state{namespace=~\"$namespace\", node=~\"$node\", zone=~\"$zone\", instance_type=~\"$instance_type\", owner=~\"$owner\", phase=~\"$phase\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", provisioner=~\"$provisioner\"})", + "interval": "", + "legendFormat": "{{capacity_type}}", + "refId": "A" + } + ], + "title": "Pod Distribution by Capacity Type", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Pod Distribution by Owner", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 47, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (owner)(karpenter_pods_state{namespace=~\"$namespace\", node=~\"$node\", zone=~\"$zone\", instance_type=~\"$instance_type\", owner=~\"$owner\", phase=~\"$phase\", arch=~\"$arch\", capacity_type=~\"$capacity_type\", provisioner=~\"$provisioner\"})", + "interval": "", + "legendFormat": "{{owner}}", + "refId": "A" + } + ], + "title": "Pod Distribution by Owner", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 32, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(zone)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "zone", + "options": [], + "query": { + "query": "label_values(zone)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(owner)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "owner", + "options": [], + "query": { + "query": "label_values(owner)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + 
"default", + "karpenter", + "kube-node-lease", + "kube-system" + ], + "value": [ + "default", + "karpenter", + "kube-node-lease", + "kube-system" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(namespace)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(node)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "node", + "options": [], + "query": { + "query": "label_values(node)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(instance_type)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "instance_type", + "options": [], + "query": { + "query": "label_values(instance_type)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(phase)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "phase", + "options": [], + "query": { + "query": "label_values(phase)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": "", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(capacity_type)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "capacity_type", + "options": [], + "query": { + "query": "label_values(capacity_type)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(arch)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "arch", + "options": [], + "query": { + "query": "label_values(arch)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": "", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(provisioner)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "provisioner", + "options": [], + "query": { + "query": "label_values(provisioner)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + 
"skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Pod Statistic", + "uid": "jizEPY2nk", + "version": 34 +} \ No newline at end of file diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/prometheus-values.yaml b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/prometheus-values.yaml new file mode 100644 index 000000000000..0eae2d3f882d --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/prometheus-values.yaml @@ -0,0 +1,14 @@ +alertmanager: + persistentVolume: + enabled: false + +server: + fullnameOverride: prometheus-server + persistentVolume: + enabled: false + +extraScrapeConfigs: | + - job_name: karpenter + static_configs: + - targets: + - karpenter-metrics.karpenter:8080 diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/add-monitoring.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/add-monitoring.sh new file mode 100755 index 000000000000..6d2332b06be2 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/add-monitoring.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -euo pipefail #fail if one step fails + +if [ "$#" -ne 1 ] +then + echo "Missing required Karpenter version. Usage: add-monitoring.sh v0.0.1" + exit 1 +fi + +export KARPENTER_VERSION=$1 + +declare -a steps=( + step01-config.sh + step09-add-prometheus-grafana.sh + step10-add-grafana-port-forward.sh + step11-grafana-get-password.sh +) + +for step in "${steps[@]}"; do + echo "$step" + source $step +done diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/add-provisioner.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/add-provisioner.sh new file mode 100755 index 000000000000..51f286de0c3c --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/add-provisioner.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -euo pipefail #fail if one step fails + +declare -a steps=( + step01-config.sh + step12-add-provisioner.sh + step13-automatic-node-provisioning.sh + step14-automatic-node-termination.sh +) + +for step in "${steps[@]}"; do + echo "$step" + source $step +done diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/cleanup.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/cleanup.sh new file mode 100755 index 000000000000..bbbb6b389794 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/cleanup.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +declare -a steps=( + step01-config.sh + step14-deprovisioning.sh + step16-cleanup.sh +) + +for step in "${steps[@]}"; do + echo "$step" + source $step +done diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/install.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/install.sh new file mode 100755 index 000000000000..05e50ada8513 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/install.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -euo pipefail #fail if one step fails + +if [ "$#" -ne 1 ] +then + echo "Missing required Karpenter version. 
Usage: install.sh v0.0.1" + exit 1 +fi + +export KARPENTER_VERSION=$1 + +declare -a steps=( + step01-config.sh + step02-create-cluster.sh + step03-iam-cloud-formation.sh + step04-grant-access.sh + step05-controller-iam.sh + step06-add-spot-role.sh + step07-install-helm-chart.sh + step08-apply-helm-chart.sh +) + +for step in "${steps[@]}"; do + echo "$step" + source $step +done diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step01-config.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step01-config.sh new file mode 100644 index 000000000000..5792c574690e --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step01-config.sh @@ -0,0 +1,3 @@ +export CLUSTER_NAME="${USER}-karpenter-demo" +export AWS_DEFAULT_REGION="us-west-2" +export AWS_ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)" diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step02-create-cluster.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step02-create-cluster.sh new file mode 100644 index 000000000000..d73ae192a332 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step02-create-cluster.sh @@ -0,0 +1,22 @@ +eksctl create cluster -f - << EOF +--- +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig +metadata: + name: ${CLUSTER_NAME} + region: ${AWS_DEFAULT_REGION} + version: "1.21" + tags: + karpenter.sh/discovery: ${CLUSTER_NAME} +managedNodeGroups: + - instanceType: m5.large + amiFamily: AmazonLinux2 + name: ${CLUSTER_NAME}-ng + desiredCapacity: 1 + minSize: 1 + maxSize: 10 +iam: + withOIDC: true +EOF + +export CLUSTER_ENDPOINT="$(aws eks describe-cluster --name ${CLUSTER_NAME} --query "cluster.endpoint" --output text)" diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step03-iam-cloud-formation.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step03-iam-cloud-formation.sh new file mode 100644 index 000000000000..dce544c1a557 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step03-iam-cloud-formation.sh @@ -0,0 +1,8 @@ +TEMPOUT=$(mktemp) + +curl -fsSL https://karpenter.sh/"${KARPENTER_VERSION}"/getting-started/getting-started-with-eksctl/cloudformation.yaml > $TEMPOUT \ +&& aws cloudformation deploy \ + --stack-name "Karpenter-${CLUSTER_NAME}" \ + --template-file "${TEMPOUT}" \ + --capabilities CAPABILITY_NAMED_IAM \ + --parameter-overrides "ClusterName=${CLUSTER_NAME}" diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step04-grant-access.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step04-grant-access.sh new file mode 100644 index 000000000000..f98a75a50011 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step04-grant-access.sh @@ -0,0 +1,6 @@ +eksctl create iamidentitymapping \ + --username system:node:{{EC2PrivateDNSName}} \ + --cluster "${CLUSTER_NAME}" \ + --arn "arn:aws:iam::${AWS_ACCOUNT_ID}:role/KarpenterNodeRole-${CLUSTER_NAME}" \ + --group system:bootstrappers \ + --group system:nodes diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step05-controller-iam.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step05-controller-iam.sh 
new file mode 100644 index 000000000000..32673a49c9e0 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step05-controller-iam.sh @@ -0,0 +1,8 @@ +eksctl create iamserviceaccount \ + --cluster "${CLUSTER_NAME}" --name karpenter --namespace karpenter \ + --role-name "${CLUSTER_NAME}-karpenter" \ + --attach-policy-arn "arn:aws:iam::${AWS_ACCOUNT_ID}:policy/KarpenterControllerPolicy-${CLUSTER_NAME}" \ + --role-only \ + --approve + +export KARPENTER_IAM_ROLE_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:role/${CLUSTER_NAME}-karpenter" diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step06-add-spot-role.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step06-add-spot-role.sh new file mode 100644 index 000000000000..3cb0510b905b --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step06-add-spot-role.sh @@ -0,0 +1,3 @@ +aws iam create-service-linked-role --aws-service-name spot.amazonaws.com || true +# If the role has already been successfully created, you will see: +# An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForEC2Spot has been taken in this account, please try a different suffix. diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step07-install-helm-chart.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step07-install-helm-chart.sh new file mode 100644 index 000000000000..e36cf3d4be7e --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step07-install-helm-chart.sh @@ -0,0 +1,2 @@ +helm repo add karpenter https://charts.karpenter.sh/ +helm repo update diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step08-apply-helm-chart.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step08-apply-helm-chart.sh new file mode 100644 index 000000000000..bc7038a6358b --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step08-apply-helm-chart.sh @@ -0,0 +1,8 @@ +helm upgrade --install --namespace karpenter --create-namespace \ + karpenter karpenter/karpenter \ + --version ${KARPENTER_VERSION} \ + --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"=${KARPENTER_IAM_ROLE_ARN} \ + --set clusterName=${CLUSTER_NAME} \ + --set clusterEndpoint=${CLUSTER_ENDPOINT} \ + --set aws.defaultInstanceProfile=KarpenterNodeInstanceProfile-${CLUSTER_NAME} \ + --wait # for the defaulting webhook to install before creating a Provisioner diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step09-add-prometheus-grafana.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step09-add-prometheus-grafana.sh new file mode 100644 index 000000000000..1b028b23c8ed --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step09-add-prometheus-grafana.sh @@ -0,0 +1,11 @@ +helm repo add grafana-charts https://grafana.github.io/helm-charts +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update + +kubectl create namespace monitoring + +curl -fsSL https://karpenter.sh/"${KARPENTER_VERSION}"/getting-started/getting-started-with-eksctl/prometheus-values.yaml | tee prometheus-values.yaml +helm install --namespace 
monitoring prometheus prometheus-community/prometheus --values prometheus-values.yaml + +curl -fsSL https://karpenter.sh/"${KARPENTER_VERSION}"/getting-started/getting-started-with-eksctl/grafana-values.yaml | tee grafana-values.yaml +helm install --namespace monitoring grafana grafana-charts/grafana --values grafana-values.yaml diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step10-add-grafana-port-forward.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step10-add-grafana-port-forward.sh new file mode 100644 index 000000000000..85d4028642b6 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step10-add-grafana-port-forward.sh @@ -0,0 +1 @@ +kubectl port-forward --namespace monitoring svc/grafana 3000:80 diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step11-grafana-get-password.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step11-grafana-get-password.sh new file mode 100644 index 000000000000..cdf4f61d3775 --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step11-grafana-get-password.sh @@ -0,0 +1 @@ +kubectl get secret --namespace monitoring grafana -o jsonpath="{.data.admin-password}" | base64 --decode diff --git a/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step12-add-provisioner.sh b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step12-add-provisioner.sh new file mode 100644 index 000000000000..05ad66953c2f --- /dev/null +++ b/website/content/en/v0.7.0/getting-started/getting-started-with-eksctl/scripts/step12-add-provisioner.sh @@ -0,0 +1,20 @@ +cat <}}) for more information. For example, +`ttlSecondsUntilExpired` configures Karpenter to terminate nodes when a maximum age is reached. + +Note: This provisioner will create capacity as long as the sum of all created capacity is less than the specified limit. + +```bash +cat < + Provisioner API reference page +--- + +## Example Provisioner Resource + +```yaml +apiVersion: karpenter.sh/v1alpha5 +kind: Provisioner +metadata: + name: default +spec: + # If omitted, the feature is disabled and nodes will never expire. If set to less time than it requires for a node + # to become ready, the node may expire before any pods successfully start. + ttlSecondsUntilExpired: 2592000 # 30 Days = 60 * 60 * 24 * 30 Seconds; + + # If omitted, the feature is disabled, nodes will never scale down due to low utilization + ttlSecondsAfterEmpty: 30 + + # Provisioned nodes will have these taints + # Taints may prevent pods from scheduling if they are not tolerated + taints: + - key: example.com/special-taint + effect: NoSchedule + + # Labels are arbitrary key-values that are applied to all nodes + labels: + billing-team: my-team + + # Requirements that constrain the parameters of provisioned nodes. + # These requirements are combined with pod.spec.affinity.nodeAffinity rules. 
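+ # For example, a pod whose nodeSelector asks for topology.kubernetes.io/zone: us-west-2a + # intersects the zone requirement below and can be scheduled; a pod asking for us-east-1a cannot.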
+ # Operators { In, NotIn } are supported to enable including or excluding values + requirements: + - key: "node.kubernetes.io/instance-type" + operator: In + values: ["m5.large", "m5.2xlarge"] + - key: "topology.kubernetes.io/zone" + operator: In + values: ["us-west-2a", "us-west-2b"] + - key: "kubernetes.io/arch" + operator: In + values: ["arm64", "amd64"] + - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand + operator: In + values: ["spot", "on-demand"] + + # Karpenter provides the ability to specify a few additional Kubelet args. + # These are all optional and provide support for additional customization and use cases. + kubeletConfiguration: + clusterDNS: ["10.0.1.100"] + + # Resource limits constrain the total size of the cluster. + # Limits prevent Karpenter from creating new instances once the limit is exceeded. + limits: + resources: + cpu: "1000" + memory: 1000Gi + + # These fields vary per cloud provider, see your cloud provider specific documentation + provider: {} +``` + +## Node deprovisioning + +If neither of these values is set, Karpenter will *not* delete instances. It is recommended to set the `ttlSecondsAfterEmpty` value to enable scale-down of the cluster. + +### spec.ttlSecondsAfterEmpty + +Setting a value here enables Karpenter to delete empty/unnecessary instances. DaemonSet pods are not counted when deciding whether a node is "empty". This value is in seconds. + +### spec.ttlSecondsUntilExpired + +Setting a value here enables node expiry. After nodes reach the defined age in seconds, they will be deleted, even if in use. This enables nodes to effectively be periodically "upgraded" by replacing them with newly provisioned instances. + +Note that Karpenter does not automatically add jitter to this value. If multiple instances are created in a small amount of time, they will expire at very similar times. Consider defining a [pod disruption budget](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) to prevent excessive workload disruption. + + + +## spec.requirements + +Kubernetes defines the following [Well-Known Labels](https://kubernetes.io/docs/reference/labels-annotations-taints/), and cloud providers (e.g., AWS) implement them. They are set in the `spec.requirements` section of the Provisioner API. + +These well-known labels may be specified at the provisioner level, or in a workload definition (e.g., a nodeSelector on a pod spec). Nodes are chosen using both the provisioner's and the pod's requirements. If there is no overlap, nodes will not be launched. In other words, a pod's requirements must be within the provisioner's requirements. If a requirement is not defined for a well-known label, any value available to the cloud provider may be chosen. + +For example, an instance type may be specified using a nodeSelector in a pod spec. If the instance type requested is not included in the provisioner list and the provisioner has instance type requirements, Karpenter will not create a node or schedule the pod. + +📝 None of these values are required. + +### Instance Types + +- key: `node.kubernetes.io/instance-type` + +Generally, instance types should be a list and not a single value. Leaving this field undefined is recommended, as it maximizes choices for efficiently placing pods. + +☁️ **AWS** + +Review [AWS instance types](https://aws.amazon.com/ec2/instance-types/).
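+To see which instance types are actually offered in your region, a query along these lines can help (a sketch only; it assumes the AWS CLI is configured with credentials and a default region): + +```bash +# List current-generation instance type names available in the configured region +aws ec2 describe-instance-types \ + --filters Name=current-generation,Values=true \ + --query 'InstanceTypes[].InstanceType' \ + --output text | tr '\t' '\n' | sort +```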
+ +The default value includes all instance types with the exclusion of metal +(non-virtualized), +[non-HVM](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html), +and GPU instances. + +View the full, unfiltered list of instance types with `aws ec2 describe-instance-types`. + +**Example** + +*Set Default with provisioner.yaml* + +```yaml +spec: + requirements: + - key: node.kubernetes.io/instance-type + operator: In + values: ["m5.large", "m5.2xlarge"] +``` + +*Override with workload manifest (e.g., pod)* + +```yaml +spec: + template: + spec: + nodeSelector: + node.kubernetes.io/instance-type: m5.large +``` + +### Availability Zones + +- key: `topology.kubernetes.io/zone` +- value example: `us-east-1c` + +☁️ **AWS** + +- value list: `aws ec2 describe-availability-zones --region <region-name>` + +Karpenter can be configured to create nodes in a particular zone. Note that the Availability Zone `us-east-1a` for your AWS account might not have the same location as `us-east-1a` for another AWS account. + +[Learn more about Availability Zone +IDs.](https://docs.aws.amazon.com/ram/latest/userguide/working-with-az-ids.html) + +### Architecture + +- key: `kubernetes.io/arch` +- values + - `amd64` (default) + - `arm64` + +Karpenter supports `amd64` and `arm64` nodes. + + +### Capacity Type + +- key: `karpenter.sh/capacity-type` + +☁️ **AWS** + +- values + - `spot` + - `on-demand` (default) + +Karpenter supports specifying capacity type, which is analogous to [EC2 purchase options](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-purchasing-options.html). + +Karpenter prioritizes Spot offerings if the provisioner allows Spot and on-demand instances. If the provider API (e.g., the EC2 Fleet API) indicates Spot capacity is unavailable, Karpenter caches that result across all attempts to provision EC2 capacity for that instance type and zone for the next 45 seconds. If there are no other possible offerings available for Spot, Karpenter will attempt to provision on-demand instances, generally within milliseconds. + + +## spec.kubeletConfiguration + +Karpenter provides the ability to specify a few additional Kubelet args. These are all optional and provide support for +additional customization and use cases. Adjust these only if you know you need to do so. + +```yaml +spec: + kubeletConfiguration: + clusterDNS: ["10.0.1.100"] +``` + +## spec.limits.resources + +The provisioner spec includes a limits section (`spec.limits.resources`), which constrains the maximum amount of resources that the provisioner will manage. + +Presently, Karpenter supports `memory` and `cpu` limits. + +CPU limits are described with a `DecimalSI` value. Note that the Kubernetes API will coerce this into a string, so we recommend against using integers to avoid GitOps skew. + +Memory limits are described with a [`BinarySI` value, such as 1000Gi.](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-memory) + +Karpenter stops allocating resources once at least one resource limit is met or exceeded. + +Review the [resource limit task](../tasks/set-resource-limits) for more information. + +## spec.provider + +This section is cloud provider specific.
Reference the appropriate documentation: + +- [AWS](../aws/provisioning/) diff --git a/website/content/en/v0.7.0/tasks/_index.md b/website/content/en/v0.7.0/tasks/_index.md new file mode 100755 index 000000000000..f84a1a7cb00b --- /dev/null +++ b/website/content/en/v0.7.0/tasks/_index.md @@ -0,0 +1,7 @@ +--- +title: "Tasks" +linkTitle: "Tasks" +weight: 45 +--- + +Karpenter tasks can be divided into tasks for cluster administrators, who manage the cluster itself, and tasks for application developers, who deploy pod workloads on the cluster. diff --git a/website/content/en/v0.7.0/tasks/deprovisioning.md b/website/content/en/v0.7.0/tasks/deprovisioning.md new file mode 100644 index 000000000000..2d458fa74871 --- /dev/null +++ b/website/content/en/v0.7.0/tasks/deprovisioning.md @@ -0,0 +1,86 @@ +--- +title: "Deprovisioning" +linkTitle: "Deprovisioning" +weight: 10 +--- + +Karpenter sets a Kubernetes [finalizer](https://kubernetes.io/docs/concepts/overview/working-with-objects/finalizers/) on each node it provisions. +The finalizer specifies additional actions the Karpenter controller will take in response to a node deletion request. +These include: + +* Marking the node as unschedulable, so no further pods can be scheduled there. +* Evicting all pods other than DaemonSet pods from the node. +* Terminating the instance from the cloud provider. +* Deleting the node from the Kubernetes cluster. + +## How Karpenter nodes are deprovisioned + +There are both automated and manual ways of deprovisioning nodes provisioned by Karpenter: + +* **Node empty**: Karpenter notes when the last workload (non-daemonset) pod stops running on a node. From that point, Karpenter waits the number of seconds set by `ttlSecondsAfterEmpty` in the provisioner, then Karpenter requests to delete the node. This feature can keep costs down by removing nodes that are no longer being used for workloads. +* **Node expired**: Karpenter requests to delete the node after a set number of seconds, based on the provisioner `ttlSecondsUntilExpired` value, from the time the node was provisioned. One use case for node expiry is to handle node upgrades. Old nodes (with a potentially outdated Kubernetes version or operating system) are deleted, and replaced with nodes on the current version (assuming that you requested the latest version, rather than a specific version). + + {{% alert title="Note" color="primary" %}} + Keep in mind that a small node expiry (`ttlSecondsUntilExpired`) value results in higher churn in cluster activity. For example, if a cluster + brings up all nodes at once, all the pods on those nodes will fall into the same batching window on expiration. + {{% /alert %}} + +* **Node deleted**: You can use `kubectl` to manually remove a single Karpenter node: + + ```bash + # Delete a specific node + kubectl delete node $NODE_NAME + + # Delete all nodes owned by any provisioner + kubectl delete nodes -l karpenter.sh/provisioner-name + + # Delete all nodes owned by a specific provisioner + kubectl delete nodes -l karpenter.sh/provisioner-name=$PROVISIONER_NAME + ``` + +Whether through node expiry or manual deletion, Karpenter seeks to follow graceful termination procedures as described in the Kubernetes [Graceful node shutdown](https://kubernetes.io/docs/concepts/architecture/nodes/#graceful-node-shutdown) documentation. +If the Karpenter controller is removed or fails, the finalizers on the nodes are orphaned and will require manual removal.
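+In that case, the orphaned finalizer can be cleared by hand. A minimal sketch (it assumes `kubectl` access to the cluster, that `NODE_NAME` is set to the stuck node, and that nothing else still depends on the finalizer): + +```bash +# Clear finalizers from a stuck node object so the API server can delete it +kubectl patch node "${NODE_NAME}" --type=merge -p '{"metadata":{"finalizers":null}}' +``` + +Note that this only removes the Kubernetes node object; if the backing instance is still running, terminate it through the cloud provider.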
+ + +{{% alert title="Note" color="primary" %}} +By adding the finalizer, Karpenter improves the default Kubernetes process of node deletion. +When you run `kubectl delete node` on a node without a finalizer, the node is deleted without triggering the finalization logic. The instance will continue running in EC2, even though there is no longer a node object for it. +The kubelet isn’t watching for its own existence, so if a node is deleted the kubelet doesn’t terminate itself. +All the pod objects get deleted by a garbage collection process later, because the pods’ node is gone. +{{% /alert %}} + +## What can cause deprovisioning to fail? + +There are a few cases where requesting to deprovision a Karpenter node will fail. These include Pod Disruption Budgets and pods that have the `do-not-evict` annotation set. + +### Disruption budgets + +Karpenter respects Pod Disruption Budgets (PDBs) by using a backoff retry eviction strategy. Pods will never be forcibly deleted, so pods that fail to shut down will prevent a node from deprovisioning. +Kubernetes PDBs let you specify how much of a Deployment, ReplicationController, ReplicaSet, or StatefulSet must be protected from disruptions when pod eviction requests are made. + +PDBs can be used to strike a balance by protecting the application's availability while still allowing a cluster administrator to manage the cluster. +Here is an example where the pods matching the label `myapp` will block node termination if evicting the pod would reduce the number of available pods below 4. + +```yaml +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: myapp-pdb +spec: + minAvailable: 4 + selector: + matchLabels: + app: myapp +``` + +You can set `minAvailable` or `maxUnavailable` as integers or as a percentage. +Review what [disruptions are](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/), and [how to configure them](https://kubernetes.io/docs/tasks/run-application/configure-pdb/). + +### Pod set to do-not-evict + +If a pod exists with the annotation `karpenter.sh/do-not-evict` on a node, and a request is made to delete the node, Karpenter will not drain any pods from that node or otherwise try to delete the node. This annotation will have no effect for static pods, pods that tolerate `NoSchedule`, or pods terminating past their graceful termination period. + +This is useful for pods that you want to run from start to finish without interruption. +Examples might include a real-time, interactive game that you don't want to interrupt or a long batch job (such as you might have with machine learning) that would need to start over if it were interrupted. + +If you want to terminate a node with a `do-not-evict` pod, you can simply remove the annotation and the deprovisioning process will continue. diff --git a/website/content/en/v0.7.0/tasks/pod-density.md b/website/content/en/v0.7.0/tasks/pod-density.md new file mode 100644 index 000000000000..826e124a2e4c --- /dev/null +++ b/website/content/en/v0.7.0/tasks/pod-density.md @@ -0,0 +1,59 @@ +--- +title: "Control Pod Density" +linkTitle: "Control Pod Density" +weight: 20 +--- + +Pod density is the number of pods per node. + +Kubernetes has a default limit of 110 pods per node. If you are using the EKS Optimized AMI on AWS, the [number of pods is limited by instance type](https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt) in the default configuration. + +## Max Pods + +Do not use the `max-pods` argument to kubelet. 
Karpenter is not aware of this value. For example, Karpenter may provision an instance expecting it to accommodate more pods than this static limit. + +## Increase Pod Density + +### Networking Limitations + +*☁️ AWS Specific* + +The number of pods on a node is limited by the number of network interfaces (ENIs) that may be attached to a node. + +[AWS VPC CNI v1.9 introduced prefix assignment.](https://aws.amazon.com/blogs/containers/amazon-vpc-cni-increases-pods-per-node-limits/) In short, a single ENI can provide IP addresses for multiple pods. Much higher pod densities are now supported. + +Run the Karpenter controller with the environment variable `AWS_ENI_LIMITED_POD_DENSITY` (or the argument `--aws-eni-limited-pod-density=true`) to enable nodes with more than 110 pods. + +Environment variables for the Karpenter controller may be specified as [helm chart values](https://github.com/aws/karpenter/blob/c73f425e924bb64c3f898f30ca5035a1d8591183/charts/karpenter/values.yaml#L15). + +## Limit Pod Density + +Generally, increasing pod density is more efficient. However, some use cases exist for limiting pod density. + +### Topology Spread + +You can use [topology spread]({{< relref "scheduling.md#topology-spread" >}}) features to reduce blast radius. For example, spreading workloads across EC2 Availability Zones. + + +### Restrict Instance Types + +Exclude large instance sizes to reduce the blast radius of an EC2 instance failure. + +Consider setting upper or lower boundaries on target instance sizes with the `node.kubernetes.io/instance-type` key. + +The following example shows how to avoid provisioning large Graviton instances in order to reduce the impact of individual instance failures: + +``` +- key: node.kubernetes.io/instance-type + operator: NotIn + values: + - m6g.16xlarge + - m6gd.16xlarge + - r6g.16xlarge + - r6gd.16xlarge + - c6g.16xlarge +``` + + + + diff --git a/website/content/en/v0.7.0/tasks/provisioning.md b/website/content/en/v0.7.0/tasks/provisioning.md new file mode 100644 index 000000000000..d718e024c58d --- /dev/null +++ b/website/content/en/v0.7.0/tasks/provisioning.md @@ -0,0 +1,80 @@ +--- +title: "Provisioning" +linkTitle: "Provisioning" +weight: 5 +--- + +When you first installed Karpenter, you set up a default Provisioner. +The Provisioner sets constraints on the nodes that can be created by Karpenter and the pods that can run on those nodes. +The Provisioner can be set to do things like: + +* Define taints to limit the pods that can run on nodes Karpenter creates +* Limit node creation to certain zones, instance types, and CPU architectures +* Set defaults for node expiration + +You can change your provisioner or add other provisioners to Karpenter. +Here are things you should know about Provisioners: + +* Karpenter won't do anything if there is not at least one Provisioner configured. +* Karpenter loops through each configured Provisioner. +* If Karpenter encounters a taint in the Provisioner that is not tolerated by a Pod, Karpenter won't use that Provisioner to provision the pod. +* It is recommended to create Provisioners that are mutually exclusive, so that no Pod matches multiple Provisioners. If multiple Provisioners are matched, Karpenter will randomly choose which to use. + +If you want to modify or add provisioners to Karpenter, do the following: + +1.
+1. Review the following Provisioner documents:
+
+   * [Provisioner](../../getting-started/getting-started-with-eksctl/#provisioner) in the Getting Started guide for a sample default Provisioner
+   * [Provisioner API](../../provisioner/) for descriptions of Provisioner API values
+   * [Provisioning Configuration](../../AWS/provisioning) for cloud-specific settings
+
+2. Apply the new or modified Provisioner to the cluster.
+
+The following examples illustrate different aspects of Provisioners.
+Refer to [Scheduling](../scheduling) to see how the same features are used in Pod specs to determine where pods run.
+
+## Example: Requirements
+
+This provisioner limits nodes to specific zones.
+It is flexible to both spot and on-demand capacity types.
+
+```yaml
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: westzones
+spec:
+  requirements:
+    - key: "topology.kubernetes.io/zone"
+      operator: In
+      values: ["us-west-2a", "us-west-2b", "us-west-2c"]
+    - key: "karpenter.sh/capacity-type"
+      operator: In
+      values: ["spot", "on-demand"]
+  provider:
+    instanceProfile: myprofile-cluster101
+```
+With these settings, the provisioner can launch nodes in three availability zones and is flexible to both spot and on-demand purchase types.
+
+## Example: Isolating Expensive Hardware
+
+A provisioner can be set up to only provision nodes on particular processor types.
+The following example sets a taint that only allows pods with tolerations for Nvidia GPUs to be scheduled:
+
+```yaml
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: gpu
+spec:
+  ttlSecondsAfterEmpty: 60
+  requirements:
+    - key: node.kubernetes.io/instance-type
+      operator: In
+      values: ["p3.8xlarge", "p3.16xlarge"]
+  taints:
+    - key: nvidia.com/gpu
+      value: "true"
+      effect: "NoSchedule"
+```
+In order for a pod to run on a node defined in this provisioner, it must tolerate `nvidia.com/gpu` in its pod spec.
diff --git a/website/content/en/v0.7.0/tasks/scheduling.md b/website/content/en/v0.7.0/tasks/scheduling.md
new file mode 100755
index 000000000000..8077a99267c1
--- /dev/null
+++ b/website/content/en/v0.7.0/tasks/scheduling.md
@@ -0,0 +1,300 @@
+---
+title: "Scheduling"
+linkTitle: "Scheduling"
+weight: 15
+---
+
+If your pods have no requirements for how or where to run, you can let Karpenter choose nodes from the full range of available cloud provider resources.
+However, by taking advantage of Karpenter's model of layered constraints, you can be sure that the precise type and amount of resources needed are available to your pods.
+Reasons for constraining where your pods run could include:
+
+* Needing to run in zones where dependent applications or storage are available
+* Requiring certain kinds of processors or other hardware
+* Wanting to use techniques like topology spread to help ensure high availability
+
+Your Cloud Provider defines the first layer of constraints, including all instance types, architectures, zones, and purchase types available to its cloud.
+The cluster administrator adds the next layer of constraints by creating one or more provisioners.
+The final layer comes from you adding specifications to your Kubernetes pod deployments.
+Pod scheduling constraints must fall within a provisioner's constraints or the pods will not deploy.
+For example, if the provisioner sets limits that allow only a particular zone to be used, and a pod asks for a different zone, it will not be scheduled.
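+
+Here is a minimal sketch of that failure mode (the provisioner name, pod name, image, and zone values are hypothetical, chosen only for illustration): the pod requests a zone that its provisioner does not allow, so it will remain `Pending`:
+
+```yaml
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: limited-zones            # hypothetical provisioner restricted to two zones
+spec:
+  requirements:
+    - key: "topology.kubernetes.io/zone"
+      operator: In
+      values: ["us-west-2a", "us-west-2b"]
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: zone-mismatch
+spec:
+  nodeSelector:
+    # us-west-2d falls outside the provisioner's layered constraints,
+    # so Karpenter cannot launch a node that satisfies this pod.
+    topology.kubernetes.io/zone: us-west-2d
+  containers:
+    - name: app
+      image: myimage
+```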
+
+Constraints you can request include:
+
+* **Resource requests**: Request that a certain amount of memory or CPU be available.
+* **Node selection**: Choose to run on a node that has a particular label (`nodeSelector`).
+* **Node affinity**: Draw a pod to run on nodes with particular attributes (affinity).
+* **Topology spread**: Use topology spread to help ensure availability of the application.
+
+Karpenter supports standard Kubernetes scheduling constraints.
+This allows you to define a single set of rules that apply to both existing and provisioned capacity.
+Pod affinity is a key exception to this rule.
+
+{{% alert title="Note" color="primary" %}}
+Karpenter supports specific [Well-Known Labels, Annotations and Taints](https://kubernetes.io/docs/reference/labels-annotations-taints/) that are useful for scheduling.
+{{% /alert %}}
+
+## Resource requests
+
+Within a Pod spec, you can both make requests and set limits on resources a pod needs, such as CPU and memory.
+For example:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: myapp
+spec:
+  containers:
+  - name: app
+    image: myimage
+    resources:
+      requests:
+        memory: "128Mi"
+        cpu: "500m"
+      limits:
+        memory: "256Mi"
+        cpu: "1000m"
+```
+In this example, the container is requesting 128MiB of memory and 0.5 CPU.
+Its limits are set to 256MiB of memory and 1 CPU.
+Instance type selection math only uses `requests`, but `limits` may be configured to enable resource oversubscription.
+
+See [Managing Resources for Containers](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for details on resource types supported by Kubernetes, [Specify a memory request and a memory limit](https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/#specify-a-memory-request-and-a-memory-limit) for examples of memory requests, and [Provisioning Configuration](../../aws/provisioning/) for a list of supported resources.
+
+## Selecting nodes
+
+With `nodeSelector` you can ask for a node that matches selected key-value pairs.
+This can include well-known labels or custom labels you create yourself.
+
+While `nodeSelector` is like node affinity, it doesn't have the same "and/or" matchExpressions that affinity has.
+So all key-value pairs must match if you use `nodeSelector`.
+Also, `nodeSelector` can only do inclusions, while `affinity` can do inclusions and exclusions (`In` and `NotIn`).
+
+### Node selectors
+
+Here is an example of a `nodeSelector` for selecting nodes:
+
+```yaml
+nodeSelector:
+  topology.kubernetes.io/zone: us-west-2a
+  karpenter.sh/capacity-type: spot
+```
+This example features a well-known label (`topology.kubernetes.io/zone`) and a label that is well known to Karpenter (`karpenter.sh/capacity-type`).
+
+If you want to create a custom label, you should do that at the provisioner level.
+Then the pod can declare that custom label, as shown in the sketch below.
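+
+Here is a minimal sketch of that pattern (the label key and value are hypothetical, and the rest of the provisioner spec is elided): the provisioner stamps the custom label onto the nodes it creates, and the pod selects it.
+
+```yaml
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: default
+spec:
+  labels:
+    team.example.com/app-tier: critical   # hypothetical custom label applied to nodes
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: myapp
+spec:
+  nodeSelector:
+    team.example.com/app-tier: critical   # pod declares the same custom label
+  containers:
+    - name: app
+      image: myimage
+```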
+
+See [nodeSelector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) in the Kubernetes documentation for details.
+
+### Node affinity
+
+The examples below illustrate how to use node affinity to include (`In`) and exclude (`NotIn`) objects.
+See [Node affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#node-affinity) for details.
+When setting rules, the following node affinity types define how hard or soft each rule is:
+
+* **requiredDuringSchedulingIgnoredDuringExecution**: This is a hard rule that must be met.
+* **preferredDuringSchedulingIgnoredDuringExecution**: This is a preference, but the pod can still run on a node where the preference is not met.
+
+The `IgnoredDuringExecution` part of each tells the pod to keep running even if conditions change on the node so the rules no longer match.
+You can think of these concepts as `required` and `preferred`, since Kubernetes never implemented other variants of these rules.
+
+All examples below assume that the provisioner doesn't have constraints to prevent those zones from being used.
+The first constraint says you could use `us-west-2a` or `us-west-2b`; the second constraint makes it so only `us-west-2b` can be used.
+
+```yaml
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+          - matchExpressions:
+              - key: "topology.kubernetes.io/zone"
+                operator: "In"
+                values: ["us-west-2a", "us-west-2b"]
+              - key: "topology.kubernetes.io/zone"
+                operator: "In"
+                values: ["us-west-2b"]
+```
+
+Changing the second operator to `NotIn` would allow the pod to run in `us-west-2a` only:
+
+```yaml
+          - key: "topology.kubernetes.io/zone"
+            operator: "In"
+            values: ["us-west-2a", "us-west-2b"]
+          - key: "topology.kubernetes.io/zone"
+            operator: "NotIn"
+            values: ["us-west-2b"]
+```
+
+Continuing to add to the example, `nodeAffinity` lets you define terms so if one term doesn't work it goes to the next one.
+Here, if `us-west-2a` is not available, the second term will cause the pod to run on a spot instance in `us-west-2d`.
+
+```yaml
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+          - matchExpressions: # OR
+              - key: "topology.kubernetes.io/zone" # AND
+                operator: "In"
+                values: ["us-west-2a", "us-west-2b"]
+              - key: "topology.kubernetes.io/zone" # AND
+                operator: "NotIn"
+                values: ["us-west-2b"]
+          - matchExpressions: # OR
+              - key: "karpenter.sh/capacity-type" # AND
+                operator: "In"
+                values: ["spot"]
+              - key: "topology.kubernetes.io/zone" # AND
+                operator: "In"
+                values: ["us-west-2d"]
+```
+In general, Karpenter will go through each of the `nodeSelectorTerms` in order and take the first one that works.
+However, if Karpenter fails to provision with the first `nodeSelectorTerms`, it will try again using the second one.
+If they all fail, Karpenter will fail to provision the pod, then back off and retry over time.
+So if capacity becomes available, it will schedule the pod without user intervention.
+
+## Taints and tolerations
+
+Taints are the opposite of affinity.
+Setting a taint on a node tells the scheduler to not run a pod on it unless the pod has explicitly said it can tolerate that taint.
+This example shows a Provisioner that was set up with a taint so that only pods requiring a GPU can run on the nodes it creates:
+
+```yaml
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: gpu
+spec:
+  requirements:
+    - key: node.kubernetes.io/instance-type
+      operator: In
+      values:
+        - p3.2xlarge
+        - p3.8xlarge
+        - p3.16xlarge
+  taints:
+    - key: nvidia.com/gpu
+      value: "true"
+      effect: "NoSchedule"
+```
+
+For a pod to request to run on a node created by this provisioner, it could set a toleration as follows:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: mygpupod
+spec:
+  containers:
+  - name: gpuapp
+    resources:
+      requests:
+        nvidia.com/gpu: 1
+      limits:
+        nvidia.com/gpu: 1
+    image: mygpucontainer
+  tolerations:
+  - key: "nvidia.com/gpu"
+    operator: "Exists"
+    effect: "NoSchedule"
+```
+See [Taints and Tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) in the Kubernetes documentation for details.
+
+## Topology Spread
+
+By using the Kubernetes `topologySpreadConstraints` you can ask the provisioner to push pods away from each other to limit the blast radius of an outage.
+Think of it as the Kubernetes evolution of pod affinity: it lets you relate pods with respect to nodes while still allowing spread.
+For example:
+
+```yaml
+spec:
+  topologySpreadConstraints:
+    - maxSkew: 1
+      topologyKey: "topology.kubernetes.io/zone"
+      whenUnsatisfiable: ScheduleAnyway
+      labelSelector:
+        matchLabels:
+          dev: jjones
+    - maxSkew: 1
+      topologyKey: "kubernetes.io/hostname"
+      whenUnsatisfiable: ScheduleAnyway
+      labelSelector:
+        matchLabels:
+          dev: jjones
+```
+Adding this to your podspec would result in:
+
+* Pods being spread across both zones and hosts (`topologyKey`).
+* The `dev` `labelSelector` will include all pods with the label of `dev=jjones` in topology calculations. It is recommended to use a selector to match all pods in a deployment.
+* No more than one pod difference in the number of pods on each host (`maxSkew`).
+For example, if there were three nodes and five pods, the pods could be spread 1, 2, 2 or 2, 1, 2 and so on.
+If instead `maxSkew` were 5, the pods could be spread 5, 0, 0 or 3, 2, 0 or 2, 1, 2 and so on.
+* Karpenter is always able to improve skew by launching new nodes in the right zones. Therefore, `whenUnsatisfiable` does not change provisioning behavior.
+
+See [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/) for details.
+
+## Persistent Volume Topology
+
+Karpenter automatically detects storage scheduling requirements and includes them in node launch decisions.
+
+In the following example, the `StorageClass` defines zonal topologies for `us-west-2a` and `us-west-2b` and [binding mode `WaitForFirstConsumer`](https://kubernetes.io/docs/concepts/storage/storage-classes/#volume-binding-mode).
+When the pod is created, Karpenter follows references from the `Pod` to `PersistentVolumeClaim` to `StorageClass` and identifies that this pod requires storage in `us-west-2a` and `us-west-2b`.
+It randomly selects `us-west-2a`, provisions a node in that zone, and binds the pod to the node.
+The CSI driver creates a `PersistentVolume` according to the `PersistentVolumeClaim` and gives it a node affinity rule for `us-west-2a`.
+
+Later on, the pod is deleted and a new pod is created that requests the same claim.
+This time, Karpenter identifies that a `PersistentVolume` already exists for the `PersistentVolumeClaim`, and includes its zone `us-west-2a` in the pod's scheduling requirements.
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: app
+spec:
+  containers: ...
+  volumes:
+    - name: storage
+      persistentVolumeClaim:
+        claimName: ebs-claim
+---
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name: ebs
+provisioner: ebs.csi.aws.com
+volumeBindingMode: WaitForFirstConsumer
+allowedTopologies:
+- matchLabelExpressions:
+  - key: topology.ebs.csi.aws.com/zone
+    values: ["us-west-2a", "us-west-2b"]
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ebs-claim
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: ebs
+  resources:
+    requests:
+      storage: 4Gi
+```
+
+{{% alert title="Note" color="primary" %}}
+☁️ AWS Specific
+
+The EBS CSI driver uses `topology.ebs.csi.aws.com/zone` instead of the standard `topology.kubernetes.io/zone` label. Karpenter is aware of label aliasing and translates this label into `topology.kubernetes.io/zone` in memory. When configuring a `StorageClass` for the EBS CSI Driver, you must use `topology.ebs.csi.aws.com/zone`.
+{{% /alert %}}
+
+{{% alert title="Note" color="primary" %}}
+The topology key `topology.kubernetes.io/region` is not supported. Legacy in-tree CSI providers specify this label. Instead, install an out-of-tree CSI provider. [Learn more about moving to CSI providers.](https://kubernetes.io/blog/2021/12/10/storage-in-tree-to-csi-migration-status-update/#quick-recap-what-is-csi-migration-and-why-migrate)
+{{% /alert %}}
diff --git a/website/content/en/v0.7.0/tasks/set-resource-limits.md b/website/content/en/v0.7.0/tasks/set-resource-limits.md
new file mode 100644
index 000000000000..a397e8fa62b9
--- /dev/null
+++ b/website/content/en/v0.7.0/tasks/set-resource-limits.md
@@ -0,0 +1,62 @@
+---
+title: "Set Resource Limits"
+linkTitle: "Set Resource Limits"
+weight: 10
+---
+
+Karpenter automatically provisions instances from the cloud provider, which often incurs hard costs. To control resource utilization and cluster size, use resource limits.
+
+The provisioner spec includes a limits section (`spec.limits.resources`), which constrains the maximum amount of resources that the provisioner will manage.
+
+For example, setting `spec.limits.resources.cpu` to `1000` limits the provisioner to a total of 1000 CPU cores across all instances. This prevents unwanted excessive growth of a cluster.
+
+At this time, Karpenter only supports:
+- CPU
+- Memory
+
+CPU limits are described with a `DecimalSI` value, usually a whole number.
+
+Memory limits are described with a [`BinarySI` value, such as 1000Gi.](https://github.com/kubernetes/apimachinery/blob/4427f8f31dfbac65d3a044d0168f84c51bfda440/pkg/api/resource/quantity.go#L31)
+
+You can view the current consumption of CPU and memory on your cluster by running:
+```
+kubectl get provisioner -o=jsonpath='{.items[0].status}'
+```
+
+Review the [Kubernetes core API](https://github.com/kubernetes/api/blob/37748cca582229600a3599b40e9a82a951d8bbbf/core/v1/resource.go#L23) (`k8s.io/api/core/v1`) for more information on `resources`.
+
+### Implementation
+
+Karpenter refuses to allocate new resources while at least one resource limit is *exceeded*. In other words, resource limits aren't hard limits; they only apply once Karpenter detects that a limit has been crossed.
+
+**Example:**
+
+A resource limit of 1000 CPUs is set. 996 CPU cores are currently allocated.
+The resource limit is not yet met.
+
+In response to pending pods, Karpenter calculates that a new 6-core instance is needed. Karpenter *creates* the instance.
+
+1002 CPU cores are now allocated. The resource limit is now met/exceeded.
+
+In response to a new set of pending pods, Karpenter calculates that another 6-core instance is needed. Karpenter *does not create* the instance, because the resource limit has been met.
+
+{{% alert title="Note" color="primary" %}}
+Karpenter provisioning is highly parallel. Because of this, limit checking is eventually consistent, which can result in overrun during rapid scale outs.
+{{% /alert %}}
+
+### Example Provisioner:
+
+```yaml
+apiVersion: karpenter.sh/v1alpha5
+kind: Provisioner
+metadata:
+  name: default
+spec:
+  requirements:
+    - key: karpenter.sh/capacity-type
+      operator: In
+      values: ["spot"]
+  limits:
+    resources:
+      cpu: 1000
+      memory: 1000Gi
+```
\ No newline at end of file
diff --git a/website/content/en/v0.7.0/troubleshooting.md b/website/content/en/v0.7.0/troubleshooting.md
new file mode 100644
index 000000000000..c391f1b877a7
--- /dev/null
+++ b/website/content/en/v0.7.0/troubleshooting.md
@@ -0,0 +1,88 @@
+---
+title: "Troubleshooting"
+linkTitle: "Troubleshooting"
+weight: 100
+---
+
+## Node NotReady
+
+There are many reasons that a node can fail to join the cluster:
+- Permissions
+- Security Groups
+- Networking
+
+The easiest way to start debugging is to connect to the instance:
+```sh
+# List the nodes managed by Karpenter
+kubectl get node -l karpenter.sh/provisioner-name
+# Extract the instance ID
+INSTANCE_ID=$(kubectl get node -l karpenter.sh/provisioner-name -ojson | jq -r ".items[0].spec.providerID" | cut -d \/ -f5)
+# Connect to the instance
+aws ssm start-session --target $INSTANCE_ID
+# Check Kubelet logs
+sudo journalctl -u kubelet
+```
+
+## Missing Service Linked Role
+Unless your AWS account has already onboarded to EC2 Spot, you will need to create the service linked role to avoid `ServiceLinkedRoleCreationNotPermitted`.
+```
+AuthFailure.ServiceLinkedRoleCreationNotPermitted: The provided credentials do not have permission to create the service-linked role for EC2 Spot Instances
+```
+This can be resolved by creating the [Service Linked Role](https://docs.aws.amazon.com/batch/latest/userguide/spot_fleet_IAM_role.html).
+```
+aws iam create-service-linked-role --aws-service-name spot.amazonaws.com
+```
+
+## Unable to delete nodes after uninstalling Karpenter
+Karpenter adds a [finalizer](https://github.com/aws/karpenter/pull/466) to nodes that it provisions to support graceful node termination. If Karpenter is uninstalled, these finalizers will cause the API Server to block deletion until the finalizers are removed.
+
+You can fix this by patching the node objects:
+- `kubectl edit node <node name>` and remove the line that says `karpenter.sh/termination` in the finalizers field.
+- Run the following script, which gets all nodes with the finalizer and removes all the finalizers from those nodes.
+  - NOTE: this will remove ALL finalizers from nodes with the karpenter finalizer.
+```{bash}
+kubectl get nodes -ojsonpath='{range .items[*].metadata}{@.name}:{@.finalizers}{"\n"}' | grep "karpenter.sh/termination" | cut -d ':' -f 1 | xargs kubectl patch node --type='json' -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
+```
+
+## Nil issues with Karpenter reallocation
+If you create a Karpenter Provisioner while the webhook to default it is unavailable, it's possible to get unintentionally nil fields.
+[Related Issue](https://github.com/aws/karpenter/issues/463).
+
+You may see some logs like this:
+```{bash}
+github.com/aws/karpenter/pkg/controllers/provisioning/v1alpha1/reallocation/utilization.go:84 +0x688
+github.com/aws/karpenter/pkg/controllers/provisioning/v1alpha1/reallocation.(*Controller).Reconcile(0xc000b004c0, 0x23354c0, 0xc000e209f0, 0x235e640, 0xc002566c40, 0x200c786, 0x5, 0xc00259c1b0, 0x1) github.com/aws/karpenter/pkg/controllers/provisioning/v1alpha1/reallocation/controller.go:72 +0x65
+github.com/aws/karpenter/pkg/controllers.(*GenericController).Reconcile(0xc000b00720, 0x23354c0, 0xc000e209f0, 0xc001db9be0, 0x7, 0xc001db9bd0, 0x7, 0xc000e209f0, 0x7fc864172d20, 0xc0000be2a0, ...)
+```
+This is fixed in Karpenter v0.2.7+. Reinstall Karpenter on the latest version.
+
+## Nodes stuck in pending and not running the kubelet due to outdated CNI
+If an EC2 instance is launched that is stuck in pending and ultimately does not run the kubelet, you may see a message like this in your `/var/log/user-data.log`:
+
+> No entry for c6i.xlarge in /etc/eks/eni-max-pods.txt
+
+This means that your CNI plugin is out of date. You can find instructions on how to update your plugin [here](https://docs.aws.amazon.com/eks/latest/userguide/managing-vpc-cni.html).
+
+## Failed calling webhook "defaulting.webhook.provisioners.karpenter.sh"
+
+If you are not able to create a provisioner due to `Error from server (InternalError): error when creating "provisioner.yaml": Internal error occurred: failed calling webhook "defaulting.webhook.provisioners.karpenter.sh": Post "https://karpenter-webhook.karpenter.svc:443/default-resource?timeout=10s": context deadline exceeded`, check the following:
+
+Verify that the webhook is running:
+```text
+kubectl get po -A -l karpenter=webhook
+NAMESPACE   NAME                                READY   STATUS    RESTARTS   AGE
+karpenter   karpenter-webhook-d644c7567-cdc4d   1/1     Running   0          37m
+karpenter   karpenter-webhook-d644c7567-dn9xw   1/1     Running   0          37m
+```
+
+Verify that the webhook service has endpoints assigned to it:
+```text
+kubectl get ep -A -l app.kubernetes.io/component=karpenter
+NAMESPACE   NAME                ENDPOINTS                        AGE
+karpenter   karpenter-metrics   10.0.13.104:8080                 38m
+karpenter   karpenter-webhook   10.0.1.25:8443,10.0.30.46:8443   38m
+```
+
+Verify that your security groups are not blocking you from reaching your webhook.
+This is especially relevant if you have used `terraform-eks-module` version `>=18`, since that version changed its security approach and is now much more restrictive.
diff --git a/website/static/_redirects b/website/static/_redirects
index b993ce6253ce..536dc13bee80 100644
--- a/website/static/_redirects
+++ b/website/static/_redirects
@@ -2,4 +2,4 @@
 /docs/tasks/running-pods/ /docs/tasks/scheduling/
 /docs/provisioner-crd/ /docs/provisioner/
 /docs/faqs/ /docs/faq/
-/docs/* /v0.6.5/:splat
+/docs/* /v0.7.0/:splat