From 835d46badde9af570dc844b8ebded24f90464ed2 Mon Sep 17 00:00:00 2001 From: Swati Sehgal Date: Thu, 4 Apr 2024 12:34:36 +0100 Subject: [PATCH 1/2] Limit serving of insecure metrics by allowing configurable IP Currently we are serving insecure metrics on all IPv4 routable addresses on the local machine (0.0.0.0). In this PR, we make the metric IP configurable in order to ensure that we listen for insecure metrics port only on one IP to reduce security vulnerability. Signed-off-by: Swati Sehgal --- cmd/resource-topology-exporter/main.go | 2 +- hack/get-manifests.sh | 1 + ...ource-topology-exporter-ds-notif-file.yaml | 2 ++ manifests/resource-topology-exporter.yaml | 2 ++ pkg/config/cfgdispatch.go | 1 + pkg/config/defaults.go | 1 + pkg/config/environ.go | 3 +++ pkg/config/flags.go | 1 + pkg/metrics/server/setup.go | 19 +++++++++++++++---- .../resourcetopologyexporter.go | 2 ++ test/data/TestLoadDefaults.expected.json | 2 +- test/data/TestSetDefaults.expected.json | 2 +- .../data/conftree/00-full/_output/output.yaml | 1 + .../conftree/05-full-env/_output/output.yaml | 1 + .../06-full-env-args/_output/output.yaml | 1 + test/e2e/rte/metrics.go | 9 ++++++--- test/e2e/utils/pods/rtepod/rtepod.go | 16 ++++++++++++++++ 17 files changed, 56 insertions(+), 10 deletions(-) diff --git a/cmd/resource-topology-exporter/main.go b/cmd/resource-topology-exporter/main.go index 51fa5d85f..824d0307e 100644 --- a/cmd/resource-topology-exporter/main.go +++ b/cmd/resource-topology-exporter/main.go @@ -87,7 +87,7 @@ func main() { if err != nil { klog.Fatalf("failed to setup metrics: %v", err) } - err = metricssrv.Setup(parsedArgs.RTE.MetricsMode, metricssrv.NewConfig(parsedArgs.RTE.MetricsPort, parsedArgs.RTE.MetricsTLSCfg)) + err = metricssrv.Setup(parsedArgs.RTE.MetricsMode, metricssrv.NewConfig(parsedArgs.RTE.MetricsAddress, parsedArgs.RTE.MetricsPort, parsedArgs.RTE.MetricsTLSCfg)) if err != nil { klog.Fatalf("failed to setup metrics server: %v", err) } diff --git a/hack/get-manifests.sh b/hack/get-manifests.sh index 19b02a81b..150a305d0 100755 --- a/hack/get-manifests.sh +++ b/hack/get-manifests.sh @@ -11,4 +11,5 @@ export RTE_VERBOSE="${RTE_VERBOSE:-5}" export RTE_METRICS_MODE="${RTE_METRICS_MODE:-disabled}" export RTE_METRICS_CLI_AUTH="${RTE_METRICS_CLI_AUTH:-true}" export METRICS_PORT="${METRICS_PORT:-2112}" +export METRICS_ADDRESS="${METRICS_ADDRESS:-127.0.0.1}" envsubst < ${DIRNAME}/../manifests/resource-topology-exporter.yaml diff --git a/manifests/resource-topology-exporter-ds-notif-file.yaml b/manifests/resource-topology-exporter-ds-notif-file.yaml index fe6f321c1..dae5e8340 100644 --- a/manifests/resource-topology-exporter-ds-notif-file.yaml +++ b/manifests/resource-topology-exporter-ds-notif-file.yaml @@ -42,6 +42,8 @@ spec: value: shared-pool-container - name: METRICS_PORT value: "${METRICS_PORT}" + - name: METRICS_ADDRESS + value: "${METRICS_ADDRESS}" volumeMounts: - name: host-sys mountPath: "/host-sys" diff --git a/manifests/resource-topology-exporter.yaml b/manifests/resource-topology-exporter.yaml index 598d5b15a..c2b546978 100644 --- a/manifests/resource-topology-exporter.yaml +++ b/manifests/resource-topology-exporter.yaml @@ -114,6 +114,8 @@ spec: value: shared-pool-container - name: METRICS_PORT value: "${METRICS_PORT}" + - name: METRICS_ADDRESS + value: "${METRICS_ADDRESS}" volumeMounts: - name: host-sys mountPath: "/host-sys" diff --git a/pkg/config/cfgdispatch.go b/pkg/config/cfgdispatch.go index 266e89338..3e6bfcf13 100644 --- a/pkg/config/cfgdispatch.go +++ b/pkg/config/cfgdispatch.go @@ -126,6 +126,7 @@ func dispatchConfObj(obj map[string]interface{}, pArgs *ProgArgs) error { {key: "topologyExporter.addNRTOwnerEnable", out: &pArgs.RTE.AddNRTOwnerEnable}, {key: "topologyExporter.metricsMode", out: &pArgs.RTE.MetricsMode}, {key: "topologyExporter.metricsPort", out: &pArgs.RTE.MetricsPort}, + {key: "topologyExporter.MetricsAddress", out: &pArgs.RTE.MetricsAddress}, {key: "topologyExporter.metricsTLS.certsDir", out: &pArgs.RTE.MetricsTLSCfg.CertsDir}, {key: "topologyExporter.metricsTLS.certFile", out: &pArgs.RTE.MetricsTLSCfg.CertFile}, {key: "topologyExporter.metricsTLS.keyFile", out: &pArgs.RTE.MetricsTLSCfg.KeyFile}, diff --git a/pkg/config/defaults.go b/pkg/config/defaults.go index 900449c9e..530aaa6c4 100644 --- a/pkg/config/defaults.go +++ b/pkg/config/defaults.go @@ -39,6 +39,7 @@ func SetDefaults(pArgs *ProgArgs) { pArgs.RTE.AddNRTOwnerEnable = true pArgs.RTE.MetricsMode = metricssrv.ServingDefault pArgs.RTE.MetricsPort = metricssrv.PortDefault + pArgs.RTE.MetricsAddress = metricssrv.AddressDefault pArgs.RTE.MetricsTLSCfg = metricssrv.NewDefaultTLSConfig() pArgs.RTE.MaxEventsPerTimeUnit = 1 pArgs.RTE.TimeUnitToLimitEvents = time.Second diff --git a/pkg/config/environ.go b/pkg/config/environ.go index 8f625bd5e..ee5659a4a 100644 --- a/pkg/config/environ.go +++ b/pkg/config/environ.go @@ -63,4 +63,7 @@ func FromEnv(pArgs *ProgArgs) { if pArgs.RTE.MetricsPort == 0 { pArgs.RTE.MetricsPort = metricssrv.PortFromEnv() } + if pArgs.RTE.MetricsAddress == "" { + pArgs.RTE.MetricsAddress = metricssrv.AddressFromEnv() + } } diff --git a/pkg/config/flags.go b/pkg/config/flags.go index f754642fb..1885aa706 100644 --- a/pkg/config/flags.go +++ b/pkg/config/flags.go @@ -63,6 +63,7 @@ func FromFlags(pArgs *ProgArgs, args ...string) (string, string, error) { flags.BoolVar(&pArgs.RTE.AddNRTOwnerEnable, "add-nrt-owner", pArgs.RTE.AddNRTOwnerEnable, "RTE will inject NRT's related node as OwnerReference to ensure cleanup if the node is deleted.") flags.StringVar(&pArgs.RTE.MetricsMode, "metrics-mode", pArgs.RTE.MetricsMode, fmt.Sprintf("Select the mode to expose metrics endpoint. Valid options: %s", metricssrv.ServingModeSupported())) flags.IntVar(&pArgs.RTE.MetricsPort, "metrics-port", pArgs.RTE.MetricsPort, "Select the port to listen for the metrics endpoint.") + flags.StringVar(&pArgs.RTE.MetricsAddress, "metrics-ip", pArgs.RTE.MetricsAddress, "Select the IP to listen for the metrics endpoint.") flags.StringVar(&pArgs.RTE.MetricsTLSCfg.CertsDir, "metrics-certs-dir", pArgs.RTE.MetricsTLSCfg.CertsDir, "certificates directory for TLS metrics serving") flags.StringVar(&pArgs.RTE.MetricsTLSCfg.CertFile, "metrics-cert-file", pArgs.RTE.MetricsTLSCfg.CertFile, "certificate file name for TLS metrics serving") flags.StringVar(&pArgs.RTE.MetricsTLSCfg.KeyFile, "metrics-key-file", pArgs.RTE.MetricsTLSCfg.KeyFile, "key file name for TLS metrics serving") diff --git a/pkg/metrics/server/setup.go b/pkg/metrics/server/setup.go index 016b1ae32..ccd6e6406 100644 --- a/pkg/metrics/server/setup.go +++ b/pkg/metrics/server/setup.go @@ -29,7 +29,8 @@ import ( ) const ( - PortDefault = 2112 + PortDefault = 2112 + AddressDefault = "0.0.0.0" TLSRootDir = "/etc/secrets/rte" @@ -60,19 +61,21 @@ type TLSConfig struct { } type Config struct { + IP string Port int TLS TLSConfig } -func NewConfig(port int, tlsConf TLSConfig) Config { +func NewConfig(ip string, port int, tlsConf TLSConfig) Config { return Config{ + IP: ip, Port: port, TLS: tlsConf, } } func NewDefaultConfig() Config { - return NewConfig(PortDefault, NewDefaultTLSConfig()) + return NewConfig(AddressDefault, PortDefault, NewDefaultTLSConfig()) } func (conf TLSConfig) Clone() TLSConfig { @@ -91,7 +94,7 @@ func (conf Config) Validate() error { } func (conf Config) BindAddress() string { - return fmt.Sprintf(":%d", conf.Port) + return fmt.Sprintf("%s:%d", conf.IP, conf.Port) } func ServingModeIsSupported(value string) (string, error) { @@ -130,6 +133,14 @@ func PortFromEnv() int { return port } +func AddressFromEnv() string { + ip, ok := os.LookupEnv("METRICS_ADDRESS") + if !ok { + return "" + } + return ip +} + func Setup(mode string, conf Config) error { if mode == ServingDisabled { klog.Infof("metrics endpoint disabled") diff --git a/pkg/resourcetopologyexporter/resourcetopologyexporter.go b/pkg/resourcetopologyexporter/resourcetopologyexporter.go index f6cb1900f..aa31d337f 100644 --- a/pkg/resourcetopologyexporter/resourcetopologyexporter.go +++ b/pkg/resourcetopologyexporter/resourcetopologyexporter.go @@ -34,6 +34,7 @@ type Args struct { AddNRTOwnerEnable bool `json:"addNRTOwnerEnable,omitempty"` MetricsMode string `json:"metricsMode,omitempty"` MetricsPort int `json:"metricsPort,omitempty"` + MetricsAddress string `json:"metricsAddress,omitempty"` MetricsTLSCfg metricssrv.TLSConfig `json:"metricsTLS,omitempty"` } @@ -52,6 +53,7 @@ func (args Args) Clone() Args { AddNRTOwnerEnable: args.AddNRTOwnerEnable, MetricsMode: args.MetricsMode, MetricsPort: args.MetricsPort, + MetricsAddress: args.MetricsAddress, MetricsTLSCfg: args.MetricsTLSCfg.Clone(), } } diff --git a/test/data/TestLoadDefaults.expected.json b/test/data/TestLoadDefaults.expected.json index aaea10aea..26ef64482 100644 --- a/test/data/TestLoadDefaults.expected.json +++ b/test/data/TestLoadDefaults.expected.json @@ -1 +1 @@ -{"global":{"verbose":2},"nrtUpdater":{"hostname":"TEST_NODE"},"resourceMonitor":{"sysfsRoot":"/sys","podSetFingerprint":true,"podSetFingerprintMethod":"with-exclusive-resources"},"topologyExporter":{"referenceContainer":{"namespace":"TEST_NS","podName":"TEST_POD","containerName":"TEST_CONT"},"kubeletConfigFile":"/podresources/config.yaml","podResourcesSocketPath":"unix:///podresources/kubelet.sock","sleepInterval":60000000000,"podReadinessEnable":true,"maxEventPerTimeUnit":1,"timeUnitToLimitEvents":1000000000,"addNRTOwnerEnable":true,"metricsMode":"disabled","metricsPort":2112,"metricsTLS":{"certsDir":"/etc/secrets/rte","certFile":"tls.crt","keyFile":"tls.key"}}} \ No newline at end of file +{"global":{"verbose":2},"nrtUpdater":{"hostname":"TEST_NODE"},"resourceMonitor":{"sysfsRoot":"/sys","podSetFingerprint":true,"podSetFingerprintMethod":"with-exclusive-resources"},"topologyExporter":{"referenceContainer":{"namespace":"TEST_NS","podName":"TEST_POD","containerName":"TEST_CONT"},"kubeletConfigFile":"/podresources/config.yaml","podResourcesSocketPath":"unix:///podresources/kubelet.sock","sleepInterval":60000000000,"podReadinessEnable":true,"maxEventPerTimeUnit":1,"timeUnitToLimitEvents":1000000000,"addNRTOwnerEnable":true,"metricsMode":"disabled","metricsPort":2112,"metricsAddress":"0.0.0.0","metricsTLS":{"certsDir":"/etc/secrets/rte","certFile":"tls.crt","keyFile":"tls.key"}}} diff --git a/test/data/TestSetDefaults.expected.json b/test/data/TestSetDefaults.expected.json index 28cc2a96c..5df028e5e 100644 --- a/test/data/TestSetDefaults.expected.json +++ b/test/data/TestSetDefaults.expected.json @@ -1 +1 @@ -{"global":{"verbose":2},"nrtUpdater":{},"resourceMonitor":{"sysfsRoot":"/sys","podSetFingerprint":true,"podSetFingerprintMethod":"with-exclusive-resources"},"topologyExporter":{"kubeletConfigFile":"/podresources/config.yaml","podResourcesSocketPath":"unix:///podresources/kubelet.sock","sleepInterval":60000000000,"podReadinessEnable":true,"maxEventPerTimeUnit":1,"timeUnitToLimitEvents":1000000000,"addNRTOwnerEnable":true,"metricsMode":"disabled","metricsPort":2112,"metricsTLS":{"certsDir":"/etc/secrets/rte","certFile":"tls.crt","keyFile":"tls.key"}}} \ No newline at end of file +{"global":{"verbose":2},"nrtUpdater":{},"resourceMonitor":{"sysfsRoot":"/sys","podSetFingerprint":true,"podSetFingerprintMethod":"with-exclusive-resources"},"topologyExporter":{"kubeletConfigFile":"/podresources/config.yaml","podResourcesSocketPath":"unix:///podresources/kubelet.sock","sleepInterval":60000000000,"podReadinessEnable":true,"maxEventPerTimeUnit":1,"timeUnitToLimitEvents":1000000000,"addNRTOwnerEnable":true,"metricsMode":"disabled","metricsPort":2112,"metricsAddress":"0.0.0.0","metricsTLS":{"certsDir":"/etc/secrets/rte","certFile":"tls.crt","keyFile":"tls.key"}}} diff --git a/test/data/conftree/00-full/_output/output.yaml b/test/data/conftree/00-full/_output/output.yaml index af2349e7a..321a38aba 100644 --- a/test/data/conftree/00-full/_output/output.yaml +++ b/test/data/conftree/00-full/_output/output.yaml @@ -20,6 +20,7 @@ resourceMonitor: topologyExporter: kubeletConfigFile: /podresources/config.yaml maxEventPerTimeUnit: 1 + metricsAddress: 0.0.0.0 metricsMode: disabled metricsPort: 2112 metricsTLS: diff --git a/test/data/conftree/05-full-env/_output/output.yaml b/test/data/conftree/05-full-env/_output/output.yaml index a8a804232..4446e7c9d 100644 --- a/test/data/conftree/05-full-env/_output/output.yaml +++ b/test/data/conftree/05-full-env/_output/output.yaml @@ -20,6 +20,7 @@ resourceMonitor: topologyExporter: kubeletConfigFile: /podresources/config.yaml maxEventPerTimeUnit: 1 + metricsAddress: 0.0.0.0 metricsMode: disabled metricsPort: 2112 metricsTLS: diff --git a/test/data/conftree/06-full-env-args/_output/output.yaml b/test/data/conftree/06-full-env-args/_output/output.yaml index aed6d2eec..90ec8bc48 100644 --- a/test/data/conftree/06-full-env-args/_output/output.yaml +++ b/test/data/conftree/06-full-env-args/_output/output.yaml @@ -20,6 +20,7 @@ resourceMonitor: topologyExporter: kubeletConfigFile: /podresources/config.yaml maxEventPerTimeUnit: 1 + metricsAddress: 0.0.0.0 metricsMode: disabled metricsPort: 2112 metricsTLS: diff --git a/test/e2e/rte/metrics.go b/test/e2e/rte/metrics.go index e7f11f122..bfe3b3c16 100644 --- a/test/e2e/rte/metrics.go +++ b/test/e2e/rte/metrics.go @@ -52,6 +52,7 @@ var _ = ginkgo.Describe("[RTE][Monitoring] metrics", func() { hasMetrics bool metricsMode string metricsPort int + MetricsAddress string rtePod *corev1.Pod workerNodes []corev1.Node topologyUpdaterNode *corev1.Node @@ -99,6 +100,8 @@ var _ = ginkgo.Describe("[RTE][Monitoring] metrics", func() { rtePod = &pods.Items[0] if hasMetrics { + MetricsAddress, err = e2ertepod.FindMetricsAddress(rtePod) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) metricsPort, err = e2ertepod.FindMetricsPort(rtePod) gomega.Expect(err).ToNot(gomega.HaveOccurred()) } @@ -120,7 +123,7 @@ var _ = ginkgo.Describe("[RTE][Monitoring] metrics", func() { } rteContainerName, err := e2ertepod.FindRTEContainerName(rtePod) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - cmd := []string{"curl", "-k", "-L", fmt.Sprintf("https://127.0.0.1:%d/metrics", metricsPort)} + cmd := []string{"curl", "-k", "-L", fmt.Sprintf("https://%s:%d/metrics", MetricsAddress, metricsPort)} key := client.ObjectKeyFromObject(rtePod) klog.Infof("executing cmd: %s on pod %q", cmd, key.String()) var stdout, stderr []byte @@ -139,7 +142,7 @@ var _ = ginkgo.Describe("[RTE][Monitoring] metrics", func() { } rteContainerName, err := e2ertepod.FindRTEContainerName(rtePod) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - cmd := []string{"curl", "-L", fmt.Sprintf("http://127.0.0.1:%d/metrics", metricsPort)} + cmd := []string{"curl", "-L", fmt.Sprintf("http://%s:%d/metrics", MetricsAddress, metricsPort)} key := client.ObjectKeyFromObject(rtePod) klog.Infof("executing cmd: %s on pod %q", cmd, key.String()) var stdout, stderr []byte @@ -173,7 +176,7 @@ var _ = ginkgo.Describe("[RTE][Monitoring] metrics", func() { rteContainerName, err := e2ertepod.FindRTEContainerName(rtePod) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - cmd := []string{"curl", "-L", fmt.Sprintf("http://127.0.0.1:%d/metrics", metricsPort)} + cmd := []string{"curl", "-L", fmt.Sprintf("http://%s:%d/metrics", MetricsAddress, metricsPort)} key := client.ObjectKeyFromObject(rtePod) klog.Infof("executing cmd: %s on pod %q", cmd, key.String()) var stdout, stderr []byte diff --git a/test/e2e/utils/pods/rtepod/rtepod.go b/test/e2e/utils/pods/rtepod/rtepod.go index 0c33288d6..e3d5ea709 100644 --- a/test/e2e/utils/pods/rtepod/rtepod.go +++ b/test/e2e/utils/pods/rtepod/rtepod.go @@ -51,6 +51,22 @@ func FindMetricsPort(rtePod *corev1.Pod) (int, error) { return 0, fmt.Errorf("cannot find METRICS_PORT environment variable") } +func FindMetricsAddress(rtePod *corev1.Pod) (string, error) { + for idx := 0; idx < len(rtePod.Spec.Containers); idx++ { + cnt := rtePod.Spec.Containers[idx] // shortcut + if !isRTEContainer(cnt) { + continue + } + + for _, envVar := range cnt.Env { + if envVar.Name == "METRICS_ADDRESS" { + return envVar.Value, nil + } + } + } + return "", fmt.Errorf("cannot find METRICS_ADDRESS environment variable") +} + func FindRTEContainerName(rtePod *corev1.Pod) (string, error) { for idx := 0; idx < len(rtePod.Spec.Containers); idx++ { cnt := rtePod.Spec.Containers[idx] // shortcut From 1042edc657ef97df53c60b230f9d3bfd38918da5 Mon Sep 17 00:00:00 2001 From: Swati Sehgal Date: Thu, 4 Apr 2024 16:40:08 +0100 Subject: [PATCH 2/2] Show verbose curl output for debuggability Signed-off-by: Swati Sehgal --- test/e2e/rte/metrics.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/e2e/rte/metrics.go b/test/e2e/rte/metrics.go index bfe3b3c16..373594dd8 100644 --- a/test/e2e/rte/metrics.go +++ b/test/e2e/rte/metrics.go @@ -123,7 +123,7 @@ var _ = ginkgo.Describe("[RTE][Monitoring] metrics", func() { } rteContainerName, err := e2ertepod.FindRTEContainerName(rtePod) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - cmd := []string{"curl", "-k", "-L", fmt.Sprintf("https://%s:%d/metrics", MetricsAddress, metricsPort)} + cmd := []string{"curl", "-v", "-k", "-L", fmt.Sprintf("https://%s:%d/metrics", MetricsAddress, metricsPort)} key := client.ObjectKeyFromObject(rtePod) klog.Infof("executing cmd: %s on pod %q", cmd, key.String()) var stdout, stderr []byte @@ -142,7 +142,7 @@ var _ = ginkgo.Describe("[RTE][Monitoring] metrics", func() { } rteContainerName, err := e2ertepod.FindRTEContainerName(rtePod) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - cmd := []string{"curl", "-L", fmt.Sprintf("http://%s:%d/metrics", MetricsAddress, metricsPort)} + cmd := []string{"curl", "-v", "-L", fmt.Sprintf("http://%s:%d/metrics", MetricsAddress, metricsPort)} key := client.ObjectKeyFromObject(rtePod) klog.Infof("executing cmd: %s on pod %q", cmd, key.String()) var stdout, stderr []byte @@ -176,7 +176,7 @@ var _ = ginkgo.Describe("[RTE][Monitoring] metrics", func() { rteContainerName, err := e2ertepod.FindRTEContainerName(rtePod) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - cmd := []string{"curl", "-L", fmt.Sprintf("http://%s:%d/metrics", MetricsAddress, metricsPort)} + cmd := []string{"curl", "-v", "-L", fmt.Sprintf("http://%s:%d/metrics", MetricsAddress, metricsPort)} key := client.ObjectKeyFromObject(rtePod) klog.Infof("executing cmd: %s on pod %q", cmd, key.String()) var stdout, stderr []byte