From 7cf98751472d5eb4b69fcf1f9ac13aebad1669b3 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Fri, 27 Sep 2024 02:20:45 +0000 Subject: [PATCH 01/19] DAOS-9825 control:Update Telemetry Endpoint to use HTTPS - Adding new option for telemetry config in server, control and agent yaml file. - Telemetry endpoint can have option to run in both secure (https) and insecure (http) mode. telemetry_config: allow_insecure: false server_cert: /etc/daos/certs/telemetryserver.crt server_key: /etc/daos/certs/telemetryserver.key ca_cert: /etc/daos/certs/daosTelemetryCA.crt - Telemetry old configuration option is supported bur recommend to use the new options. - Updated dmg to create the correct prometheus config install based on telemetry is insecure mode or not. # cat /root/.prometheus.yml scheme: https tls_config: ca_file: /etc/daos/certs/daosTelemetryCA.crt Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- docs/admin/administration.md | 2 +- docs/admin/deployment.md | 156 ++++++++++++++++++ src/control/cmd/daos_agent/config.go | 17 +- src/control/cmd/daos_agent/config_test.go | 74 +++++++++ src/control/cmd/daos_agent/infocache.go | 4 +- src/control/cmd/daos_agent/infocache_test.go | 3 +- src/control/cmd/daos_agent/telemetry.go | 9 +- src/control/cmd/dmg/auto_test.go | 5 + src/control/cmd/dmg/main.go | 1 + src/control/cmd/dmg/telemetry.go | 25 +++ src/control/lib/control/config.go | 4 +- src/control/lib/control/http.go | 66 +++++++- src/control/lib/control/http_test.go | 102 +++++++++--- src/control/lib/control/telemetry.go | 38 ++++- src/control/lib/control/telemetry_test.go | 72 ++++---- src/control/lib/telemetry/promexp/httpd.go | 20 ++- .../lib/telemetry/promexp/httpd_test.go | 2 + src/control/security/config.go | 49 ++++-- src/control/server/config/server.go | 19 ++- src/control/server/config/server_test.go | 18 +- src/control/server/server_utils.go | 5 +- src/control/server/telemetry.go | 13 +- .../ftest/control/dmg_telemetry_basic.py | 4 +- 
.../ftest/control/dmg_telemetry_basic.yaml | 5 + .../ftest/telemetry/basic_client_telemetry.py | 2 +- .../telemetry/basic_client_telemetry.yaml | 11 +- src/tests/ftest/util/agent_utils.py | 7 +- src/tests/ftest/util/agent_utils_params.py | 43 ++++- src/tests/ftest/util/command_utils.py | 52 +++++- src/tests/ftest/util/command_utils_base.py | 80 +++++++++ src/tests/ftest/util/dmg_utils.py | 5 +- src/tests/ftest/util/dmg_utils_params.py | 27 ++- src/tests/ftest/util/launch_utils.py | 34 +++- src/tests/ftest/util/server_utils.py | 8 +- src/tests/ftest/util/server_utils_params.py | 42 ++++- src/tests/ftest/util/telemetry_utils.py | 4 +- utils/certs/SConscript | 4 +- .../certs/gen_telemetry_admin_certificate.sh | 103 ++++++++++++ .../certs/gen_telemetry_server_certificate.sh | 91 ++++++++++ utils/config/daos_agent.yml | 43 +++-- utils/config/daos_control.yml | 10 ++ utils/config/daos_server.yml | 18 +- utils/config/examples/daos_server_local.yml | 4 +- utils/config/examples/daos_server_mdonssd.yml | 4 +- utils/config/examples/daos_server_tcp.yml | 4 +- utils/config/examples/daos_server_ucx.yml | 4 +- utils/config/examples/daos_server_verbs.yml | 4 +- 47 files changed, 1155 insertions(+), 162 deletions(-) create mode 100755 utils/certs/gen_telemetry_admin_certificate.sh create mode 100755 utils/certs/gen_telemetry_server_certificate.sh diff --git a/docs/admin/administration.md b/docs/admin/administration.md index 9859baebd2a..31f2ffb35ac 100644 --- a/docs/admin/administration.md +++ b/docs/admin/administration.md @@ -286,7 +286,7 @@ written to `$HOME/.prometheus.yml`. 
To start the Prometheus server with the configuration file generated by `dmg`: ``` -prometheus --config-file=$HOME/.prometheus.yml +prometheus --config.file=$HOME/.prometheus.yml ``` ## Storage Operations diff --git a/docs/admin/deployment.md b/docs/admin/deployment.md index 0f5c128b1f4..908ff9f48ca 100644 --- a/docs/admin/deployment.md +++ b/docs/admin/deployment.md @@ -759,6 +759,162 @@ transport_config: key: /etc/daos/certs/admin.key ``` +#### Telemetry Certificate Configuration + +The DAOS Telemetry framework has the option to use certificates to authenticate +between server/client and admin nodes. A set of certificates for a given DAOS system may be +generated by running the `gen_telemetry_admin_certificate.sh` and `gen_telemetry_server_certificate.sh` scripts provided with the DAOS +software if there is not an existing TLS certificate infrastructure. +Both scripts use the `openssl` tool to generate all of the +necessary files. + +##### Telemetry Admin script + +The `gen_telemetry_admin_certificate.sh` script needs to run on the system where the `dmg telemetry metrics` command is going to run or on the system where Prometheus is going to be set up for collecting metrics. 
+ +```bash +$ cd /tmp/ +$ gen_telemetry_admin_certificate.sh +Generating Private CA Root Certificate +Generating RSA private key, 3072 bit long modulus (2 primes) +............................................................................................................++++ +.............++++ +e is 65537 (0x010001) +Private CA Root Certificate for Telemetry created in ./daosTelemetryCA +``` + +This will create the CA key and certificate files: + +```bash +$ ls -l /tmp/daosTelemetryCA/ +total 12 +-rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:06 daosTelemetryCA.crt +-r-------- 1 root root 2455 Sep 27 17:06 daosTelemetryCA.key +-rw-r--r-- 1 root root 0 Sep 27 17:06 index.txt +-rw-r--r-- 1 root root 3 Sep 27 17:06 serial.txt +``` + +The generated keys and certificates must then be securely distributed to all nodes for which you need to collect the DAOS metrics. + +You can copy these certificates to /etc/daos/certs/ or some other secure location. + +##### Telemetry Server script + +The `gen_telemetry_server_certificate.sh` script needs to run on each DAOS server/client node for which DAOS metrics need to be gathered. + +The files below were copied from the Admin node in the previous step. + +```bash +$ ls -l /tmp/daosTelemetryCA/ +total 12 +-rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:06 daosTelemetryCA.crt +-r-------- 1 root root 2455 Sep 27 17:06 daosTelemetryCA.key +-rw-r--r-- 1 root root 0 Sep 27 17:06 index.txt +-rw-r--r-- 1 root root 3 Sep 27 17:06 serial.txt +``` + +Run this script with arguments. +The first argument is the user that should own the generated certificate files; for example, the command below is run on a DAOS client, where the files need to be owned by the daos_agent user. +The second argument is optional and gives the certificate path (by default, the current directory). +For security reasons, this script creates the local node certificate and key and then, at the end, deletes the CA key that was copied from the Admin node. 
+ +```bash +$ cd daosTelemetryCA/ +$ gen_telemetry_server_certificate.sh daos_agent +Generating Server Certificate +Generating RSA private key, 2048 bit long modulus (2 primes) +.......................+++++ +......................................................................................................+++++ +e is 65537 (0x010001) +Signature ok +subject=CN = wolf-170 +Getting CA Private Key +Required Server Certificate Files: + .//daosTelemetryCA.crt + .//telemetryserver.key + .//telemetryserver.crt +$ ls -l +total 20 +-rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:18 daosTelemetryCA.crt +-rw-r--r-- 1 root root 41 Sep 27 17:19 daosTelemetryCA.srl +-rw-r--r-- 1 root root 0 Sep 27 17:18 index.txt +-rw-r--r-- 1 root root 3 Sep 27 17:18 serial.txt +-rw-r--r-- 1 daos_agent daos_agent 1302 Sep 27 17:19 telemetryserver.crt +-r-------- 1 daos_agent daos_agent 1675 Sep 27 17:19 telemetryserver.key +``` + +The example below is run with the daos_server user on a server node: + +```bash +$ cd daosTelemetryCA/ +$ gen_telemetry_server_certificate.sh daos_server +Generating Server Certificate +Generating RSA private key, 2048 bit long modulus (2 primes) +.................................................+++++ +.+++++ +e is 65537 (0x010001) +Signature ok +subject=CN = wolf-173 +Getting CA Private Key +Required Server Certificate Files: + .//daosTelemetryCA.crt + .//telemetryserver.key + .//telemetryserver.crt +$ ls -l +total 20 +-rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:24 daosTelemetryCA.crt +-rw-r--r-- 1 root root 41 Sep 27 17:24 daosTelemetryCA.srl +-rw-r--r-- 1 root root 0 Sep 27 17:24 index.txt +-rw-r--r-- 1 root root 3 Sep 27 17:24 serial.txt +-rw-r--r-- 1 daos_server daos_server 1302 Sep 27 17:24 telemetryserver.crt +-r-------- 1 daos_server daos_server 1679 Sep 27 17:24 telemetryserver.key +``` + +You can copy these certificates to /etc/daos/certs/ or some other secure location. + +#### Telemetry Yaml Example + +Now that the certificates are created, you can add their paths to the 
respective yaml file. + +```yaml +# /etc/daos/daos_server.yml (servers) +telemetry_config: + # To use telemetry in secure mode + allow_insecure: false + # Set the server telemetry endpoint port number + port: 9191 + # Server certificate for use in TLS handshakes + server_cert: /etc/daos/certs/telemetryserver.crt + # Key portion of Server Certificate + server_key: /etc/daos/certs/telemetryserver.key +``` + +```yaml +# /etc/daos/daos_agent.yml (clients) +telemetry_config: + # To use telemetry in secure mode + allow_insecure: false + # Enable client telemetry for all DAOS clients. + enabled: true + # Set the client telemetry endpoint port number + port: 9192 + # Retain client telemetry for a period of time after the client process exits. + retain: 30s + # Server certificate for use in TLS handshakes + server_cert: /etc/daos/certs/telemetryserver.crt + # Key portion of Server Certificate + server_key: /etc/daos/certs/telemetryserver.key +``` + +```yaml +# /etc/daos/daos_control.yml (dmg/admin) +telemetry_config: + # To use telemetry in secure mode + allow_insecure: false + # Custom CA Root certificate for generated certs + ca_cert: /etc/daos/certs/daosTelemetryCA.crt +``` + ### Server Startup The DAOS Server is started as a systemd service. 
The DAOS Server diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go index 1c5ea0d3f5e..7c86677ff25 100644 --- a/src/control/cmd/daos_agent/config.go +++ b/src/control/cmd/daos_agent/config.go @@ -57,14 +57,12 @@ type Config struct { ExcludeFabricIfaces common.StringSet `yaml:"exclude_fabric_ifaces,omitempty"` FabricInterfaces []*NUMAFabricConfig `yaml:"fabric_ifaces,omitempty"` ProviderIdx uint // TODO SRS-31: Enable with multiprovider functionality - TelemetryPort int `yaml:"telemetry_port,omitempty"` - TelemetryEnabled bool `yaml:"telemetry_enabled,omitempty"` - TelemetryRetain time.Duration `yaml:"telemetry_retain,omitempty"` + TelemetryConfig *security.TelemetryConfig `yaml:"telemetry_config"` } // TelemetryExportEnabled returns true if client telemetry export is enabled. func (c *Config) TelemetryExportEnabled() bool { - return c.TelemetryPort > 0 + return c.TelemetryConfig.Port > 0 } // NUMAFabricConfig defines a list of fabric interfaces that belong to a NUMA @@ -99,14 +97,20 @@ func LoadConfig(cfgPath string) (*Config, error) { return nil, fmt.Errorf("invalid system name: %s", cfg.SystemName) } - if cfg.TelemetryRetain > 0 && cfg.TelemetryPort == 0 { + if cfg.TelemetryConfig.Retain > 0 && cfg.TelemetryConfig.Port == 0 { return nil, errors.New("telemetry_retain requires telemetry_port") } - if cfg.TelemetryEnabled && cfg.TelemetryPort == 0 { + if cfg.TelemetryConfig.Enabled && cfg.TelemetryConfig.Port == 0 { return nil, errors.New("telemetry_enabled requires telemetry_port") } + if cfg.TelemetryConfig.AllowInsecure == false { + if cfg.TelemetryConfig.ServerCert == "" || cfg.TelemetryConfig.ServerKey == "" { + return nil, errors.New("For secure mode, server_cert and server_key required under telemetry_config") + } + } + return cfg, nil } @@ -121,5 +125,6 @@ func DefaultConfig() *Config { LogLevel: common.DefaultControlLogLevel, TransportConfig: security.DefaultAgentTransportConfig(), CredentialConfig: 
&security.CredentialConfig{}, + TelemetryConfig: security.DefaultClientTelemetryConfig(), } } diff --git a/src/control/cmd/daos_agent/config_test.go b/src/control/cmd/daos_agent/config_test.go index d9aee88b7fd..06280d7d844 100644 --- a/src/control/cmd/daos_agent/config_test.go +++ b/src/control/cmd/daos_agent/config_test.go @@ -88,6 +88,62 @@ transport_config: allow_insecure: true `) + telemetryRetainWithBadPort := test.CreateTestFile(t, dir, ` +name: shire +access_points: ["one:10001", "two:10001"] +port: 4242 +runtime_dir: /tmp/runtime +log_file: /home/frodo/logfile +control_log_mask: debug +transport_config: + allow_insecure: true +telemetry_config: + retain: 1 + port: 0 +`) + + telemetryEnabledWithBadPort := test.CreateTestFile(t, dir, ` +name: shire +access_points: ["one:10001", "two:10001"] +port: 4242 +runtime_dir: /tmp/runtime +log_file: /home/frodo/logfile +control_log_mask: debug +transport_config: + allow_insecure: true +telemetry_config: + enabled: true + port: 0 +`) + + telemetryWithoutServerCert := test.CreateTestFile(t, dir, ` +name: shire +access_points: ["one:10001", "two:10001"] +port: 4242 +runtime_dir: /tmp/runtime +log_file: /home/frodo/logfile +control_log_mask: debug +transport_config: + allow_insecure: true +telemetry_config: + allow_insecure: false + server_cert: "" +`) + + telemetryWithoutServerKey := test.CreateTestFile(t, dir, ` +name: shire +access_points: ["one:10001", "two:10001"] +port: 4242 +runtime_dir: /tmp/runtime +log_file: /home/frodo/logfile +control_log_mask: debug +transport_config: + allow_insecure: true +telemetry_config: + allow_insecure: false + server_key: "" +`) + for name, tc := range map[string]struct { path string expResult *Config @@ -108,6 +164,22 @@ transport_config: path: emptyFile, expResult: DefaultConfig(), }, + "telemetry retain with no port": { + path: telemetryRetainWithBadPort, + expErr: errors.New("telemetry_retain requires telemetry_port"), + }, + "telemetry enabled with no port": { + path: 
telemetryEnabledWithBadPort, + expErr: errors.New("telemetry_enabled requires telemetry_port"), + }, + "telemetry with secure mode with no server certificate": { + path: telemetryWithoutServerCert, + expErr: errors.New("For secure mode, server_cert and server_key required under telemetry_config"), + }, + "telemetry with secure mode with no server key": { + path: telemetryWithoutServerKey, + expErr: errors.New("For secure mode, server_cert and server_key required under telemetry_config"), + }, "without optional items": { path: withoutOptCfg, expResult: &Config{ @@ -122,6 +194,7 @@ transport_config: AllowInsecure: true, CertificateConfig: DefaultConfig().TransportConfig.CertificateConfig, }, + TelemetryConfig: security.DefaultClientTelemetryConfig(), }, }, "bad log mask": { @@ -154,6 +227,7 @@ transport_config: AllowInsecure: true, CertificateConfig: DefaultConfig().TransportConfig.CertificateConfig, }, + TelemetryConfig: security.DefaultClientTelemetryConfig(), ExcludeFabricIfaces: common.NewStringSet("ib3"), FabricInterfaces: []*NUMAFabricConfig{ { diff --git a/src/control/cmd/daos_agent/infocache.go b/src/control/cmd/daos_agent/infocache.go index 90944a5a010..86adbef75e1 100644 --- a/src/control/cmd/daos_agent/infocache.go +++ b/src/control/cmd/daos_agent/infocache.go @@ -49,8 +49,8 @@ func NewInfoCache(ctx context.Context, log logging.Logger, client control.UnaryI devStateGetter: network.DefaultNetDevStateProvider(log), } - ic.clientTelemetryEnabled.Store(cfg.TelemetryEnabled) - ic.clientTelemetryRetain.Store(cfg.TelemetryRetain > 0) + ic.clientTelemetryEnabled.Store(cfg.TelemetryConfig.Enabled) + ic.clientTelemetryRetain.Store(cfg.TelemetryConfig.Retain > 0) if cfg.DisableCache { ic.DisableAttachInfoCache() diff --git a/src/control/cmd/daos_agent/infocache_test.go b/src/control/cmd/daos_agent/infocache_test.go index 1f658055115..300dd8232db 100644 --- a/src/control/cmd/daos_agent/infocache_test.go +++ b/src/control/cmd/daos_agent/infocache_test.go @@ -25,6 +25,7 
@@ import ( "github.com/daos-stack/daos/src/control/lib/hardware" "github.com/daos-stack/daos/src/control/lib/telemetry" "github.com/daos-stack/daos/src/control/logging" + "github.com/daos-stack/daos/src/control/security" ) type testInfoCacheParams struct { @@ -539,7 +540,7 @@ func TestAgent_NewInfoCache(t *testing.T) { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - + tc.cfg.TelemetryConfig = security.DefaultClientTelemetryConfig() ic := NewInfoCache(test.Context(t), log, nil, tc.cfg) test.AssertEqual(t, tc.expEnabled, ic.IsAttachInfoCacheEnabled(), "") diff --git a/src/control/cmd/daos_agent/telemetry.go b/src/control/cmd/daos_agent/telemetry.go index 4c0e2d35b4c..df222d108b4 100644 --- a/src/control/cmd/daos_agent/telemetry.go +++ b/src/control/cmd/daos_agent/telemetry.go @@ -17,11 +17,14 @@ import ( func startPrometheusExporter(ctx context.Context, log logging.Logger, cs *promexp.ClientSource, cfg *Config) (func(), error) { expCfg := &promexp.ExporterConfig{ - Port: cfg.TelemetryPort, - Title: "DAOS Client Telemetry", + Port: cfg.TelemetryConfig.Port, + Title: "DAOS Client Telemetry", + AllowInsecure: cfg.TelemetryConfig.AllowInsecure, + HttpsCert: cfg.TelemetryConfig.ServerCert, + HttpsKey: cfg.TelemetryConfig.ServerKey, Register: func(ctx context.Context, log logging.Logger) error { c, err := promexp.NewClientCollector(ctx, log, cs, &promexp.CollectorOpts{ - RetainDuration: cfg.TelemetryRetain, + RetainDuration: cfg.TelemetryConfig.Retain, }) if err != nil { return err diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index 48f06427792..a2c7ace937e 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -592,6 +592,11 @@ system_ram_reserved: 16 disable_hugepages: false control_log_mask: INFO control_log_file: /tmp/daos_server.log +telemetry_config: + allow_insecure: false + server_cert: /etc/daos/certs/telemetryserver.crt + 
server_key: /etc/daos/certs/telemetryserver.key + ca_cert: /etc/daos/certs/daosTelemetryCA.crt core_dump_filter: 19 name: daos_server socket_dir: /var/run/daos_server diff --git a/src/control/cmd/dmg/main.go b/src/control/cmd/dmg/main.go index c88845a304c..e3bd8425663 100644 --- a/src/control/cmd/dmg/main.go +++ b/src/control/cmd/dmg/main.go @@ -262,6 +262,7 @@ and access control settings, along with system wide operations.` if opts.Insecure { ctlCfg.TransportConfig.AllowInsecure = true + ctlCfg.TelemetryConfig.AllowInsecure = true } if err := ctlCfg.TransportConfig.PreLoadCertData(); err != nil { return errors.Wrap(err, "Unable to load Certificate Data") diff --git a/src/control/cmd/dmg/telemetry.go b/src/control/cmd/dmg/telemetry.go index 40b84d1512f..db069f3a97b 100644 --- a/src/control/cmd/dmg/telemetry.go +++ b/src/control/cmd/dmg/telemetry.go @@ -197,11 +197,17 @@ type ( Targets []string `yaml:"targets,omitempty"` } + tlsConfig struct { + CaFile string `yaml:"ca_file,omitempty"` + } + scrapeConfig struct { JobName string `yaml:"job_name"` ScrapeInterval time.Duration `yaml:"scrape_interval,omitempty"` ScrapeTimeout time.Duration `yaml:"scrape_timeout,omitempty"` StaticConfigs []*staticConfig `yaml:"static_configs,omitempty"` + Scheme string `yaml:"scheme,omitempty"` + TlsConfig tlsConfig `yaml:"tls_config,omitempty"` } promCfg struct { @@ -261,11 +267,24 @@ func (cmd *telemConfigCmd) configurePrometheus() (*installInfo, error) { } sc.Targets = append(sc.Targets, host+":9191") } + + tc := tlsConfig{} + scheme := "" + if !cmd.cfgCmd.config.TelemetryConfig.AllowInsecure { + cmd.Infof("Prometheus configuration is setup as Secure (https) mode") + tc.CaFile = cmd.cfgCmd.config.TelemetryConfig.CARootPath + scheme = "https" + } else { + cmd.Infof("Prometheus configuration is setup as insecure (http) mode") + } + cfg.ScrapeConfigs = []*scrapeConfig{ { JobName: "daos", ScrapeInterval: 5 * time.Second, StaticConfigs: []*staticConfig{sc}, + Scheme: scheme, + TlsConfig: 
tc, }, } @@ -303,6 +322,7 @@ type metricsCmd struct { // metricsListCmd provides a list of metrics available from the requested DAOS servers. type metricsListCmd struct { baseCmd + cfgCmd cmdutil.JSONOutputCmd singleHostCmd Port uint32 `short:"p" long:"port" default:"9191" description:"Telemetry port on the host"` @@ -318,6 +338,8 @@ func (cmd *metricsListCmd) Execute(args []string) error { req := new(control.MetricsListReq) req.Port = cmd.Port req.Host = host + req.AllowInsecure = cmd.cfgCmd.config.TelemetryConfig.AllowInsecure + req.CaCertPath = cmd.cfgCmd.config.TelemetryConfig.CARootPath if !cmd.JSONOutputEnabled() { cmd.Info(getConnectingMsg(req.Host, req.Port)) @@ -357,6 +379,7 @@ func getConnectingMsg(host string, port uint32) string { // metricsQueryCmd collects the requested metrics from the requested DAOS servers. type metricsQueryCmd struct { baseCmd + cfgCmd cmdutil.JSONOutputCmd singleHostCmd Port uint32 `short:"p" long:"port" default:"9191" description:"Telemetry port on the host"` @@ -373,6 +396,8 @@ func (cmd *metricsQueryCmd) Execute(args []string) error { req := new(control.MetricsQueryReq) req.Port = cmd.Port req.Host = host + req.AllowInsecure = cmd.cfgCmd.config.TelemetryConfig.AllowInsecure + req.CaCertPath = cmd.cfgCmd.config.TelemetryConfig.CARootPath req.MetricNames = common.TokenizeCommaSeparatedString(cmd.Metrics) if !cmd.JSONOutputEnabled() { diff --git a/src/control/lib/control/config.go b/src/control/lib/control/config.go index bb293f05cb6..37c61056e98 100644 --- a/src/control/lib/control/config.go +++ b/src/control/lib/control/config.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -29,6 +29,7 @@ type Config struct { ControlPort int `yaml:"port"` HostList []string `yaml:"hostlist"` TransportConfig *security.TransportConfig `yaml:"transport_config"` + TelemetryConfig *security.TelemetryConfig `yaml:"telemetry_config"` Path string `yaml:"-"` } @@ -41,6 +42,7 @@ func DefaultConfig() *Config { ControlPort: build.DefaultControlPort, HostList: []string{localServer}, TransportConfig: security.DefaultClientTransportConfig(), + TelemetryConfig: security.DefaultClientTelemetryConfig(), } } diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index 8c689b8145a..69d002dc36c 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,6 +8,8 @@ package control import ( "context" + "crypto/tls" + "crypto/x509" "fmt" "io/ioutil" "net/http" @@ -37,12 +39,16 @@ type httpGetter interface { retryer getURL() *url.URL getBody(context.Context) ([]byte, error) + getAllowInsecure() *bool + getCaCertPath() *string } type httpReq struct { - url *url.URL - getFn httpGetFn - getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) + url *url.URL + getFn httpGetFn + allowInsecure *bool + cacertpath *string + getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) } func (r *httpReq) canRetry(err error, cur uint) bool { @@ -77,6 +83,14 @@ func (r *httpReq) getURL() *url.URL { return r.url } +func (r *httpReq) getAllowInsecure() *bool { + return r.allowInsecure +} + +func (r *httpReq) getCaCertPath() *string { + return r.cacertpath +} + func (r *httpReq) httpGetFunc() httpGetFn { if r.getFn == nil { r.getFn = http.Get @@ -88,7 +102,8 @@ func (r *httpReq) getBody(ctx context.Context) ([]byte, error) { if r.getBodyFn == nil { r.getBodyFn 
= httpGetBody } - return r.getBodyFn(ctx, r.getURL(), r.httpGetFunc(), r.getRetryTimeout()) + + return r.getBodyFn(ctx, r.getURL(), r.httpGetFunc(), r.getRetryTimeout(), r.getAllowInsecure(), r.getCaCertPath()) } func httpGetBodyRetry(ctx context.Context, req httpGetter) ([]byte, error) { @@ -113,9 +128,31 @@ func httpGetBodyRetry(ctx context.Context, req httpGetter) ([]byte, error) { return result, err } +// httpsGetFunc will prepare the GET requested using the certificate for secure mode +// and return the http.Get +func httpsGetFunc(cert []byte) (httpGetFn, error) { + caCertPool := x509.NewCertPool() + result := caCertPool.AppendCertsFromPEM(cert) + if !result { + return nil, errors.New("failed to parse root certificate") + } + + tlsConfig := &tls.Config{ + RootCAs: caCertPool, + } + + tr := &http.Transport{ + TLSClientConfig: tlsConfig, + } + + client := &http.Client{Transport: tr} + + return client.Get, nil +} + // httpGetBody executes a simple HTTP GET request to a given URL and returns the // content of the response body. -func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration) ([]byte, error) { +func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration, allowInsecure *bool, cacertpath *string) ([]byte, error) { if url == nil { return nil, errors.New("nil URL") } @@ -128,6 +165,22 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. 
return nil, errors.New("nil get function") } + if *allowInsecure == false { + if cacertpath == nil { + return nil, errors.New("Provide the CA certificate path") + } + + cert, err := ioutil.ReadFile(*cacertpath) + if err != nil { + return nil, errors.Wrap(err, "reading CA cerificate file Error") + } + + get, err = httpsGetFunc(cert) + if err != nil { + return nil, errors.Wrap(err, "https GET request failed") + } + } + httpCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -140,7 +193,6 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. errChan <- err return } - respChan <- httpResp }() diff --git a/src/control/lib/control/http_test.go b/src/control/lib/control/http_test.go index 6f28a0c4ce4..20e8d58234f 100644 --- a/src/control/lib/control/http_test.go +++ b/src/control/lib/control/http_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2022 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -123,35 +123,45 @@ func newErrMockReadCloser(err error) *mockReadCloser { func TestControl_httpGetBody(t *testing.T) { defaultURL := &url.URL{Host: "testhost"} + defaultAllowInsecure := true + falseAllowInsecure := false + badCertPerm := "../../security/testdata/certs/badperms.crt" + badCertPath := "wrongpath/notavailable.crt" for name, tc := range map[string]struct { - url *url.URL - timeout time.Duration - cancelCtx bool - getFn httpGetFn - expResult []byte - expErr error + url *url.URL + timeout time.Duration + cancelCtx bool + getFn httpGetFn + allowInsecure *bool + caCertPath *string + expResult []byte + expErr error }{ "nil url": { expErr: errors.New("nil URL"), }, "empty URL": { - url: &url.URL{}, - expErr: errors.New("host address is required"), + url: &url.URL{}, + allowInsecure: &defaultAllowInsecure, + expErr: errors.New("host address is required"), }, "nil getFn": { - url: defaultURL, - expErr: errors.New("nil get function"), + url: defaultURL, + 
allowInsecure: &defaultAllowInsecure, + expErr: errors.New("nil get function"), }, "getFn error": { - url: defaultURL, + url: defaultURL, + allowInsecure: &defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return nil, errors.New("mock getFn") }, expErr: errors.New("mock getFn"), }, "http.Response error": { - url: defaultURL, + url: defaultURL, + allowInsecure: &defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusNotFound, @@ -161,7 +171,8 @@ func TestControl_httpGetBody(t *testing.T) { expErr: errors.New("HTTP response error: 404 Not Found"), }, "empty body": { - url: defaultURL, + url: defaultURL, + allowInsecure: &defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -171,7 +182,8 @@ func TestControl_httpGetBody(t *testing.T) { expResult: []byte{}, }, "success with body": { - url: defaultURL, + url: defaultURL, + allowInsecure: &defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -180,8 +192,44 @@ func TestControl_httpGetBody(t *testing.T) { }, expResult: []byte("this is the body of an HTTP response"), }, + "failure with body in secure mode without CA certificate path": { + url: defaultURL, + allowInsecure: &falseAllowInsecure, + getFn: func(_ string) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: newMockReadCloser("this is the body of an HTTP response"), + }, nil + }, + expErr: errors.New("Provide the CA certificate path"), + }, + "failure with body in secure mode with bad CA certificate": { + url: defaultURL, + allowInsecure: &falseAllowInsecure, + caCertPath: &badCertPerm, + getFn: func(_ string) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: newMockReadCloser("this is the body of an HTTP response"), + }, nil + }, + expErr: errors.New("Get \"//testhost\": 
unsupported protocol scheme"), + }, + "failure with body in secure mode with bad CA certificate path": { + url: defaultURL, + allowInsecure: &falseAllowInsecure, + caCertPath: &badCertPath, + getFn: func(_ string) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: newMockReadCloser("this is the body of an HTTP response"), + }, nil + }, + expErr: errors.New("reading CA cerificate file Error: open wrongpath/notavailable.crt: no such file or directory"), + }, "reading body fails": { - url: defaultURL, + url: defaultURL, + allowInsecure: &defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -191,8 +239,9 @@ func TestControl_httpGetBody(t *testing.T) { expErr: errors.New("reading HTTP response body: mock Read"), }, "request times out": { - url: defaultURL, - timeout: 5 * time.Millisecond, + url: defaultURL, + allowInsecure: &defaultAllowInsecure, + timeout: 5 * time.Millisecond, getFn: func(_ string) (*http.Response, error) { time.Sleep(1 * time.Second) return &http.Response{ @@ -203,8 +252,9 @@ func TestControl_httpGetBody(t *testing.T) { expErr: HTTPReqTimedOut(defaultURL.String()), }, "request canceled": { - url: defaultURL, - cancelCtx: true, + url: defaultURL, + allowInsecure: &defaultAllowInsecure, + cancelCtx: true, getFn: func(_ string) (*http.Response, error) { time.Sleep(1 * time.Second) return &http.Response{ @@ -229,7 +279,7 @@ func TestControl_httpGetBody(t *testing.T) { tc.timeout = time.Second } - result, err := httpGetBody(ctx, tc.url, tc.getFn, tc.timeout) + result, err := httpGetBody(ctx, tc.url, tc.getFn, tc.timeout, tc.allowInsecure, tc.caCertPath) test.CmpErr(t, tc.expErr, err) if diff := cmp.Diff(tc.expResult, result); diff != "" { @@ -247,6 +297,7 @@ type mockHTTPGetter struct { getBodyErr error getBodyCalled uint getBodyFailures uint + caCertPath *string } func (r *mockHTTPGetter) canRetry(err error, cur uint) bool { @@ -273,6 +324,15 @@ 
func (r *mockHTTPGetter) getURL() *url.URL { } } +func (r *mockHTTPGetter) getAllowInsecure() *bool { + allowInsecure := true + return &allowInsecure +} + +func (r *mockHTTPGetter) getCaCertPath() *string { + return r.caCertPath +} + func (r *mockHTTPGetter) getBody(ctx context.Context) ([]byte, error) { r.getBodyCalled++ if r.getBodyCalled <= r.getBodyFailures { diff --git a/src/control/lib/control/telemetry.go b/src/control/lib/control/telemetry.go index 919e54ff284..0916b496795 100644 --- a/src/control/lib/control/telemetry.go +++ b/src/control/lib/control/telemetry.go @@ -32,9 +32,14 @@ func (m pbMetricMap) Keys() []string { return keys } -func getMetricsURL(host string, port uint32) *url.URL { +func getMetricsURL(host string, port uint32, allowinsecure bool) *url.URL { + scheme := "https" + if allowinsecure { + scheme = "http" + } + return &url.URL{ - Scheme: "http", + Scheme: scheme, Host: fmt.Sprintf("%s:%d", host, port), Path: "metrics", } @@ -78,8 +83,10 @@ type ( // MetricsListReq is used to request the list of metrics. MetricsListReq struct { httpReq - Host string // Host to query for telemetry data - Port uint32 // Port to use for collecting telemetry data + Host string // Host to query for telemetry data + Port uint32 // Port to use for collecting telemetry data + AllowInsecure bool // Set the https end point secure + CaCertPath string // CA Cert path for telemetry } // MetricsListResp contains the list of available metrics. 
@@ -102,7 +109,13 @@ func MetricsList(ctx context.Context, req *MetricsListReq) (*MetricsListResp, er return nil, errors.New("port must be specified") } - req.url = getMetricsURL(req.Host, req.Port) + if req.AllowInsecure == false && req.CaCertPath == "" { + return nil, errors.New("Provide the CA certificate path") + } + + req.url = getMetricsURL(req.Host, req.Port, req.AllowInsecure) + req.allowInsecure = &req.AllowInsecure + req.cacertpath = &req.CaCertPath scraped, err := scrapeMetrics(ctx, req) if err != nil { @@ -130,8 +143,11 @@ type ( // MetricsQueryReq is used to query telemetry values. MetricsQueryReq struct { httpReq - Host string // host to query for telemetry data - Port uint32 // port to use for collecting telemetry data + Host string // host to query for telemetry data + Port uint32 // port to use for collecting telemetry data + AllowInsecure bool // Set the https end point secure + CaCertPath string // CA Cert path for telemetry + MetricNames []string // if empty, collects all metrics } @@ -155,7 +171,13 @@ func MetricsQuery(ctx context.Context, req *MetricsQueryReq) (*MetricsQueryResp, return nil, errors.New("port must be specified") } - req.url = getMetricsURL(req.Host, req.Port) + if req.AllowInsecure == false && req.CaCertPath == "" { + return nil, errors.New("Provide the CA certificate path") + } + + req.url = getMetricsURL(req.Host, req.Port, req.AllowInsecure) + req.allowInsecure = &req.AllowInsecure + req.cacertpath = &req.CaCertPath scraped, err := scrapeMetrics(ctx, req) if err != nil { diff --git a/src/control/lib/control/telemetry_test.go b/src/control/lib/control/telemetry_test.go index 5887283852d..906a076a230 100644 --- a/src/control/lib/control/telemetry_test.go +++ b/src/control/lib/control/telemetry_test.go @@ -118,10 +118,10 @@ func newTestPBHistogram(numBuckets int) *pclient.Metric { return metric } -func mockScrapeFnSuccess(t *testing.T, metricFam ...*pclient.MetricFamily) func(context.Context, *url.URL, httpGetFn, time.Duration) 
([]byte, error) { +func mockScrapeFnSuccess(t *testing.T, metricFam ...*pclient.MetricFamily) func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { t.Helper() - return func(_ context.Context, _ *url.URL, _ httpGetFn, _ time.Duration) ([]byte, error) { + return func(_ context.Context, _ *url.URL, _ httpGetFn, _ time.Duration, _ *bool, _ *string) ([]byte, error) { var b strings.Builder for _, mf := range metricFam { _, err := expfmt.MetricFamilyToText(&b, mf) @@ -147,12 +147,12 @@ func TestControl_scrapeMetrics(t *testing.T) { for name, tc := range map[string]struct { req httpGetter - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) expResult pbMetricMap expErr error }{ "check scrape params": { - scrapeFn: func(_ context.Context, url *url.URL, getter httpGetFn, timeout time.Duration) ([]byte, error) { + scrapeFn: func(_ context.Context, url *url.URL, getter httpGetFn, timeout time.Duration, allowInsecure *bool, caCertPath *string) ([]byte, error) { test.AssertEqual(t, testURL.Scheme, url.Scheme, "") test.AssertEqual(t, testURL.Host, url.Host, "") test.AssertEqual(t, testURL.Path, url.Path, "") @@ -166,19 +166,19 @@ func TestControl_scrapeMetrics(t *testing.T) { expResult: pbMetricMap{}, }, "HTTP scrape error": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), }, "scrape returns no content": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { return []byte{}, nil }, expResult: pbMetricMap{}, }, "scrape returns bad content": { - scrapeFn: 
func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { return []byte("

Hello world

"), nil }, expErr: errors.New("parsing error"), @@ -217,7 +217,7 @@ func TestControl_MetricsList(t *testing.T) { } for name, tc := range map[string]struct { - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) req *MetricsListReq expResp *MetricsListResp expErr error @@ -237,20 +237,22 @@ func TestControl_MetricsList(t *testing.T) { }, "scrape failed": { req: &MetricsListReq{ - Host: "host1", - Port: 1066, + Host: "host1", + Port: 1066, + AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), }, "no metrics": { req: &MetricsListReq{ - Host: "host1", - Port: 8888, + Host: "host1", + Port: 8888, + AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { return []byte{}, nil }, expResp: &MetricsListResp{ @@ -259,8 +261,9 @@ func TestControl_MetricsList(t *testing.T) { }, "success": { req: &MetricsListReq{ - Host: "host1", - Port: 7777, + Host: "host1", + Port: 7777, + AllowInsecure: true, }, scrapeFn: mockScrapeFnSuccess(t, testMetricFam...), expResp: &MetricsListResp{ @@ -281,7 +284,7 @@ func TestControl_MetricsList(t *testing.T) { } { t.Run(name, func(t *testing.T) { if tc.scrapeFn == nil { - tc.scrapeFn = func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { + tc.scrapeFn = func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { return nil, nil } } @@ -429,7 +432,7 @@ func TestControl_MetricsQuery(t *testing.T) { } for name, tc := range map[string]struct { - scrapeFn func(context.Context, 
*url.URL, httpGetFn, time.Duration) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) req *MetricsQueryReq expResp *MetricsQueryResp expErr error @@ -449,20 +452,22 @@ func TestControl_MetricsQuery(t *testing.T) { }, "scrape failed": { req: &MetricsQueryReq{ - Host: "host1", - Port: 1066, + Host: "host1", + Port: 1066, + AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), }, "no metrics": { req: &MetricsQueryReq{ - Host: "host1", - Port: 8888, + Host: "host1", + Port: 8888, + AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { return []byte{}, nil }, expResp: &MetricsQueryResp{ @@ -471,8 +476,9 @@ func TestControl_MetricsQuery(t *testing.T) { }, "all metrics": { req: &MetricsQueryReq{ - Host: "host1", - Port: 7777, + Host: "host1", + Port: 7777, + AllowInsecure: true, }, scrapeFn: mockScrapeFnSuccess(t, testMetricFam...), expResp: &MetricsQueryResp{ @@ -532,9 +538,10 @@ func TestControl_MetricsQuery(t *testing.T) { }, "selected metrics": { req: &MetricsQueryReq{ - Host: "host1", - Port: 7777, - MetricNames: []string{"my_generic", "my_counter"}, + Host: "host1", + Port: 7777, + AllowInsecure: true, + MetricNames: []string{"my_generic", "my_counter"}, }, scrapeFn: mockScrapeFnSuccess(t, testMetricFam...), expResp: &MetricsQueryResp{ @@ -560,9 +567,10 @@ func TestControl_MetricsQuery(t *testing.T) { }, "invalid metric name": { req: &MetricsQueryReq{ - Host: "host1", - Port: 7777, - MetricNames: []string{"my_generic", "fake"}, + Host: "host1", + Port: 7777, + AllowInsecure: true, + MetricNames: 
[]string{"my_generic", "fake"}, }, scrapeFn: mockScrapeFnSuccess(t, testMetricFam...), expErr: errors.New("metric \"fake\" not found"), diff --git a/src/control/lib/telemetry/promexp/httpd.go b/src/control/lib/telemetry/promexp/httpd.go index 2f4c86d485d..0f6a0920dd8 100644 --- a/src/control/lib/telemetry/promexp/httpd.go +++ b/src/control/lib/telemetry/promexp/httpd.go @@ -29,9 +29,12 @@ type ( // ExporterConfig defines the configuration for the Prometheus exporter. ExporterConfig struct { - Port int - Title string - Register RegMonFn + Port int + Title string + Register RegMonFn + AllowInsecure bool + HttpsCert string + HttpsKey string } ) @@ -82,8 +85,15 @@ func StartExporter(ctx context.Context, log logging.Logger, cfg *ExporterConfig) // http listener is a blocking call go func() { log.Infof("Listening on %s", listenAddress) - err := srv.ListenAndServe() - log.Infof("Prometheus web exporter stopped: %s", err.Error()) + if cfg.AllowInsecure { + log.Infof("Prometheus web exporter started with insecure (http) mode") + err := srv.ListenAndServe() + log.Infof("Prometheus web exporter stopped: %s", err.Error()) + } else { + log.Infof("Prometheus web exporter started with secure (https) mode") + err := srv.ListenAndServeTLS(cfg.HttpsCert, cfg.HttpsKey) + log.Infof("Prometheus web exporter stopped: %s", err.Error()) + } }() return func() { diff --git a/src/control/lib/telemetry/promexp/httpd_test.go b/src/control/lib/telemetry/promexp/httpd_test.go index db69e122b71..8504b4e47c9 100644 --- a/src/control/lib/telemetry/promexp/httpd_test.go +++ b/src/control/lib/telemetry/promexp/httpd_test.go @@ -70,7 +70,9 @@ func TestPromExp_StartExporter(t *testing.T) { if tc.cfg != nil { tc.cfg.Title = t.Name() + tc.cfg.AllowInsecure = true } + cleanup, err := promexp.StartExporter(test.Context(t), log, tc.cfg) test.CmpErr(t, tc.expErr, err) if tc.expErr != nil { diff --git a/src/control/security/config.go b/src/control/security/config.go index 7358e2e73ee..5eb3c7b577f 100644 --- 
a/src/control/security/config.go +++ b/src/control/security/config.go @@ -20,17 +20,20 @@ import ( ) const ( - certDir = "/etc/daos/certs/" - defaultCACert = certDir + "daosCA.crt" - defaultServerCert = certDir + "server.crt" - defaultServerKey = certDir + "server.key" - defaultAdminCert = certDir + "admin.crt" - defaultAdminKey = certDir + "admin.key" - defaultAgentCert = certDir + "agent.crt" - defaultAgentKey = certDir + "agent.key" - defaultClientCertDir = certDir + "clients" - defaultServer = "server" - defaultInsecure = false + certDir = "/etc/daos/certs/" + defaultCACert = certDir + "daosCA.crt" + defaultServerCert = certDir + "server.crt" + defaultServerKey = certDir + "server.key" + defaultAdminCert = certDir + "admin.crt" + defaultAdminKey = certDir + "admin.key" + defaultAgentCert = certDir + "agent.crt" + defaultAgentKey = certDir + "agent.key" + defaultTelemetryServerCert = certDir + "telemetryserver.crt" + defaultTelemetryServerKey = certDir + "telemetryserver.key" + defaultTelemetryCACert = certDir + "daosTelemetryCA.crt" + defaultClientCertDir = certDir + "clients" + defaultServer = "server" + defaultInsecure = false ) // MappedClientUser represents a client user that is mapped to a uid. @@ -105,6 +108,30 @@ type TransportConfig struct { CertificateConfig `yaml:",inline"` } +// TelemetryConfig contains all the information on whether or not to use a +// secure endpoint for telemetry and the certificate locations if their use is specified. +type TelemetryConfig struct { + Port int `yaml:"port,omitempty"` + AllowInsecure bool `yaml:"allow_insecure"` + Enabled bool `yaml:"enabled,omitempty"` + Retain time.Duration `yaml:"retain,omitempty"` + ServerCert string `yaml:"server_cert,omitempty"` + ServerKey string `yaml:"server_key,omitempty"` + CARootPath string `yaml:"ca_cert,omitempty"` +} + +// DefaultClientTelemetryConfig provides a default telemetry config with telemetry +// disabled, secure mode selected, and certificates located under /etc/daos/certs. 
+func DefaultClientTelemetryConfig() *TelemetryConfig { + return &TelemetryConfig{ + Enabled: false, + AllowInsecure: defaultInsecure, + ServerCert: defaultTelemetryServerCert, + ServerKey: defaultTelemetryServerKey, + CARootPath: defaultTelemetryCACert, + } +} + func (tc *TransportConfig) String() string { return fmt.Sprintf("allow insecure: %v", tc.AllowInsecure) } diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index ec94784d7d5..6e0ef620247 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -61,7 +61,7 @@ type Server struct { HelperLogFile string `yaml:"helper_log_file,omitempty"` FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"` FaultPath string `yaml:"fault_path,omitempty"` - TelemetryPort int `yaml:"telemetry_port,omitempty"` + TelemetryConfig *security.TelemetryConfig `yaml:"telemetry_config"` CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"` ClientEnvVars []string `yaml:"client_env_vars,omitempty"` SupportConfig SupportConfig `yaml:"support_config,omitempty"` @@ -314,7 +314,13 @@ func (cfg *Server) WithFirmwareHelperLogFile(filePath string) *Server { // WithTelemetryPort sets the port for the telemetry exporter. func (cfg *Server) WithTelemetryPort(port int) *Server { - cfg.TelemetryPort = port + cfg.TelemetryConfig.Port = port + return cfg +} + +// WithTelemetryConfig sets the telemetry configuration. 
+func (cfg *Server) WithTelemetryConfig(cfgTelemetry *security.TelemetryConfig) *Server { + cfg.TelemetryConfig = cfgTelemetry return cfg } @@ -327,6 +333,7 @@ func DefaultServer() *Server { AccessPoints: []string{fmt.Sprintf("localhost:%d", build.DefaultControlPort)}, ControlPort: build.DefaultControlPort, TransportConfig: security.DefaultServerTransportConfig(), + TelemetryConfig: security.DefaultClientTelemetryConfig(), Hyperthreads: false, SystemRamReserved: storage.DefaultSysMemRsvd / humanize.GiByte, Path: defaultConfigPath, @@ -700,8 +707,12 @@ func (cfg *Server) Validate(log logging.Logger) (err error) { return FaultConfigNoProvider case cfg.ControlPort <= 0: return FaultConfigBadControlPort - case cfg.TelemetryPort < 0: - return FaultConfigBadTelemetryPort + } + + if cfg.TelemetryConfig != nil { + if cfg.TelemetryConfig.Port < 0 { + return FaultConfigBadTelemetryPort + } } for idx, ec := range cfg.Engines { diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 6961a0f4190..9a45070de05 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -104,11 +104,11 @@ func uncommentServerConfig(t *testing.T, outFile string) { } key := fields[0] - // If we're in a server or a storage tier config, reset the + // If we're in a server, a storage tier config, or telemetry config reset the // seen map to allow the same params in different // server configs. lineTmp := strings.TrimLeft(line, " ") - if lineTmp == "-" { + if lineTmp == "-" || lineTmp == "telemetry_config:" { seenKeys = make(map[string]struct{}) } if _, seen := seenKeys[key]; seen && strings.HasSuffix(key, ":") { @@ -246,7 +246,12 @@ func TestServerConfig_Constructed(t *testing.T) { WithControlLogFile("/tmp/daos_server.log"). WithHelperLogFile("/tmp/daos_server_helper.log"). WithFirmwareHelperLogFile("/tmp/daos_firmware_helper.log"). - WithTelemetryPort(9191). 
+ WithTelemetryConfig(&security.TelemetryConfig{ + AllowInsecure: true, + Port: 9191, + ServerCert: "/etc/daos/certs/telemetryserver.crt", + ServerKey: "/etc/daos/certs/telemetryserver.key", + CARootPath: "/etc/daos/certs/daosTelemetryCA.crt"}). WithSystemName("daos_server"). WithSocketDir("./.daos/daos_server"). WithFabricProvider("ofi+verbs;ofi_rxm"). @@ -417,7 +422,12 @@ func TestServerConfig_MDonSSD_Constructed(t *testing.T) { Path: "/var/daos/config", }). WithControlLogFile("/tmp/daos_server.log"). - WithTelemetryPort(9191). + WithTelemetryConfig(&security.TelemetryConfig{ + AllowInsecure: true, + Port: 9191, + ServerCert: "/etc/daos/certs/telemetryserver.crt", + ServerKey: "/etc/daos/certs/telemetryserver.key", + CARootPath: "/etc/daos/certs/daosTelemetryCA.crt"}). WithFabricProvider("ofi+tcp"). WithAccessPoints("example") diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index 269a5201e30..6a412fba40f 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -655,14 +655,15 @@ func configureFirstEngine(ctx context.Context, engine *EngineInstance, sysdb *ra // registerTelemetryCallbacks sets telemetry related callbacks to // be triggered when all engines have been started. 
func registerTelemetryCallbacks(ctx context.Context, srv *server) { - telemPort := srv.cfg.TelemetryPort + telemPort := srv.cfg.TelemetryConfig.Port + if telemPort == 0 { return } srv.OnEnginesStarted(func(ctxIn context.Context) error { srv.log.Debug("starting Prometheus exporter") - cleanup, err := startPrometheusExporter(ctxIn, srv.log, telemPort, srv.harness.Instances()) + cleanup, err := startPrometheusExporter(ctxIn, srv) if err != nil { return err } diff --git a/src/control/server/telemetry.go b/src/control/server/telemetry.go index 4b2f624aff2..0ccac94930b 100644 --- a/src/control/server/telemetry.go +++ b/src/control/server/telemetry.go @@ -68,14 +68,17 @@ func regPromEngineSources(ctx context.Context, log logging.Logger, engines []Eng return nil } -func startPrometheusExporter(ctx context.Context, log logging.Logger, port int, engines []Engine) (func(), error) { +func startPrometheusExporter(ctx context.Context, srv *server) (func(), error) { expCfg := &promexp.ExporterConfig{ - Port: port, - Title: "DAOS Engine Telemetry", + Port: srv.cfg.TelemetryConfig.Port, + Title: "DAOS Engine Telemetry", + AllowInsecure: srv.cfg.TelemetryConfig.AllowInsecure, + HttpsCert: srv.cfg.TelemetryConfig.ServerCert, + HttpsKey: srv.cfg.TelemetryConfig.ServerKey, Register: func(ctx context.Context, log logging.Logger) error { - return regPromEngineSources(ctx, log, engines) + return regPromEngineSources(ctx, srv.log, srv.harness.Instances()) }, } - return promexp.StartExporter(ctx, log, expCfg) + return promexp.StartExporter(ctx, srv.log, expCfg) } diff --git a/src/tests/ftest/control/dmg_telemetry_basic.py b/src/tests/ftest/control/dmg_telemetry_basic.py index 39eb520aef2..45cb8472392 100644 --- a/src/tests/ftest/control/dmg_telemetry_basic.py +++ b/src/tests/ftest/control/dmg_telemetry_basic.py @@ -1,5 +1,5 @@ """ -(C) Copyright 2021-2023 Intel Corporation. +(C) Copyright 2021-2024 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -91,7 +91,7 @@ def test_container_telemetry(self): :avocado: tags=all,pr,daily_regression :avocado: tags=vm - :avocado: tags=control,telemetry,container + :avocado: tags=control,telemetry,container,secure_telemetry :avocado: tags=TestWithTelemetryBasic,test_container_telemetry """ container_qty = self.params.get("container_qty", "/run/test/*", 1) diff --git a/src/tests/ftest/control/dmg_telemetry_basic.yaml b/src/tests/ftest/control/dmg_telemetry_basic.yaml index 69af131fc6f..f9c41e9eafb 100644 --- a/src/tests/ftest/control/dmg_telemetry_basic.yaml +++ b/src/tests/ftest/control/dmg_telemetry_basic.yaml @@ -16,6 +16,8 @@ server_config: class: ram scm_mount: /mnt/daos system_ram_reserved: 1 + telemetry_config: + allow_insecure: false pool: scm_size: 2G container: @@ -23,3 +25,6 @@ container: test: container_qty: 5 open_close_qty: 3 +dmg: + telemetry_config: + allow_insecure: false diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.py b/src/tests/ftest/telemetry/basic_client_telemetry.py index 1d115b4c95e..692da134db4 100644 --- a/src/tests/ftest/telemetry/basic_client_telemetry.py +++ b/src/tests/ftest/telemetry/basic_client_telemetry.py @@ -26,7 +26,7 @@ def test_client_metrics_exist(self): :avocado: tags=all,daily_regression :avocado: tags=vm - :avocado: tags=telemetry + :avocado: tags=telemetry,secure_telemetry :avocado: tags=BasicClientTelemetry,test_client_metrics_exist """ # create pool and container diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.yaml b/src/tests/ftest/telemetry/basic_client_telemetry.yaml index d585dc81fda..82b57ba9961 100644 --- a/src/tests/ftest/telemetry/basic_client_telemetry.yaml +++ b/src/tests/ftest/telemetry/basic_client_telemetry.yaml @@ -18,9 +18,11 @@ server_config: system_ram_reserved: 1 agent_config: - telemetry_port: 9191 - telemetry_retain: 30s - telemetry_enabled: true + telemetry_config: + allow_insecure: false + port: 9191 + retain: 30s + enabled: 
true pool: scm_size: 2G @@ -44,3 +46,6 @@ ior_write: ior_read: <<: *ior_base flags: "-v -r -R -G 1" +dmg: + telemetry_config: + allow_insecure: false diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index 74b79fb9796..9e0e013e2ae 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -7,7 +7,8 @@ import re import socket -from agent_utils_params import DaosAgentTransportCredentials, DaosAgentYamlParameters +from agent_utils_params import (DaosAgentTelemetryCredentials, DaosAgentTransportCredentials, + DaosAgentYamlParameters) from ClusterShell.NodeSet import NodeSet from command_utils import CommandWithSubCommand, SubprocessManager, YamlCommand from command_utils_base import (CommandWithParameters, CommonConfig, EnvironmentVariables, @@ -53,6 +54,7 @@ def get_agent_command(group, cert_dir, bin_dir, config_file, run_user, config_te transport_config = DaosAgentTransportCredentials(cert_dir) common_config = CommonConfig(group, transport_config) config = DaosAgentYamlParameters(config_file, common_config) + config.telemetry_config = DaosAgentTelemetryCredentials(cert_dir) command = DaosAgentCommand(bin_dir, config, run_user=run_user) if config_temp: # Setup the DaosAgentCommand to write the config file data to the @@ -285,6 +287,9 @@ def start(self): # Copy certificates self.manager.job.copy_certificates( get_log_file("daosCA/certs"), self._hosts) + self.manager.job.copy_telemetry_certificates( + get_log_file("daosTelemetryCA"), self._hosts) + self.manager.job.generate_telemetry_certificates(self._hosts, "daos_agent") # Verify the socket directory exists when using a non-systemctl manager if self.verify_socket_dir: diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index 9fe4ddd455e..1321d37bf03 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -5,7 +5,8 @@ """ import os -from 
command_utils_base import BasicParameter, LogParameter, TransportCredentials, YamlParameters +from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, + TransportCredentials, YamlParameters) class DaosAgentTransportCredentials(TransportCredentials): @@ -32,6 +33,43 @@ def _get_new(self): return DaosAgentTransportCredentials(self._log_dir) +class DaosAgentTelemetryCredentials(TelemetryCredentials): + # pylint: disable=too-few-public-methods + """Telemetry credentials listing certificates for secure communication.""" + + def __init__(self, log_dir=os.path.join(os.sep, "tmp")): + """Initialize a TelemetryConfig object.""" + super().__init__("/run/agent_config/telemetry_config/*", None, log_dir) + + self.port = BasicParameter(None, 9192) + self.enabled = BasicParameter(None) + self.retain = BasicParameter(None) + self.server_cert = LogParameter(self._log_dir, None, "telemetryserver.crt") + self.server_key = LogParameter(self._log_dir, None, "telemetryserver.key") + + def get_certificate_data(self, name_list): + """Get certificate data. + + Args: + name_list (list): list of certificate attribute names. + + Returns: + data (dict): a dictionary of parameter directory name keys and + value. + + """ + data = super().get_certificate_data(name_list) + return data + + def _get_new(self): + """Get a new object based upon this one. 
+ + Returns: + DaosAgentTelemetryCredentials: a new DaosAgentTelemetryCredentials object + """ + return DaosAgentTelemetryCredentials(self._log_dir) + + class DaosAgentYamlParameters(YamlParameters): """Defines the daos_agent configuration yaml parameters.""" @@ -78,9 +116,6 @@ def __init__(self, filename, common_yaml): self.exclude_fabric_ifaces = BasicParameter(None) self.cache_expiration = BasicParameter(None) self.disable_caching = BasicParameter(None) - self.telemetry_port = BasicParameter(None) - self.telemetry_enabled = BasicParameter(None) - self.telemetry_retain = BasicParameter(None) def update_log_file(self, name): """Update the log file name for the daos agent. diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index 06da3b27868..e12da1cc3b4 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -21,7 +21,7 @@ from exception_utils import CommandFailure from file_utils import change_file_owner, create_directory, distribute_files from general_utils import (DaosTestError, check_file_exists, get_file_listing, - get_job_manager_class, get_subprocess_stdout, run_command) + get_job_manager_class, get_subprocess_stdout, run_command, run_pcmd) from run_utils import command_as_user, run_remote from user_utils import get_primary_group from yaml_utils import get_yaml_data @@ -1038,6 +1038,56 @@ def copy_certificates(self, source, hosts): self._command, ", ".join(names)) get_file_listing(hosts, names, self.run_user).log_output(self.log) + def copy_telemetry_certificates(self, source, hosts): + """Copy telemetry certificate files from the source to the destination hosts. + + Args: + source (str): source of the certificate files. + hosts (NodeSet): list of the destination hosts. 
+ """ + certfiles = ["daosTelemetryCA.crt", "daosTelemetryCA.key"] + data = self.yaml.telemetry_config.get_certificate_data( + self.yaml.telemetry_config.get_attribute_names(LogParameter)) + destination = list(data.keys())[0] + + for file_name in certfiles: + src_file = os.path.join(source, file_name) + dst_file = os.path.join(destination, file_name) + self.log.debug(" %s -> %s", src_file, dst_file) + result = distribute_files( + self.log, hosts, src_file, dst_file, mkdir=False, + verbose=False, sudo=True, owner=self.certificate_owner) + if not result.passed: + self.log.info( + " WARNING: %s copy failed on %s", dst_file, result.failed_hosts) + + def generate_telemetry_certificates(self, hosts, user): + """Generate the telemetry certificates for the test on server/client. + + Args: + hosts (NodeSet): list of the destination hosts. + user (User): User permission set on telemetry certificate file. + For server, it's daos_server and for client it's daos_agent. + """ + data = self.yaml.telemetry_config.get_certificate_data( + self.yaml.telemetry_config.get_attribute_names(LogParameter)) + destination = list(data.keys())[0] + + if not self.yaml.telemetry_config.allow_insecure.value: + certgen_dir = os.path.abspath( + os.path.join("..", "..", "..", "..", "lib64", "daos", "certgen")) + + command = os.path.join(certgen_dir, "gen_telemetry_server_certificate.sh ") + command = "sudo " + command + user + " " + destination + self.log.debug("Generating the telemetry certificate command %s:", command) + result = run_pcmd(hosts, command, 30) + if result[0]['exit_status'] != 0: + self.fail("Generating the telemetry certificate command Failed") + else: + self.log.info("Generating the telemetry certificate command Passed") + + return 0 + def copy_configuration(self, hosts): """Copy the yaml configuration file to the hosts. 
diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index 837e61b339f..67939e57aad 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -574,6 +574,7 @@ def __init__(self, namespace, filename=None, title=None, other_params=None): self.filename = filename self.title = title self.other_params = other_params + self.telemetry_config = None def get_params(self, test): """Get values for the yaml parameters from the test yaml file. @@ -588,6 +589,9 @@ def get_params(self, test): if self.other_params is not None: self.other_params.get_params(test) + if self.telemetry_config is not None: + self.telemetry_config.get_params(test) + def get_yaml_data(self): """Convert the parameters into a dictionary to use to write a yaml file. @@ -599,6 +603,12 @@ def get_yaml_data(self): yaml_data = self.other_params.get_yaml_data() else: yaml_data = {} + + if self.telemetry_config is not None: + telemetry_yaml = self.telemetry_config.get_yaml_data() + if telemetry_yaml: + yaml_data["telemetry_config"] = telemetry_yaml + for name in self.get_param_names(): value = getattr(self, name).value if value is not None: @@ -789,6 +799,76 @@ def _get_new(self): return TransportCredentials(self.namespace, self.title, self._log_dir) +class TelemetryCredentials(YamlParameters): + """Telemetry credentials listing certificates for secure communication.""" + + def __init__(self, namespace, title, log_dir): + """Initialize a TelemetryConfig object. + + Args: + namespace (str): yaml namespace (path to parameters) + title (str, optional): namespace under which to place the + parameters when creating the yaml file. Defaults to None. 
+ log_dir (str): location of the certificate files + """ + super().__init__(namespace, None, title) + self._log_dir = log_dir + default_insecure = str(os.environ.get("DAOS_TEST_INSECURE_MODE", True)) + default_insecure = default_insecure.lower() == "true" + self.allow_insecure = BasicParameter(None, default_insecure) + self.port = BasicParameter(None, 9191) + self.retain = None + self.enabled = None + + def get_yaml_data(self): + """Convert the parameters into a dictionary to use to write a yaml file. + + Returns: + dict: a dictionary of parameter name keys and values + + """ + yaml_data = super().get_yaml_data() + + # Always include the allow_insecure value in the yaml data + if self.title is not None: + yaml_data[self.title]["allow_insecure"] = self.allow_insecure.value + else: + yaml_data["allow_insecure"] = self.allow_insecure.value + + return yaml_data + + def get_certificate_data(self, name_list): + """Get certificate data by name_list. + + Args: + name_list (list): list of certificate attribute names. + + Returns: + dict: a dictionary mapping each certificate directory name to + the list of certificate file names in that directory. + + """ + data = {} + if not self.allow_insecure.value: + for name in name_list: + value = getattr(self, name).value + if isinstance(value, str): + dir_name, file_name = os.path.split(value) + if dir_name not in data: + data[dir_name] = [file_name] + else: + data[dir_name].append(file_name) + return data + + def _get_new(self): + """Get a new object based upon this one. + + Returns: + TelemetryCredentials: a new TelemetryCredentials object + """ + return TelemetryCredentials(self.namespace, self.title, self._log_dir) + + class CommonConfig(YamlParameters): """Defines common daos_agent and daos_server configuration file parameters. 
diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index effc3172bac..4d731db7eb4 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -10,7 +10,7 @@ from pwd import getpwuid from dmg_utils_base import DmgCommandBase -from dmg_utils_params import DmgTransportCredentials, DmgYamlParameters +from dmg_utils_params import DmgTelemetryCredentials, DmgTransportCredentials, DmgYamlParameters from exception_utils import CommandFailure from general_utils import dict_to_str, get_numeric_list @@ -39,7 +39,8 @@ def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hos """ transport_config = DmgTransportCredentials(cert_dir) - config = DmgYamlParameters(config_file, group, transport_config) + telemetry_config = DmgTelemetryCredentials(cert_dir) + config = DmgYamlParameters(config_file, group, transport_config, telemetry_config) command = DmgCommand(bin_dir, config, hostlist_suffix) if config_temp: # Setup the DaosServerCommand to write the config file data to the diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index ff9048a3b62..5361ce9afa2 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -1,10 +1,11 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ -from command_utils_base import BasicParameter, LogParameter, TransportCredentials, YamlParameters +from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, + TransportCredentials, YamlParameters) class DmgTransportCredentials(TransportCredentials): @@ -25,10 +26,27 @@ def _get_new(self): return DmgTransportCredentials(self._log_dir) +class DmgTelemetryCredentials(TelemetryCredentials): + """Transport credentials listing certificates for secure communication.""" + + def __init__(self, log_dir="/tmp"): + """Initialize a TelemetryCredentials object.""" + super().__init__("/run/dmg/telemetry_config/*", None, log_dir) + self.ca_cert = LogParameter(self._log_dir, None, "daosTelemetryCA.crt") + + def _get_new(self): + """Get a new object based upon this one. + + Returns: + DmgTelemetryCredentials: a new DmgTelemetryCredentials object + """ + return DmgTelemetryCredentials(self._log_dir) + + class DmgYamlParameters(YamlParameters): """Defines the dmg configuration yaml parameters.""" - def __init__(self, filename, name, transport): + def __init__(self, filename, name, transport, telemetry=None): """Initialize a DmgYamlParameters object. Args: @@ -57,6 +75,9 @@ def __init__(self, filename, name, transport): self.hostlist = BasicParameter(None, "localhost") self.port = BasicParameter(None, 10001) + if telemetry is not None: + self.telemetry_config = telemetry + def _get_new(self): """Get a new object based upon this one. 
diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index 0f7284c50ef..7d0f06d16b6 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -425,7 +425,12 @@ def prepare(self, logger, test_log_file, test, repeat, user_create, slurm_setup, return status # Generate certificate files for the test - return self._generate_certs(logger) + status = self._generate_certs(logger) + if status: + return status + + # Generate telemetry certificate files for the test + return self._generate_telemetry_certs(logger) def execute(self, logger, test, repeat, number, sparse, fail_fast): """Run the specified test. @@ -867,6 +872,35 @@ def _generate_certs(self, logger): message = "Error generating certificates" self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) return 128 + + return 0 + + def _generate_telemetry_certs(self, logger): + """Generate the telemetry certificates for the test. + + Args: + logger (Logger): logger for the messages produced by this method + + Returns: + int: status code: 0 = success, 128 = failure + + """ + logger.debug("-" * 80) + logger.debug("Generating Telemetry certificate") + test_env = TestEnvironment() + certs_dir = os.path.join(test_env.log_dir, "daosTelemetryCA") + certgen_dir = os.path.abspath( + os.path.join("..", "..", "..", "..", "lib64", "daos", "certgen")) + command = os.path.join(certgen_dir, "gen_telemetry_admin_certificate.sh") + if not run_local(logger, f"/usr/bin/rm -rf {certs_dir}").passed: + message = "Error removing old Telemetry certificates" + self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) + return 128 + if not run_local(logger, f"{command} {test_env.log_dir}").passed: + message = "Error generating Telemetry certificates" + self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) + return 128 + return 0 def _collect_crash_files(self, logger): diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index
752473021a3..6fc2a897500 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -21,7 +21,8 @@ from host_utils import get_local_host from run_utils import run_remote, stop_processes from server_utils_base import DaosServerCommand, DaosServerInformation, ServerFailed -from server_utils_params import DaosServerTransportCredentials, DaosServerYamlParameters +from server_utils_params import (DaosServerTelemetryCredentials, DaosServerTransportCredentials, + DaosServerYamlParameters) from user_utils import get_chown_command @@ -45,7 +46,10 @@ def get_server_command(group, cert_dir, bin_dir, config_file, config_temp=None): transport_config = DaosServerTransportCredentials(cert_dir) common_config = CommonConfig(group, transport_config) config = DaosServerYamlParameters(config_file, common_config) + config.telemetry_config = DaosServerTelemetryCredentials(cert_dir) + command = DaosServerCommand(bin_dir, config, None) + if config_temp: # Setup the DaosServerCommand to write the config file data to the # temporary file and then copy the file to all the hosts using the @@ -240,6 +244,8 @@ def prepare(self, storage=True): # Copy certificates self.manager.job.copy_certificates(get_log_file("daosCA/certs"), self._hosts) + self.manager.job.copy_telemetry_certificates(get_log_file("daosTelemetryCA"), self._hosts) + self.manager.job.generate_telemetry_certificates(self._hosts, "daos_server") self._prepare_dmg_certificates() # Prepare dmg for running storage format on all server hosts diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 440ffe68f82..90f04d53039 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -5,7 +5,8 @@ """ import os -from command_utils_base import BasicParameter, LogParameter, TransportCredentials, YamlParameters +from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, + 
TransportCredentials, YamlParameters) MAX_STORAGE_TIERS = 5 @@ -56,6 +57,41 @@ def _get_new(self): return DaosServerTransportCredentials(self._log_dir) +class DaosServerTelemetryCredentials(TelemetryCredentials): + # pylint: disable=too-few-public-methods + """Telemetry credentials listing certificates for secure communication.""" + + def __init__(self, log_dir=os.path.join(os.sep, "tmp")): + """Initialize a TelemetryConfig object.""" + super().__init__("/run/server_config/telemetry_config/*", None, log_dir) + + self.port = BasicParameter(None, 9191) + self.server_cert = LogParameter(self._log_dir, None, "telemetryserver.crt") + self.server_key = LogParameter(self._log_dir, None, "telemetryserver.key") + + def get_certificate_data(self, name_list): + """Get certificate data. + + Args: + name_list (list): list of certificate attribute names. + + Returns: + data (dict): a dictionary of parameter directory name keys and + value. + + """ + data = super().get_certificate_data(name_list) + return data + + def _get_new(self): + """Get a new object based upon this one. + + Returns: + DaosServerTelemetryCredentials: a new DaosServerTelemetryCredentials object + """ + return DaosServerTelemetryCredentials(self._log_dir) + + class DaosServerYamlParameters(YamlParameters): """Defines the daos_server configuration yaml parameters.""" @@ -131,7 +167,6 @@ def __init__(self, filename, common_yaml): self.control_log_mask = BasicParameter(None, "DEBUG") self.control_log_file = LogParameter(log_dir, None, "daos_control.log") self.helper_log_file = LogParameter(log_dir, None, "daos_server_helper.log") - self.telemetry_port = BasicParameter(None, 9191) self.client_env_vars = BasicParameter(None) # Used to drop privileges before starting data plane @@ -142,6 +177,9 @@ def __init__(self, filename, common_yaml): # Control plane metadata parameters. 
self.metadata_params = ControlMetadataParameters(self.namespace) + # Telemetry Parameters + self.telemetry_config = BasicParameter(None) + # Defines the number of single engine config parameters to define in # the yaml file self.engines_per_host = BasicParameter(None, 0) diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index d59c8d39e81..a1dbd6576fd 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -365,7 +365,9 @@ class TelemetryUtils(): *_gen_stats_metrics("engine_net_swim_delay"), "engine_net_uri_lookup_timeout", "engine_net_uri_lookup_other", - "engine_net_uri_lookup_self"] + "engine_net_uri_lookup_self", + 'engine_net_quota_exceeded', + 'engine_net_waitq_depth'] ENGINE_RANK_METRICS = [ "engine_rank"] ENGINE_NVME_HEALTH_METRICS = [ diff --git a/utils/certs/SConscript b/utils/certs/SConscript index 446a2059a5a..38fb8ffd1d4 100644 --- a/utils/certs/SConscript +++ b/utils/certs/SConscript @@ -8,7 +8,9 @@ def scons(): env.Install("$PREFIX/lib64/daos/certgen", ['admin.cnf', 'agent.cnf', 'server.cnf', - 'gen_certificates.sh']) + 'gen_certificates.sh', + 'gen_telemetry_admin_certificate.sh', + 'gen_telemetry_server_certificate.sh']) if __name__ == "SCons.Script": diff --git a/utils/certs/gen_telemetry_admin_certificate.sh b/utils/certs/gen_telemetry_admin_certificate.sh new file mode 100755 index 00000000000..e705fcc8796 --- /dev/null +++ b/utils/certs/gen_telemetry_admin_certificate.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# /* +# * (C) Copyright 2024 Intel Corporation. +# * +# * SPDX-License-Identifier: BSD-2-Clause-Patent +# */ + +__usage=" +Usage: gen_telemetry_admin_certificate.sh [DIR] +Generate certificates for DAOS deployment in the [DIR]/daosTelemetryCA. +By default [DIR] is the current directory. 
+" + +function print_usage () { + >&2 echo "$__usage" +} + +# validity of root CA and keys' certificates +DAYS=1095 + +CA_HOME="${1:-.}/daosTelemetryCA" +# shellcheck disable=SC2128 +CONFIGS="$(dirname "${BASH_SOURCE}")" + +function setup_directories () { + mkdir -p "${CA_HOME}" + chmod 700 "${CA_HOME}" +} + +function generate_ca_cnf () { + echo " +[ ca ] +default_ca = CA_daos_telemetry + +[ CA_daos_telemetry ] +dir = ${CA_HOME} +certs = \$dir + +# Key and Certificate for the root +certificate = \$dir/daosTelemetryCA.crt +private_key = \$dir/daosTelemetryCA.key + +default_md = sha512 # SAFE Crypto Requires SHA-512 +default_days = ${DAYS} # how long to certify for +copy_extensions = copy +unique_subject = no + +[ req ] +prompt = no +distinguished_name = ca_dn +x509_extensions = ca_ext + +[ ca_dn ] +organizationName = DAOS +commonName = DAOS CA TELEMETRY + +[ ca_ext ] +keyUsage = critical,digitalSignature,nonRepudiation,keyEncipherment,keyCertSign +basicConstraints = critical,CA:true,pathlen:1 + +[ signing_policy ] +organizationName = supplied +commonName = supplied + +" > "${CA_HOME}/ca.cnf" +} + +function generate_ca_cert () { + echo "Generating Private CA Root Certificate" + # Generate Private key and set permissions + openssl genrsa -out "${CA_HOME}/daosTelemetryCA.key" 3072 + [[ $EUID -eq 0 ]] && chown root.root "${CA_HOME}/daosTelemetryCA.key" 2>/dev/null + chmod 0400 "${CA_HOME}/daosTelemetryCA.key" + # Generate CA Certificate + openssl req -new -x509 -config "${CA_HOME}/ca.cnf" -days ${DAYS} -sha512 \ + -key "${CA_HOME}/daosTelemetryCA.key" \ + -out "${CA_HOME}/daosTelemetryCA.crt" -batch + [[ $EUID -eq 0 ]] && chown root.daos_daemons "${CA_HOME}/daosTelemetryCA.crt" 2>/dev/null + chmod 0644 "${CA_HOME}/daosTelemetryCA.crt" + # Reset the CA index + rm -f "${CA_HOME}/index.txt" "${CA_HOME}/serial.txt" + touch "${CA_HOME}/index.txt" + echo '01' > "${CA_HOME}/serial.txt" + echo "Private CA Root Certificate for Telemetry created in ${CA_HOME}" +} + +function 
cleanup () { + rm -f "${CA_HOME}/ca.cnf" +} + +function main () { + if [[ -d "$CA_HOME" ]] + then + echo "$CA_HOME already exists, exiting." + exit 1 + fi + setup_directories + generate_ca_cnf + generate_ca_cert + cleanup +} + +main diff --git a/utils/certs/gen_telemetry_server_certificate.sh b/utils/certs/gen_telemetry_server_certificate.sh new file mode 100755 index 00000000000..afd3d246190 --- /dev/null +++ b/utils/certs/gen_telemetry_server_certificate.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# /* +# * (C) Copyright 2024 Intel Corporation. +# * +# * SPDX-License-Identifier: BSD-2-Clause-Patent +# */ + +__usage=" + +This is just an example script for testing purpose. +Please modify to use in Production environment. + +Usage: gen_telemetry_server_certificate.sh [USER] [DIR] + USER: DAOS has server and client and the certificate need the specific file permission + based on system usage. + Use "daos_server" if running script on server + Use "daos_agent" if running script on client + + DIR: Generate telemetry certificates for DAOS metrics in the [DIR]. + By default [DIR] is the current directory. 
+" + +DAYS=1095 + +USER=$1 +CA_HOME="${2:-.}/" +HOSTNAME=$(hostname -s) + +function print_usage () { + >&2 echo "$__usage" +} + +function generate_ca_cnf () { + echo " +[req] +default_md = sha256 +prompt = no +req_extensions = v3_ext +distinguished_name = req_distinguished_name + +[req_distinguished_name] +CN = ${HOSTNAME} + +[v3_ext] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = critical,serverAuth,clientAuth +subjectAltName = DNS:${HOSTNAME} + +" > "${CA_HOME}/telemetry.cnf" +} + +function generate_server_cert () { + echo "Generating Server Certificate" + # Generate Private key and set its permissions + openssl genrsa -out "${CA_HOME}/telemetryserver.key" 2048 + [[ $EUID -eq 0 ]] && chown ${USER}.${USER} "${CA_HOME}/telemetryserver.key" + chmod 0400 "${CA_HOME}/telemetryserver.key" + + # Generate a Certificate Signing Request (CRS) + openssl req -new -key "${CA_HOME}/telemetryserver.key" \ + -out "${CA_HOME}/telemetryserver.csr" -config "${CA_HOME}/telemetry.cnf" + + # Create Certificate from request + openssl x509 -req -in "${CA_HOME}/telemetryserver.csr" -CA "${CA_HOME}/daosTelemetryCA.crt" \ + -CAkey "${CA_HOME}/daosTelemetryCA.key" -CAcreateserial -out "${CA_HOME}/telemetryserver.crt" \ + -days ${DAYS} -sha256 -extfile "$CA_HOME/telemetry.cnf" -extensions v3_ext + + [[ $EUID -eq 0 ]] && chown ${USER}.${USER} "${CA_HOME}/telemetryserver.crt" + chmod 0644 "${CA_HOME}/telemetryserver.crt" + + echo "Required Server Certificate Files: + ${CA_HOME}/daosTelemetryCA.crt + ${CA_HOME}/telemetryserver.key + ${CA_HOME}/telemetryserver.crt" +} + +function cleanup () { + # Remove this key as it's not required after creating the telemetryserver.key + rm -f "${CA_HOME}/daosTelemetryCA.key" + + rm -f "${CA_HOME}/telemetryserver.csr" + rm -f "${CA_HOME}/telemetry.cnf" +} + +function main () { + generate_ca_cnf + generate_server_cert + cleanup +} + +main diff --git a/utils/config/daos_agent.yml b/utils/config/daos_agent.yml index 
31dc432abaa..304550cdfef 100644 --- a/utils/config/daos_agent.yml +++ b/utils/config/daos_agent.yml @@ -26,26 +26,35 @@ # default: 10001 #port: 10001 -## Enable HTTP endpoint for remote telemetry collection. -# Note that enabling the endpoint automatically enables -# client telemetry collection. +## Enable Telemetry HTTP/HTTPS endpoint for remote client telemetry collection. # -## default endpoint state: disabled -## default endpoint port: 9192 -#telemetry_port: 9192 - -## Enable client telemetry for all DAOS clients. -# If false, clients will need to optionally enable telemetry by setting -# the D_CLIENT_METRICS_ENABLE environment variable to true. +#telemetry_config: +# # Set the client telemetry endpoint port number +# # default: 9192 +# port: 9192 # -## default: false -#telemetry_enabled: true - -## Retain client telemetry for a period of time after the client -# process exits. +# # Enable client telemetry for all DAOS clients. +# # If false, clients will need to optionally enable telemetry by setting +# # the D_CLIENT_METRICS_ENABLE environment variable to true. +# # default: false +# enabled: true +# +# # Retain client telemetry for a period of time after the client +# # process exits. +# # default 0 (do not retain telemetry after client exit) +# retain: 1m +# +# # In order to disable transport security, uncomment and set allow_insecure +# # to true. Not recommended for production configurations. +# allow_insecure: false +# +# # Server certificate for use in TLS handshakes +# # DAOS client is the HTTPS server to open secure telemetry endpoint. +# server_cert: /etc/daos/certs/telemetryserver.crt # -## default 0 (do not retain telemetry after client exit) -#telemetry_retain: 1m +# # Key portion of Server Certificate +# # DAOS client is the HTTPS server to open secure telemetry endpoint. +# server_key: /etc/daos/certs/telemetryserver.key ## Configuration for user credential management. 
#credential_config: diff --git a/utils/config/daos_control.yml b/utils/config/daos_control.yml index 5a236052fd7..ea2da17066e 100644 --- a/utils/config/daos_control.yml +++ b/utils/config/daos_control.yml @@ -38,3 +38,13 @@ # cert: /etc/daos/certs/admin.crt # # Key portion of Admin Certificate # key: /etc/daos/certs/admin.key + +## Enable Telemetry HTTP/HTTPS endpoint for remote client telemetry collection. +# +#telemetry_config: +# # In order to disable transport security, uncomment and set allow_insecure +# # to true. Not recommended for production configurations. +# allow_insecure: false +# +# # Custom CA Root certificate for generated telemetry certs +# ca_cert: /etc/daos/certs/daosTelemetryCA.crt diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index c5434ea1c00..0d56afbd589 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -256,11 +256,21 @@ #firmware_helper_log_file: /tmp/daos_firmware_helper.log # # -## Enable HTTP endpoint for remote telemetry collection. +## Enable Telemetry HTTP/HTTPS endpoint for remote telemetry collection. 
# -## default endpoint state: disabled -## default endpoint port: 9191 -#telemetry_port: 9191 +#telemetry_config: +# # In order to disable telemetry security, uncomment and set allow_insecure to true +# allow_insecure: true +# +# # Set the server telemetry endpoint port number +# # default: 9191 +# port: 9191 +# +# # Server certificate for use in TLS handshakes +# server_cert: /etc/daos/certs/telemetryserver.crt +# +# # Key portion of Server Certificate +# server_key: /etc/daos/certs/telemetryserver.key # # ## If desired, a set of client-side environment variables may be diff --git a/utils/config/examples/daos_server_local.yml b/utils/config/examples/daos_server_local.yml index 814ac659824..fa797a92d2f 100644 --- a/utils/config/examples/daos_server_local.yml +++ b/utils/config/examples/daos_server_local.yml @@ -7,7 +7,9 @@ control_log_file: /tmp/daos_server.log transport_config: allow_insecure: true -telemetry_port: 9191 +telemetry_config: + allow_insecure: true + port: 9191 engines: - diff --git a/utils/config/examples/daos_server_mdonssd.yml b/utils/config/examples/daos_server_mdonssd.yml index 043288a59df..8052681fdf4 100644 --- a/utils/config/examples/daos_server_mdonssd.yml +++ b/utils/config/examples/daos_server_mdonssd.yml @@ -26,7 +26,9 @@ control_metadata: # # Key portion of Server Certificate # key: /etc/daos/certs/server.key -telemetry_port: 9191 +telemetry_config: + allow_insecure: true + port: 9191 engines: - diff --git a/utils/config/examples/daos_server_tcp.yml b/utils/config/examples/daos_server_tcp.yml index 635abe89dce..e2fdc4af0d4 100644 --- a/utils/config/examples/daos_server_tcp.yml +++ b/utils/config/examples/daos_server_tcp.yml @@ -7,7 +7,9 @@ provider: ofi+tcp control_log_mask: DEBUG control_log_file: /tmp/daos_server.log -telemetry_port: 9191 +telemetry_config: + allow_insecure: true + port: 9191 ## Transport Credentials Specifying certificates to secure communications ## diff --git a/utils/config/examples/daos_server_ucx.yml 
b/utils/config/examples/daos_server_ucx.yml index bdd35a4c647..cd76ad6dd05 100644 --- a/utils/config/examples/daos_server_ucx.yml +++ b/utils/config/examples/daos_server_ucx.yml @@ -16,7 +16,9 @@ provider: ucx+dc_x control_log_mask: INFO control_log_file: /tmp/daos_server.log -telemetry_port: 9191 +telemetry_config: + allow_insecure: true + port: 9191 ## Transport Credentials Specifying certificates to secure communications ## diff --git a/utils/config/examples/daos_server_verbs.yml b/utils/config/examples/daos_server_verbs.yml index 667992351fc..d48a2d1330b 100644 --- a/utils/config/examples/daos_server_verbs.yml +++ b/utils/config/examples/daos_server_verbs.yml @@ -7,7 +7,9 @@ provider: ofi+verbs control_log_mask: INFO control_log_file: /tmp/daos_server.log -telemetry_port: 9191 +telemetry_config: + allow_insecure: true + port: 9191 ## Transport Credentials Specifying certificates to secure communications ## From a3bcfc97ff2b64d14bb4c8a03696f3665bce794f Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Mon, 30 Sep 2024 17:35:34 +0000 Subject: [PATCH 02/19] Fixed Test code. Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- src/tests/ftest/config_file_gen.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/tests/ftest/config_file_gen.py b/src/tests/ftest/config_file_gen.py index 58f97f3b902..c030abc9f9e 100755 --- a/src/tests/ftest/config_file_gen.py +++ b/src/tests/ftest/config_file_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -12,11 +12,14 @@ import sys from argparse import ArgumentParser, RawDescriptionHelpFormatter -from util.agent_utils_params import DaosAgentTransportCredentials, DaosAgentYamlParameters +from util.agent_utils_params import (DaosAgentTelemetryCredentials, DaosAgentTransportCredentials, + DaosAgentYamlParameters) from util.command_utils_base import CommonConfig -from util.dmg_utils_params import DmgTransportCredentials, DmgYamlParameters +from util.dmg_utils_params import (DmgTelemetryCredentials, DmgTransportCredentials, + DmgYamlParameters) from util.exception_utils import CommandFailure -from util.server_utils_params import DaosServerTransportCredentials, DaosServerYamlParameters +from util.server_utils_params import (DaosServerTelemetryCredentials, + DaosServerTransportCredentials, DaosServerYamlParameters) def generate_agent_config(args): @@ -31,6 +34,7 @@ def generate_agent_config(args): """ common_cfg = CommonConfig(args.group_name, DaosAgentTransportCredentials()) config = DaosAgentYamlParameters(args.agent_file, common_cfg) + config.telemetry_config = DaosAgentTelemetryCredentials() # Update the configuration file access points config.other_params.access_points.value = args.node_list.split(",") return create_config(args, config) @@ -48,6 +52,7 @@ def generate_server_config(args): """ common_cfg = CommonConfig(args.group_name, DaosServerTransportCredentials()) config = DaosServerYamlParameters(args.server_file, common_cfg) + config.telemetry_config = DaosServerTelemetryCredentials() config.engine_params[0].storage.storage_tiers[0].storage_class.value = "ram" config.engine_params[0].storage.storage_tiers[0].scm_mount.value = "/mnt/daos" config.engine_params[0].storage.storage_tiers[0].scm_size.value = 0 @@ -68,6 +73,7 @@ def generate_dmg_config(args): """ config = DmgYamlParameters( args.dmg_file, args.group_name, DmgTransportCredentials()) + config.telemetry_config = DmgTelemetryCredentials() # Update the 
configuration file hostlist config.hostlist.value = args.node_list.split(",") return create_config(args, config) From d3f9941ca34c320aabe782626d2d407b2d170b91 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Mon, 30 Sep 2024 17:43:34 +0000 Subject: [PATCH 03/19] Spell correction and script fix. Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- src/control/lib/control/http.go | 2 +- src/control/lib/control/http_test.go | 2 +- utils/certs/gen_telemetry_admin_certificate.sh | 2 -- utils/certs/gen_telemetry_server_certificate.sh | 8 ++++---- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index 69d002dc36c..d93085056c6 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -172,7 +172,7 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. cert, err := ioutil.ReadFile(*cacertpath) if err != nil { - return nil, errors.Wrap(err, "reading CA cerificate file Error") + return nil, errors.Wrap(err, "reading CA certificate file Error") } get, err = httpsGetFunc(cert) diff --git a/src/control/lib/control/http_test.go b/src/control/lib/control/http_test.go index 20e8d58234f..c15ba079753 100644 --- a/src/control/lib/control/http_test.go +++ b/src/control/lib/control/http_test.go @@ -225,7 +225,7 @@ func TestControl_httpGetBody(t *testing.T) { Body: newMockReadCloser("this is the body of an HTTP response"), }, nil }, - expErr: errors.New("reading CA cerificate file Error: open wrongpath/notavailable.crt: no such file or directory"), + expErr: errors.New("reading CA certificate file Error: open wrongpath/notavailable.crt: no such file or directory"), }, "reading body fails": { url: defaultURL, diff --git a/utils/certs/gen_telemetry_admin_certificate.sh b/utils/certs/gen_telemetry_admin_certificate.sh index e705fcc8796..ced344ca0d8 100755 --- a/utils/certs/gen_telemetry_admin_certificate.sh +++ 
b/utils/certs/gen_telemetry_admin_certificate.sh @@ -19,8 +19,6 @@ function print_usage () { DAYS=1095 CA_HOME="${1:-.}/daosTelemetryCA" -# shellcheck disable=SC2128 -CONFIGS="$(dirname "${BASH_SOURCE}")" function setup_directories () { mkdir -p "${CA_HOME}" diff --git a/utils/certs/gen_telemetry_server_certificate.sh b/utils/certs/gen_telemetry_server_certificate.sh index afd3d246190..3670a92afec 100755 --- a/utils/certs/gen_telemetry_server_certificate.sh +++ b/utils/certs/gen_telemetry_server_certificate.sh @@ -13,8 +13,8 @@ Please modify to use in Production environment. Usage: gen_telemetry_server_certificate.sh [USER] [DIR] USER: DAOS has server and client and the certificate need the specific file permission based on system usage. - Use "daos_server" if running script on server - Use "daos_agent" if running script on client + Use daos_server if running script on server + Use daos_agent if running script on client DIR: Generate telemetry certificates for DAOS metrics in the [DIR]. By default [DIR] is the current directory. 
@@ -53,7 +53,7 @@ function generate_server_cert () { echo "Generating Server Certificate" # Generate Private key and set its permissions openssl genrsa -out "${CA_HOME}/telemetryserver.key" 2048 - [[ $EUID -eq 0 ]] && chown ${USER}.${USER} "${CA_HOME}/telemetryserver.key" + [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetryserver.key" chmod 0400 "${CA_HOME}/telemetryserver.key" # Generate a Certificate Signing Request (CRS) @@ -65,7 +65,7 @@ function generate_server_cert () { -CAkey "${CA_HOME}/daosTelemetryCA.key" -CAcreateserial -out "${CA_HOME}/telemetryserver.crt" \ -days ${DAYS} -sha256 -extfile "$CA_HOME/telemetry.cnf" -extensions v3_ext - [[ $EUID -eq 0 ]] && chown ${USER}.${USER} "${CA_HOME}/telemetryserver.crt" + [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetryserver.crt" chmod 0644 "${CA_HOME}/telemetryserver.crt" echo "Required Server Certificate Files: From d9860a423b6580256d7d9a1aa981cc46d915ec60 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Tue, 1 Oct 2024 03:21:58 +0000 Subject: [PATCH 04/19] Few minor fix and Test case fix. Required-githooks: true Signed-off-by: Samir Raval --- src/tests/ftest/server/storage_tiers.py | 6 ++++-- src/tests/ftest/util/command_utils.py | 10 +++------- src/tests/ftest/util/dmg_utils_params.py | 4 +++- src/tests/ftest/util/server_utils_params.py | 5 +++++ 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/tests/ftest/server/storage_tiers.py b/src/tests/ftest/server/storage_tiers.py index e627b8d62c2..ad946baaa72 100644 --- a/src/tests/ftest/server/storage_tiers.py +++ b/src/tests/ftest/server/storage_tiers.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -8,7 +8,8 @@ import yaml from apricot import TestWithServers from command_utils_base import CommonConfig -from server_utils import DaosServerTransportCredentials, DaosServerYamlParameters +from server_utils import (DaosServerTelemetryCredentials, DaosServerTransportCredentials, + DaosServerYamlParameters) class StorageTiers(TestWithServers): @@ -67,6 +68,7 @@ def test_tiers(self): common_config = CommonConfig("daos_server", DaosServerTransportCredentials()) config = DaosServerYamlParameters(None, common_config) + config.telemetry_config = DaosServerTelemetryCredentials() config.namespace = self.server_config_namespace config.get_params(self) data = config.get_yaml_data() diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index e12da1cc3b4..09fca6bf3ca 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -1058,8 +1058,8 @@ def copy_telemetry_certificates(self, source, hosts): self.log, hosts, src_file, dst_file, mkdir=False, verbose=False, sudo=True, owner=self.certificate_owner) if not result.passed: - self.log.info( - " WARNING: %s copy failed on %s", dst_file, result.failed_hosts) + self.log.info(" WARNING: %s copy telemetry cert failed on %s", + dst_file, result.failed_hosts) def generate_telemetry_certificates(self, hosts, user): """Generate the telemetry certificates for the test on server/client. @@ -1082,11 +1082,7 @@ def generate_telemetry_certificates(self, hosts, user): self.log.debug("Generating the telemetry certificate command %s:", command) result = run_pcmd(hosts, command, 30) if result[0]['exit_status'] != 0: - self.fail("Generating the telemetry certificate command Failed") - else: - self.log.info("Generating the telemetry certificate command Passed") - - return 0 + self.log.info(" WARNING: command %s failed", command) def copy_configuration(self, hosts): """Copy the yaml configuration file to the hosts. 
diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index 5361ce9afa2..5d6cea72f38 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -27,7 +27,7 @@ def _get_new(self): class DmgTelemetryCredentials(TelemetryCredentials): - """Transport credentials listing certificates for secure communication.""" + """Telemetry credentials listing certificates for secure communication.""" def __init__(self, log_dir="/tmp"): """Initialize a TelemetryCredentials object.""" @@ -54,6 +54,8 @@ def __init__(self, filename, name, transport, telemetry=None): name (str): The DAOS system name. transport (DmgTransportCredentials): dmg security configuration settings. + telemetry (DmgTelemetryCredentials): dmg telemetry + configuration settings. """ super().__init__("/run/dmg/*", filename, None, transport) diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 90f04d53039..a6ca88d8eb8 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -65,6 +65,11 @@ def __init__(self, log_dir=os.path.join(os.sep, "tmp")): """Initialize a TelemetryConfig object.""" super().__init__("/run/server_config/telemetry_config/*", None, log_dir) + # Additional daos_server telemetry credential parameters: + # - port: : Telemetry endpoint port number + # - server_cert: : Server certificate + # - server_key: : Server Key portion + # self.port = BasicParameter(None, 9191) self.server_cert = LogParameter(self._log_dir, None, "telemetryserver.crt") self.server_key = LogParameter(self._log_dir, None, "telemetryserver.key") From 3c3bbc30aa485dfad219e829e1fc3b5183d46522 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Tue, 1 Oct 2024 16:15:00 +0000 Subject: [PATCH 05/19] Code modified based on review comments. 
Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- src/tests/ftest/util/agent_utils.py | 4 ++-- src/tests/ftest/util/agent_utils_params.py | 14 -------------- src/tests/ftest/util/command_utils.py | 10 +++++----- src/tests/ftest/util/command_utils_base.py | 6 +++--- src/tests/ftest/util/server_utils.py | 6 ++++-- src/tests/ftest/util/server_utils_params.py | 16 +--------------- 6 files changed, 15 insertions(+), 41 deletions(-) diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index 9e0e013e2ae..b7762c69860 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -287,9 +287,9 @@ def start(self): # Copy certificates self.manager.job.copy_certificates( get_log_file("daosCA/certs"), self._hosts) - self.manager.job.copy_telemetry_certificates( + self.manager.job.copy_telemetry_root_certificates( get_log_file("daosTelemetryCA"), self._hosts) - self.manager.job.generate_telemetry_certificates(self._hosts, "daos_agent") + self.manager.job.generate_telemetry_server_certificates(self._hosts, "daos_agent") # Verify the socket directory exists when using a non-systemctl manager if self.verify_socket_dir: diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index 1321d37bf03..1d80b627e1f 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -47,20 +47,6 @@ def __init__(self, log_dir=os.path.join(os.sep, "tmp")): self.server_cert = LogParameter(self._log_dir, None, "telemetryserver.crt") self.server_key = LogParameter(self._log_dir, None, "telemetryserver.key") - def get_certificate_data(self, name_list): - """Get certificate data. - - Args: - name_list (list): list of certificate attribute names. - - Returns: - data (dict): a dictionary of parameter directory name keys and - value. 
- - """ - data = super().get_certificate_data(name_list) - return data - def _get_new(self): """Get a new object based upon this one. diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index 09fca6bf3ca..77da3e85249 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -21,7 +21,7 @@ from exception_utils import CommandFailure from file_utils import change_file_owner, create_directory, distribute_files from general_utils import (DaosTestError, check_file_exists, get_file_listing, - get_job_manager_class, get_subprocess_stdout, run_command, run_pcmd) + get_job_manager_class, get_subprocess_stdout, run_command) from run_utils import command_as_user, run_remote from user_utils import get_primary_group from yaml_utils import get_yaml_data @@ -1038,7 +1038,7 @@ def copy_certificates(self, source, hosts): self._command, ", ".join(names)) get_file_listing(hosts, names, self.run_user).log_output(self.log) - def copy_telemetry_certificates(self, source, hosts): + def copy_telemetry_root_certificates(self, source, hosts): """Copy telemetry certificates files from the source to the destination hosts. Args: @@ -1061,7 +1061,7 @@ def copy_telemetry_certificates(self, source, hosts): self.log.info(" WARNING: %s copy telemetry cert failed on %s", dst_file, result.failed_hosts) - def generate_telemetry_certificates(self, hosts, user): + def generate_telemetry_server_certificates(self, hosts, user): """Generate the telemetry certificates for the test on server/client. 
Args: @@ -1080,8 +1080,8 @@ def generate_telemetry_certificates(self, hosts, user): command = os.path.join(certgen_dir, "gen_telemetry_server_certificate.sh ") command = "sudo " + command + user + " " + destination self.log.debug("Generating the telemetry certificate command %s:", command) - result = run_pcmd(hosts, command, 30) - if result[0]['exit_status'] != 0: + result = run_remote(self.log, hosts, command, 30) + if not result.passed: self.log.info(" WARNING: command %s failed", command) def copy_configuration(self, hosts): diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index 67939e57aad..2a42670ab16 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -803,12 +803,12 @@ class TelemetryCredentials(YamlParameters): """Telemetry credentials listing certificates for secure communication.""" def __init__(self, namespace, title, log_dir): - """Initialize a TelemetryConfig object. + """Initialize a TelemetryCredentials object. Args: namespace (str): yaml namespace (path to parameters) - title (str, optional): namespace under which to place the - parameters when creating the yaml file. Defaults to None. + title (str): namespace under which to place the + parameters when creating the yaml file. 
log_dir (str): location of the certificate files """ super().__init__(namespace, None, title) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 6fc2a897500..90639206ea5 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -244,8 +244,10 @@ def prepare(self, storage=True): # Copy certificates self.manager.job.copy_certificates(get_log_file("daosCA/certs"), self._hosts) - self.manager.job.copy_telemetry_certificates(get_log_file("daosTelemetryCA"), self._hosts) - self.manager.job.generate_telemetry_certificates(self._hosts, "daos_server") + self.manager.job.copy_telemetry_root_certificates(get_log_file("daosTelemetryCA"), + self._hosts) + self.manager.job.generate_telemetry_server_certificates(self._hosts, + "daos_server") self._prepare_dmg_certificates() # Prepare dmg for running storage format on all server hosts diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index a6ca88d8eb8..909a3d83b9b 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -62,7 +62,7 @@ class DaosServerTelemetryCredentials(TelemetryCredentials): """Telemetry credentials listing certificates for secure communication.""" def __init__(self, log_dir=os.path.join(os.sep, "tmp")): - """Initialize a TelemetryConfig object.""" + """Initialize a DaosServerTelemetryCredentials object.""" super().__init__("/run/server_config/telemetry_config/*", None, log_dir) # Additional daos_server telemetry credential parameters: @@ -74,20 +74,6 @@ def __init__(self, log_dir=os.path.join(os.sep, "tmp")): self.server_cert = LogParameter(self._log_dir, None, "telemetryserver.crt") self.server_key = LogParameter(self._log_dir, None, "telemetryserver.key") - def get_certificate_data(self, name_list): - """Get certificate data. - - Args: - name_list (list): list of certificate attribute names. 
- - Returns: - data (dict): a dictionary of parameter directory name keys and - value. - - """ - data = super().get_certificate_data(name_list) - return data - def _get_new(self): """Get a new object based upon this one. From 3b9be62c0b14cfc8435727293601f062a1621994 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Thu, 3 Oct 2024 18:18:30 +0000 Subject: [PATCH 06/19] Code updated based on review comments. Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- docs/admin/deployment.md | 24 +++++----- src/control/cmd/daos_agent/config.go | 23 ++++++++-- src/control/cmd/daos_agent/config_test.go | 24 +++++----- src/control/cmd/daos_agent/telemetry.go | 4 +- src/control/cmd/dmg/auto_test.go | 6 +-- src/control/lib/control/http.go | 12 ++--- src/control/lib/control/http_test.go | 31 ++++++------- src/control/lib/control/telemetry.go | 4 +- src/control/lib/control/telemetry_test.go | 28 +++++------ src/control/lib/telemetry/promexp/httpd.go | 1 + src/control/security/config.go | 46 +++++++++---------- src/control/server/config/server.go | 4 ++ src/control/server/config/server_test.go | 8 ++-- src/control/server/telemetry.go | 4 +- src/tests/ftest/config_file_gen.py | 15 +++--- src/tests/ftest/server/storage_tiers.py | 4 +- .../telemetry/basic_client_telemetry.yaml | 6 +-- src/tests/ftest/util/agent_utils.py | 4 +- src/tests/ftest/util/agent_utils_params.py | 18 ++++---- src/tests/ftest/util/command_utils_base.py | 14 +++--- src/tests/ftest/util/dmg_utils.py | 4 +- src/tests/ftest/util/dmg_utils_params.py | 12 ++--- src/tests/ftest/util/server_utils.py | 4 +- src/tests/ftest/util/server_utils_params.py | 20 ++++---- .../certs/gen_telemetry_server_certificate.sh | 26 +++++------ utils/config/daos_agent.yml | 10 ++-- utils/config/daos_control.yml | 8 ++-- utils/config/daos_server.yml | 8 ++-- utils/config/examples/daos_server_local.yml | 2 +- utils/config/examples/daos_server_mdonssd.yml | 2 +- utils/config/examples/daos_server_tcp.yml | 2 +- 
utils/config/examples/daos_server_ucx.yml | 2 +- utils/config/examples/daos_server_verbs.yml | 2 +- 33 files changed, 201 insertions(+), 181 deletions(-) diff --git a/docs/admin/deployment.md b/docs/admin/deployment.md index 908ff9f48ca..5a11d8cf41e 100644 --- a/docs/admin/deployment.md +++ b/docs/admin/deployment.md @@ -831,16 +831,16 @@ subject=CN = wolf-170 Getting CA Private Key Required Server Certificate Files: .//daosTelemetryCA.crt - .//telemetryserver.key - .//telemetryserver.crt + .//telemetry.key + .//telemetry.crt $ ls -l total 20 -rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:18 daosTelemetryCA.crt -rw-r--r-- 1 root root 41 Sep 27 17:19 daosTelemetryCA.srl -rw-r--r-- 1 root root 0 Sep 27 17:18 index.txt -rw-r--r-- 1 root root 3 Sep 27 17:18 serial.txt --rw-r--r-- 1 daos_agent daos_agent 1302 Sep 27 17:19 telemetryserver.crt --r-------- 1 daos_agent daos_agent 1675 Sep 27 17:19 telemetryserver.key +-rw-r--r-- 1 daos_agent daos_agent 1302 Sep 27 17:19 telemetry.crt +-r-------- 1 daos_agent daos_agent 1675 Sep 27 17:19 telemetry.key ``` Below example is ran with daos_server user on server node @@ -858,16 +858,16 @@ subject=CN = wolf-173 Getting CA Private Key Required Server Certificate Files: .//daosTelemetryCA.crt - .//telemetryserver.key - .//telemetryserver.crt + .//telemetry.key + .//telemetry.crt $ ls -l total 20 -rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:24 daosTelemetryCA.crt -rw-r--r-- 1 root root 41 Sep 27 17:24 daosTelemetryCA.srl -rw-r--r-- 1 root root 0 Sep 27 17:24 index.txt -rw-r--r-- 1 root root 3 Sep 27 17:24 serial.txt --rw-r--r-- 1 daos_server daos_server 1302 Sep 27 17:24 telemetryserver.crt --r-------- 1 daos_server daos_server 1679 Sep 27 17:24 telemetryserver.key +-rw-r--r-- 1 daos_server daos_server 1302 Sep 27 17:24 telemetry.crt +-r-------- 1 daos_server daos_server 1679 Sep 27 17:24 telemetry.key ``` You can copy this certificates on /etc/daos/certs/ or someother secure location @@ -884,9 +884,9 @@ telemetry_config: # Set the 
server telemetry endpoint port number port: 9191 # Server certificate for use in TLS handshakes - server_cert: /etc/daos/certs/telemetryserver.crt + https_cert: /etc/daos/certs/telemetry.crt # Key portion of Server Certificate - server_key: /etc/daos/certs/telemetryserver.key + https_key: /etc/daos/certs/telemetry.key ``` ```yaml @@ -901,9 +901,9 @@ telemetry_config: # Retain client telemetry for a period of time after the client process exits. retain: 30s # Server certificate for use in TLS handshakes - server_cert: /etc/daos/certs/telemetryserver.crt + https_cert: /etc/daos/certs/telemetry.crt # Key portion of Server Certificate - server_key: /etc/daos/certs/telemetryserver.key + https_key: /etc/daos/certs/telemetry.key ``` ```yaml diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go index 7c86677ff25..f604cd5a1a7 100644 --- a/src/control/cmd/daos_agent/config.go +++ b/src/control/cmd/daos_agent/config.go @@ -58,6 +58,10 @@ type Config struct { FabricInterfaces []*NUMAFabricConfig `yaml:"fabric_ifaces,omitempty"` ProviderIdx uint // TODO SRS-31: Enable with multiprovider functionality TelemetryConfig *security.TelemetryConfig `yaml:"telemetry_config"` + // Support Old config options. + TelemetryPort int `yaml:"telemetry_port,omitempty"` + TelemetryEnabled bool `yaml:"telemetry_enabled,omitempty"` + TelemetryRetain time.Duration `yaml:"telemetry_retain,omitempty"` } // TelemetryExportEnabled returns true if client telemetry export is enabled. @@ -97,6 +101,19 @@ func LoadConfig(cfgPath string) (*Config, error) { return nil, fmt.Errorf("invalid system name: %s", cfg.SystemName) } + // Support Old config options and copy it to the underline new structure value. 
+ if cfg.TelemetryRetain > 0 { + cfg.TelemetryConfig.Retain = cfg.TelemetryRetain + } + + if cfg.TelemetryPort != 0 { + cfg.TelemetryConfig.Port = cfg.TelemetryPort + } + + if cfg.TelemetryEnabled { + cfg.TelemetryConfig.Enabled = cfg.TelemetryEnabled + } + if cfg.TelemetryConfig.Retain > 0 && cfg.TelemetryConfig.Port == 0 { return nil, errors.New("telemetry_retain requires telemetry_port") } @@ -105,9 +122,9 @@ func LoadConfig(cfgPath string) (*Config, error) { return nil, errors.New("telemetry_enabled requires telemetry_port") } - if cfg.TelemetryConfig.AllowInsecure == false { - if cfg.TelemetryConfig.ServerCert == "" || cfg.TelemetryConfig.ServerKey == "" { - return nil, errors.New("For secure mode, server_cert and server_key required under telemetry_config") + if !cfg.TelemetryConfig.AllowInsecure { + if cfg.TelemetryConfig.HttpsCert == "" || cfg.TelemetryConfig.HttpsKey == "" { + return nil, errors.New("For secure mode, https_cert and https_key required under telemetry_config") } } diff --git a/src/control/cmd/daos_agent/config_test.go b/src/control/cmd/daos_agent/config_test.go index 06280d7d844..32ff3b7f157 100644 --- a/src/control/cmd/daos_agent/config_test.go +++ b/src/control/cmd/daos_agent/config_test.go @@ -98,8 +98,8 @@ control_log_mask: debug transport_config: allow_insecure: true telemetry_config: - retain: 1 - port: 0 + telemetry_retain: 1m + telemetry_port: 0 `) telemetryEnabledWithBadPort := test.CreateTestFile(t, dir, ` @@ -112,11 +112,11 @@ control_log_mask: debug transport_config: allow_insecure: true telemetry_config: - enabled: true - port: 0 + telemetry_enabled: true + telemetry_port: 0 `) - telemetryWithoutServerCert := test.CreateTestFile(t, dir, ` + telemetryWithoutHttpsCert := test.CreateTestFile(t, dir, ` name: shire access_points: ["one:10001", "two:10001"] port: 4242 @@ -127,10 +127,10 @@ transport_config: allow_insecure: true telemetry_config: allow_insecure: false - server_cert: "" + https_cert: "" `) - telemetryWithoutServerKey := 
test.CreateTestFile(t, dir, ` + telemetryWithoutHttpsKey := test.CreateTestFile(t, dir, ` name: shire access_points: ["one:10001", "two:10001"] port: 4242 @@ -141,7 +141,7 @@ transport_config: allow_insecure: true telemetry_config: allow_insecure: false - server_key: "" + https_key: "" `) for name, tc := range map[string]struct { @@ -173,12 +173,12 @@ telemetry_config: expErr: errors.New("telemetry_enabled requires telemetry_port"), }, "telemetry with secure mode with no server certificate": { - path: telemetryWithoutServerCert, - expErr: errors.New("For secure mode, server_cert and server_key required under telemetry_config"), + path: telemetryWithoutHttpsCert, + expErr: errors.New("For secure mode, https_cert and https_key required under telemetry_config"), }, "telemetry with secure mode with no server key": { - path: telemetryWithoutServerKey, - expErr: errors.New("For secure mode, server_cert and server_key required under telemetry_config"), + path: telemetryWithoutHttpsKey, + expErr: errors.New("For secure mode, https_cert and https_key required under telemetry_config"), }, "without optional items": { path: withoutOptCfg, diff --git a/src/control/cmd/daos_agent/telemetry.go b/src/control/cmd/daos_agent/telemetry.go index df222d108b4..60bd83d0b33 100644 --- a/src/control/cmd/daos_agent/telemetry.go +++ b/src/control/cmd/daos_agent/telemetry.go @@ -20,8 +20,8 @@ func startPrometheusExporter(ctx context.Context, log logging.Logger, cs *promex Port: cfg.TelemetryConfig.Port, Title: "DAOS Client Telemetry", AllowInsecure: cfg.TelemetryConfig.AllowInsecure, - HttpsCert: cfg.TelemetryConfig.ServerCert, - HttpsKey: cfg.TelemetryConfig.ServerKey, + HttpsCert: cfg.TelemetryConfig.HttpsCert, + HttpsKey: cfg.TelemetryConfig.HttpsKey, Register: func(ctx context.Context, log logging.Logger) error { c, err := promexp.NewClientCollector(ctx, log, cs, &promexp.CollectorOpts{ RetainDuration: cfg.TelemetryConfig.Retain, diff --git a/src/control/cmd/dmg/auto_test.go 
b/src/control/cmd/dmg/auto_test.go index a2c7ace937e..b1b26eb7d7f 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -593,9 +593,9 @@ disable_hugepages: false control_log_mask: INFO control_log_file: /tmp/daos_server.log telemetry_config: - allow_insecure: false - server_cert: /etc/daos/certs/telemetryserver.crt - server_key: /etc/daos/certs/telemetryserver.key + allow_insecure: true + https_cert: /etc/daos/certs/telemetry.crt + https_key: /etc/daos/certs/telemetry.key ca_cert: /etc/daos/certs/daosTelemetryCA.crt core_dump_filter: 19 name: daos_server diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index d93085056c6..6f4b80d135f 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -39,16 +39,16 @@ type httpGetter interface { retryer getURL() *url.URL getBody(context.Context) ([]byte, error) - getAllowInsecure() *bool + getAllowInsecure() bool getCaCertPath() *string } type httpReq struct { url *url.URL getFn httpGetFn - allowInsecure *bool + allowInsecure bool cacertpath *string - getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) + getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) } func (r *httpReq) canRetry(err error, cur uint) bool { @@ -83,7 +83,7 @@ func (r *httpReq) getURL() *url.URL { return r.url } -func (r *httpReq) getAllowInsecure() *bool { +func (r *httpReq) getAllowInsecure() bool { return r.allowInsecure } @@ -152,7 +152,7 @@ func httpsGetFunc(cert []byte) (httpGetFn, error) { // httpGetBody executes a simple HTTP GET request to a given URL and returns the // content of the response body. 
-func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration, allowInsecure *bool, cacertpath *string) ([]byte, error) { +func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration, allowInsecure bool, cacertpath *string) ([]byte, error) { if url == nil { return nil, errors.New("nil URL") } @@ -165,7 +165,7 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. return nil, errors.New("nil get function") } - if *allowInsecure == false { + if !allowInsecure { if cacertpath == nil { return nil, errors.New("Provide the CA certificate path") } diff --git a/src/control/lib/control/http_test.go b/src/control/lib/control/http_test.go index c15ba079753..1ed2224c62a 100644 --- a/src/control/lib/control/http_test.go +++ b/src/control/lib/control/http_test.go @@ -133,7 +133,7 @@ func TestControl_httpGetBody(t *testing.T) { timeout time.Duration cancelCtx bool getFn httpGetFn - allowInsecure *bool + allowInsecure bool caCertPath *string expResult []byte expErr error @@ -143,17 +143,17 @@ func TestControl_httpGetBody(t *testing.T) { }, "empty URL": { url: &url.URL{}, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, expErr: errors.New("host address is required"), }, "nil getFn": { url: defaultURL, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, expErr: errors.New("nil get function"), }, "getFn error": { url: defaultURL, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return nil, errors.New("mock getFn") }, @@ -161,7 +161,7 @@ func TestControl_httpGetBody(t *testing.T) { }, "http.Response error": { url: defaultURL, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusNotFound, @@ -172,7 +172,7 @@ func TestControl_httpGetBody(t *testing.T) 
{ }, "empty body": { url: defaultURL, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -183,7 +183,7 @@ func TestControl_httpGetBody(t *testing.T) { }, "success with body": { url: defaultURL, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -194,7 +194,7 @@ func TestControl_httpGetBody(t *testing.T) { }, "failure with body in secure mode without CA certificate path": { url: defaultURL, - allowInsecure: &falseAllowInsecure, + allowInsecure: falseAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -205,7 +205,7 @@ func TestControl_httpGetBody(t *testing.T) { }, "failure with body in secure mode with bad CA certificate": { url: defaultURL, - allowInsecure: &falseAllowInsecure, + allowInsecure: falseAllowInsecure, caCertPath: &badCertPerm, getFn: func(_ string) (*http.Response, error) { return &http.Response{ @@ -217,7 +217,7 @@ func TestControl_httpGetBody(t *testing.T) { }, "failure with body in secure mode with bad CA certificate path": { url: defaultURL, - allowInsecure: &falseAllowInsecure, + allowInsecure: falseAllowInsecure, caCertPath: &badCertPath, getFn: func(_ string) (*http.Response, error) { return &http.Response{ @@ -229,7 +229,7 @@ func TestControl_httpGetBody(t *testing.T) { }, "reading body fails": { url: defaultURL, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -240,7 +240,7 @@ func TestControl_httpGetBody(t *testing.T) { }, "request times out": { url: defaultURL, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, timeout: 5 * time.Millisecond, getFn: func(_ string) 
(*http.Response, error) { time.Sleep(1 * time.Second) @@ -253,7 +253,7 @@ func TestControl_httpGetBody(t *testing.T) { }, "request canceled": { url: defaultURL, - allowInsecure: &defaultAllowInsecure, + allowInsecure: defaultAllowInsecure, cancelCtx: true, getFn: func(_ string) (*http.Response, error) { time.Sleep(1 * time.Second) @@ -324,9 +324,8 @@ func (r *mockHTTPGetter) getURL() *url.URL { } } -func (r *mockHTTPGetter) getAllowInsecure() *bool { - allowInsecure := true - return &allowInsecure +func (r *mockHTTPGetter) getAllowInsecure() bool { + return true } func (r *mockHTTPGetter) getCaCertPath() *string { diff --git a/src/control/lib/control/telemetry.go b/src/control/lib/control/telemetry.go index 0916b496795..9c4dea20e23 100644 --- a/src/control/lib/control/telemetry.go +++ b/src/control/lib/control/telemetry.go @@ -114,7 +114,7 @@ func MetricsList(ctx context.Context, req *MetricsListReq) (*MetricsListResp, er } req.url = getMetricsURL(req.Host, req.Port, req.AllowInsecure) - req.allowInsecure = &req.AllowInsecure + req.allowInsecure = req.AllowInsecure req.cacertpath = &req.CaCertPath scraped, err := scrapeMetrics(ctx, req) @@ -176,7 +176,7 @@ func MetricsQuery(ctx context.Context, req *MetricsQueryReq) (*MetricsQueryResp, } req.url = getMetricsURL(req.Host, req.Port, req.AllowInsecure) - req.allowInsecure = &req.AllowInsecure + req.allowInsecure = req.AllowInsecure req.cacertpath = &req.CaCertPath scraped, err := scrapeMetrics(ctx, req) diff --git a/src/control/lib/control/telemetry_test.go b/src/control/lib/control/telemetry_test.go index 906a076a230..a7720347a44 100644 --- a/src/control/lib/control/telemetry_test.go +++ b/src/control/lib/control/telemetry_test.go @@ -118,10 +118,10 @@ func newTestPBHistogram(numBuckets int) *pclient.Metric { return metric } -func mockScrapeFnSuccess(t *testing.T, metricFam ...*pclient.MetricFamily) func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { +func mockScrapeFnSuccess(t 
*testing.T, metricFam ...*pclient.MetricFamily) func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { t.Helper() - return func(_ context.Context, _ *url.URL, _ httpGetFn, _ time.Duration, _ *bool, _ *string) ([]byte, error) { + return func(_ context.Context, _ *url.URL, _ httpGetFn, _ time.Duration, _ bool, _ *string) ([]byte, error) { var b strings.Builder for _, mf := range metricFam { _, err := expfmt.MetricFamilyToText(&b, mf) @@ -147,12 +147,12 @@ func TestControl_scrapeMetrics(t *testing.T) { for name, tc := range map[string]struct { req httpGetter - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) expResult pbMetricMap expErr error }{ "check scrape params": { - scrapeFn: func(_ context.Context, url *url.URL, getter httpGetFn, timeout time.Duration, allowInsecure *bool, caCertPath *string) ([]byte, error) { + scrapeFn: func(_ context.Context, url *url.URL, getter httpGetFn, timeout time.Duration, allowInsecure bool, caCertPath *string) ([]byte, error) { test.AssertEqual(t, testURL.Scheme, url.Scheme, "") test.AssertEqual(t, testURL.Host, url.Host, "") test.AssertEqual(t, testURL.Path, url.Path, "") @@ -166,19 +166,19 @@ func TestControl_scrapeMetrics(t *testing.T) { expResult: pbMetricMap{}, }, "HTTP scrape error": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), }, "scrape returns no content": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { return []byte{}, nil }, expResult: 
pbMetricMap{}, }, "scrape returns bad content": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { return []byte("

Hello world

"), nil }, expErr: errors.New("parsing error"), @@ -217,7 +217,7 @@ func TestControl_MetricsList(t *testing.T) { } for name, tc := range map[string]struct { - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) req *MetricsListReq expResp *MetricsListResp expErr error @@ -241,7 +241,7 @@ func TestControl_MetricsList(t *testing.T) { Port: 1066, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), @@ -252,7 +252,7 @@ func TestControl_MetricsList(t *testing.T) { Port: 8888, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { return []byte{}, nil }, expResp: &MetricsListResp{ @@ -284,7 +284,7 @@ func TestControl_MetricsList(t *testing.T) { } { t.Run(name, func(t *testing.T) { if tc.scrapeFn == nil { - tc.scrapeFn = func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { + tc.scrapeFn = func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { return nil, nil } } @@ -432,7 +432,7 @@ func TestControl_MetricsQuery(t *testing.T) { } for name, tc := range map[string]struct { - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) req *MetricsQueryReq expResp *MetricsQueryResp expErr error @@ -456,7 +456,7 @@ func TestControl_MetricsQuery(t *testing.T) { Port: 1066, AllowInsecure: true, }, - 
scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), @@ -467,7 +467,7 @@ func TestControl_MetricsQuery(t *testing.T) { Port: 8888, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, *bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { return []byte{}, nil }, expResp: &MetricsQueryResp{ diff --git a/src/control/lib/telemetry/promexp/httpd.go b/src/control/lib/telemetry/promexp/httpd.go index 0f6a0920dd8..7597bc5005d 100644 --- a/src/control/lib/telemetry/promexp/httpd.go +++ b/src/control/lib/telemetry/promexp/httpd.go @@ -85,6 +85,7 @@ func StartExporter(ctx context.Context, log logging.Logger, cfg *ExporterConfig) // http listener is a blocking call go func() { log.Infof("Listening on %s", listenAddress) + log.Infof("cfg.AllowInsecure %s", cfg.AllowInsecure) if cfg.AllowInsecure { log.Infof("Prometheus web exporter started with insecure (http) mode") err := srv.ListenAndServe() diff --git a/src/control/security/config.go b/src/control/security/config.go index 5eb3c7b577f..42bb9c587c9 100644 --- a/src/control/security/config.go +++ b/src/control/security/config.go @@ -20,20 +20,20 @@ import ( ) const ( - certDir = "/etc/daos/certs/" - defaultCACert = certDir + "daosCA.crt" - defaultServerCert = certDir + "server.crt" - defaultServerKey = certDir + "server.key" - defaultAdminCert = certDir + "admin.crt" - defaultAdminKey = certDir + "admin.key" - defaultAgentCert = certDir + "agent.crt" - defaultAgentKey = certDir + "agent.key" - defaultTelemetryServerCert = certDir + "telemetryserver.crt" - defaultTelemetryServerKey = certDir + "telemetryserver.key" - defaultTelemetryCACert = certDir + "daosTelemetryCA.crt" - 
defaultClientCertDir = certDir + "clients" - defaultServer = "server" - defaultInsecure = false + certDir = "/etc/daos/certs/" + defaultCACert = certDir + "daosCA.crt" + defaultServerCert = certDir + "server.crt" + defaultServerKey = certDir + "server.key" + defaultAdminCert = certDir + "admin.crt" + defaultAdminKey = certDir + "admin.key" + defaultAgentCert = certDir + "agent.crt" + defaultAgentKey = certDir + "agent.key" + defaultTelemetryCert = certDir + "telemetry.crt" + defaultTelemetryKey = certDir + "telemetry.key" + defaultTelemetryCACert = certDir + "daosTelemetryCA.crt" + defaultClientCertDir = certDir + "clients" + defaultServer = "server" + defaultInsecure = false ) // MappedClientUser represents a client user that is mapped to a uid. @@ -111,12 +111,12 @@ type TransportConfig struct { // TelemetryConfig contains all the information on whether or not to use // secure endpoint for telemetry and their location if their use is specified. type TelemetryConfig struct { - Port int `yaml:"port,omitempty"` - AllowInsecure bool `yaml:"allow_insecure"` - Enabled bool `yaml:"enabled,omitempty"` - Retain time.Duration `yaml:"retain,omitempty"` - ServerCert string `yaml:"server_cert,omitempty"` - ServerKey string `yaml:"server_key,omitempty"` + Port int `yaml:"telemetry_port,omitempty"` + AllowInsecure bool `yaml:"allow_insecure,omitempty"` + Enabled bool `yaml:"telemetry_enabled,omitempty"` + Retain time.Duration `yaml:"telemetry_retain,omitempty"` + HttpsCert string `yaml:"https_cert,omitempty"` + HttpsKey string `yaml:"https_key,omitempty"` CARootPath string `yaml:"ca_cert,omitempty"` } @@ -125,9 +125,9 @@ type TelemetryConfig struct { func DefaultClientTelemetryConfig() *TelemetryConfig { return &TelemetryConfig{ Enabled: false, - AllowInsecure: defaultInsecure, - ServerCert: defaultTelemetryServerCert, - ServerKey: defaultTelemetryServerKey, + AllowInsecure: true, + HttpsCert: defaultTelemetryCert, + HttpsKey: defaultTelemetryKey, CARootPath: 
defaultTelemetryCACert, } } diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index 6e0ef620247..038b322832d 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -61,6 +61,7 @@ type Server struct { HelperLogFile string `yaml:"helper_log_file,omitempty"` FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"` FaultPath string `yaml:"fault_path,omitempty"` + TelemetryPort int `yaml:"telemetry_port,omitempty"` TelemetryConfig *security.TelemetryConfig `yaml:"telemetry_config"` CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"` ClientEnvVars []string `yaml:"client_env_vars,omitempty"` @@ -707,6 +708,9 @@ func (cfg *Server) Validate(log logging.Logger) (err error) { return FaultConfigNoProvider case cfg.ControlPort <= 0: return FaultConfigBadControlPort + //Support old configuration option + case cfg.TelemetryPort < 0: + return FaultConfigBadTelemetryPort } if cfg.TelemetryConfig != nil { diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 9a45070de05..51076d8d10c 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -249,8 +249,8 @@ func TestServerConfig_Constructed(t *testing.T) { WithTelemetryConfig(&security.TelemetryConfig{ AllowInsecure: true, Port: 9191, - ServerCert: "/etc/daos/certs/telemetryserver.crt", - ServerKey: "/etc/daos/certs/telemetryserver.key", + HttpsCert: "/etc/daos/certs/telemetry.crt", + HttpsKey: "/etc/daos/certs/telemetry.key", CARootPath: "/etc/daos/certs/daosTelemetryCA.crt"}). WithSystemName("daos_server"). WithSocketDir("./.daos/daos_server"). 
@@ -425,8 +425,8 @@ func TestServerConfig_MDonSSD_Constructed(t *testing.T) { WithTelemetryConfig(&security.TelemetryConfig{ AllowInsecure: true, Port: 9191, - ServerCert: "/etc/daos/certs/telemetryserver.crt", - ServerKey: "/etc/daos/certs/telemetryserver.key", + HttpsCert: "/etc/daos/certs/telemetry.crt", + HttpsKey: "/etc/daos/certs/telemetry.key", CARootPath: "/etc/daos/certs/daosTelemetryCA.crt"}). WithFabricProvider("ofi+tcp"). WithAccessPoints("example") diff --git a/src/control/server/telemetry.go b/src/control/server/telemetry.go index 0ccac94930b..28848f6879e 100644 --- a/src/control/server/telemetry.go +++ b/src/control/server/telemetry.go @@ -73,8 +73,8 @@ func startPrometheusExporter(ctx context.Context, srv *server) (func(), error) { Port: srv.cfg.TelemetryConfig.Port, Title: "DAOS Engine Telemetry", AllowInsecure: srv.cfg.TelemetryConfig.AllowInsecure, - HttpsCert: srv.cfg.TelemetryConfig.ServerCert, - HttpsKey: srv.cfg.TelemetryConfig.ServerKey, + HttpsCert: srv.cfg.TelemetryConfig.HttpsCert, + HttpsKey: srv.cfg.TelemetryConfig.HttpsKey, Register: func(ctx context.Context, log logging.Logger) error { return regPromEngineSources(ctx, srv.log, srv.harness.Instances()) }, diff --git a/src/tests/ftest/config_file_gen.py b/src/tests/ftest/config_file_gen.py index c030abc9f9e..9ab409ac533 100755 --- a/src/tests/ftest/config_file_gen.py +++ b/src/tests/ftest/config_file_gen.py @@ -12,14 +12,13 @@ import sys from argparse import ArgumentParser, RawDescriptionHelpFormatter -from util.agent_utils_params import (DaosAgentTelemetryCredentials, DaosAgentTransportCredentials, +from util.agent_utils_params import (DaosAgentTelemetryConfig, DaosAgentTransportCredentials, DaosAgentYamlParameters) from util.command_utils_base import CommonConfig -from util.dmg_utils_params import (DmgTelemetryCredentials, DmgTransportCredentials, - DmgYamlParameters) +from util.dmg_utils_params import DmgTelemetryConfig, DmgTransportCredentials, DmgYamlParameters from 
util.exception_utils import CommandFailure -from util.server_utils_params import (DaosServerTelemetryCredentials, - DaosServerTransportCredentials, DaosServerYamlParameters) +from util.server_utils_params import (DaosServerTelemetryConfig, DaosServerTransportCredentials, + DaosServerYamlParameters) def generate_agent_config(args): @@ -34,7 +33,7 @@ def generate_agent_config(args): """ common_cfg = CommonConfig(args.group_name, DaosAgentTransportCredentials()) config = DaosAgentYamlParameters(args.agent_file, common_cfg) - config.telemetry_config = DaosAgentTelemetryCredentials() + config.telemetry_config = DaosAgentTelemetryConfig() # Update the configuration file access points config.other_params.access_points.value = args.node_list.split(",") return create_config(args, config) @@ -52,7 +51,7 @@ def generate_server_config(args): """ common_cfg = CommonConfig(args.group_name, DaosServerTransportCredentials()) config = DaosServerYamlParameters(args.server_file, common_cfg) - config.telemetry_config = DaosServerTelemetryCredentials() + config.telemetry_config = DaosServerTelemetryConfig() config.engine_params[0].storage.storage_tiers[0].storage_class.value = "ram" config.engine_params[0].storage.storage_tiers[0].scm_mount.value = "/mnt/daos" config.engine_params[0].storage.storage_tiers[0].scm_size.value = 0 @@ -73,7 +72,7 @@ def generate_dmg_config(args): """ config = DmgYamlParameters( args.dmg_file, args.group_name, DmgTransportCredentials()) - config.telemetry_config = DmgTelemetryCredentials() + config.telemetry_config = DmgTelemetryConfig() # Update the configuration file hostlist config.hostlist.value = args.node_list.split(",") return create_config(args, config) diff --git a/src/tests/ftest/server/storage_tiers.py b/src/tests/ftest/server/storage_tiers.py index ad946baaa72..536c1c52baf 100644 --- a/src/tests/ftest/server/storage_tiers.py +++ b/src/tests/ftest/server/storage_tiers.py @@ -8,7 +8,7 @@ import yaml from apricot import TestWithServers from 
command_utils_base import CommonConfig -from server_utils import (DaosServerTelemetryCredentials, DaosServerTransportCredentials, +from server_utils import (DaosServerTelemetryConfig, DaosServerTransportCredentials, DaosServerYamlParameters) @@ -68,7 +68,7 @@ def test_tiers(self): common_config = CommonConfig("daos_server", DaosServerTransportCredentials()) config = DaosServerYamlParameters(None, common_config) - config.telemetry_config = DaosServerTelemetryCredentials() + config.telemetry_config = DaosServerTelemetryConfig() config.namespace = self.server_config_namespace config.get_params(self) data = config.get_yaml_data() diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.yaml b/src/tests/ftest/telemetry/basic_client_telemetry.yaml index 82b57ba9961..71c6c361cd9 100644 --- a/src/tests/ftest/telemetry/basic_client_telemetry.yaml +++ b/src/tests/ftest/telemetry/basic_client_telemetry.yaml @@ -20,9 +20,9 @@ server_config: agent_config: telemetry_config: allow_insecure: false - port: 9191 - retain: 30s - enabled: true + telemetry_port: 9191 + telemetry_retain: 30s + telemetry_enabled: true pool: scm_size: 2G diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index b7762c69860..416cefdbf78 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -7,7 +7,7 @@ import re import socket -from agent_utils_params import (DaosAgentTelemetryCredentials, DaosAgentTransportCredentials, +from agent_utils_params import (DaosAgentTelemetryConfig, DaosAgentTransportCredentials, DaosAgentYamlParameters) from ClusterShell.NodeSet import NodeSet from command_utils import CommandWithSubCommand, SubprocessManager, YamlCommand @@ -54,7 +54,7 @@ def get_agent_command(group, cert_dir, bin_dir, config_file, run_user, config_te transport_config = DaosAgentTransportCredentials(cert_dir) common_config = CommonConfig(group, transport_config) config = DaosAgentYamlParameters(config_file, common_config) - 
config.telemetry_config = DaosAgentTelemetryCredentials(cert_dir) + config.telemetry_config = DaosAgentTelemetryConfig(cert_dir) command = DaosAgentCommand(bin_dir, config, run_user=run_user) if config_temp: # Setup the DaosAgentCommand to write the config file data to the diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index 1d80b627e1f..b6a04bf874f 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -5,7 +5,7 @@ """ import os -from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, +from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, TransportCredentials, YamlParameters) @@ -33,7 +33,7 @@ def _get_new(self): return DaosAgentTransportCredentials(self._log_dir) -class DaosAgentTelemetryCredentials(TelemetryCredentials): +class DaosAgentTelemetryConfig(TelemetryConfig): # pylint: disable=too-few-public-methods """Telemetry credentials listing certificates for secure communication.""" @@ -41,19 +41,19 @@ def __init__(self, log_dir=os.path.join(os.sep, "tmp")): """Initialize a TelemetryConfig object.""" super().__init__("/run/agent_config/telemetry_config/*", None, log_dir) - self.port = BasicParameter(None, 9192) - self.enabled = BasicParameter(None) - self.retain = BasicParameter(None) - self.server_cert = LogParameter(self._log_dir, None, "telemetryserver.crt") - self.server_key = LogParameter(self._log_dir, None, "telemetryserver.key") + self.telemetry_port = BasicParameter(None, 9192) + self.telemetry_enabled = BasicParameter(None) + self.telemetry_retain = BasicParameter(None) + self.https_cert = LogParameter(self._log_dir, None, "telemetry.crt") + self.https_key = LogParameter(self._log_dir, None, "telemetry.key") def _get_new(self): """Get a new object based upon this one. 
Returns: - DaosServerTelemetryCredentials: a new DaosServerTelemetryCredentials object + DaosServerTelemetryConfig: a new DaosServerTelemetryConfig object """ - return DaosAgentTelemetryCredentials(self._log_dir) + return DaosAgentTelemetryConfig(self._log_dir) class DaosAgentYamlParameters(YamlParameters): diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index 2a42670ab16..5b39ca6015a 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -799,11 +799,11 @@ def _get_new(self): return TransportCredentials(self.namespace, self.title, self._log_dir) -class TelemetryCredentials(YamlParameters): +class TelemetryConfig(YamlParameters): """Telemetry credentials listing certificates for secure communication.""" def __init__(self, namespace, title, log_dir): - """Initialize a TelemetryCredentials object. + """Initialize a TelemetryConfig object. Args: namespace (str): yaml namespace (path to parameters) @@ -816,9 +816,9 @@ def __init__(self, namespace, title, log_dir): default_insecure = str(os.environ.get("DAOS_TEST_INSECURE_MODE", True)) default_insecure = default_insecure.lower() == "true" self.allow_insecure = BasicParameter(None, default_insecure) - self.port = BasicParameter(None, 9191) - self.retain = None - self.enabled = None + self.telemetry_port = BasicParameter(None, 9191) + self.telemetry_retain = None + self.telemetry_enabled = None def get_yaml_data(self): """Convert the parameters into a dictionary to use to write a yaml file. @@ -864,9 +864,9 @@ def _get_new(self): """Get a new object based upon this one. 
Returns: - TelemetryCredentials: a new TelemetryCredentials object + TelemetryConfig: a new TelemetryConfig object """ - return TelemetryCredentials(self.namespace, self.title, self._log_dir) + return TelemetryConfig(self.namespace, self.title, self._log_dir) class CommonConfig(YamlParameters): diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 4d731db7eb4..51e324ea937 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -10,7 +10,7 @@ from pwd import getpwuid from dmg_utils_base import DmgCommandBase -from dmg_utils_params import DmgTelemetryCredentials, DmgTransportCredentials, DmgYamlParameters +from dmg_utils_params import DmgTelemetryConfig, DmgTransportCredentials, DmgYamlParameters from exception_utils import CommandFailure from general_utils import dict_to_str, get_numeric_list @@ -39,7 +39,7 @@ def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hos """ transport_config = DmgTransportCredentials(cert_dir) - telemetry_config = DmgTelemetryCredentials(cert_dir) + telemetry_config = DmgTelemetryConfig(cert_dir) config = DmgYamlParameters(config_file, group, transport_config, telemetry_config) command = DmgCommand(bin_dir, config, hostlist_suffix) if config_temp: diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index 5d6cea72f38..32b7ec022b6 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -4,7 +4,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ -from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, +from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, TransportCredentials, YamlParameters) @@ -26,11 +26,11 @@ def _get_new(self): return DmgTransportCredentials(self._log_dir) -class DmgTelemetryCredentials(TelemetryCredentials): +class DmgTelemetryConfig(TelemetryConfig): """Telemetry credentials 
listing certificates for secure communication.""" def __init__(self, log_dir="/tmp"): - """Initialize a TelemetryCredentials object.""" + """Initialize a TelemetryConfig object.""" super().__init__("/run/dmg/telemetry_config/*", None, log_dir) self.ca_cert = LogParameter(self._log_dir, None, "daosTelemetryCA.crt") @@ -38,9 +38,9 @@ def _get_new(self): """Get a new object based upon this one. Returns: - DmgTelemetryCredentials: a new DmgTelemetryCredentials object + DmgTelemetryConfig: a new DmgTelemetryConfig object """ - return DmgTelemetryCredentials(self._log_dir) + return DmgTelemetryConfig(self._log_dir) class DmgYamlParameters(YamlParameters): @@ -54,7 +54,7 @@ def __init__(self, filename, name, transport, telemetry=None): name (str): The DAOS system name. transport (DmgTransportCredentials): dmg security configuration settings. - telemetry (DmgTelemetryCredentials): dmg telemetry + telemetry (DmgTelemetryConfig): dmg telemetry configuration settings. """ super().__init__("/run/dmg/*", filename, None, transport) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 90639206ea5..e444b0c78aa 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -21,7 +21,7 @@ from host_utils import get_local_host from run_utils import run_remote, stop_processes from server_utils_base import DaosServerCommand, DaosServerInformation, ServerFailed -from server_utils_params import (DaosServerTelemetryCredentials, DaosServerTransportCredentials, +from server_utils_params import (DaosServerTelemetryConfig, DaosServerTransportCredentials, DaosServerYamlParameters) from user_utils import get_chown_command @@ -46,7 +46,7 @@ def get_server_command(group, cert_dir, bin_dir, config_file, config_temp=None): transport_config = DaosServerTransportCredentials(cert_dir) common_config = CommonConfig(group, transport_config) config = DaosServerYamlParameters(config_file, common_config) - config.telemetry_config = 
DaosServerTelemetryCredentials(cert_dir) + config.telemetry_config = DaosServerTelemetryConfig(cert_dir) command = DaosServerCommand(bin_dir, config, None) diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 909a3d83b9b..d9b33f61701 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -5,7 +5,7 @@ """ import os -from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, +from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, TransportCredentials, YamlParameters) MAX_STORAGE_TIERS = 5 @@ -57,30 +57,30 @@ def _get_new(self): return DaosServerTransportCredentials(self._log_dir) -class DaosServerTelemetryCredentials(TelemetryCredentials): +class DaosServerTelemetryConfig(TelemetryConfig): # pylint: disable=too-few-public-methods """Telemetry credentials listing certificates for secure communication.""" def __init__(self, log_dir=os.path.join(os.sep, "tmp")): - """Initialize a DaosServerTelemetryCredentials object.""" + """Initialize a DaosServerTelemetryConfig object.""" super().__init__("/run/server_config/telemetry_config/*", None, log_dir) # Additional daos_server telemetry credential parameters: # - port: : Telemetry endpoint port number - # - server_cert: : Server certificate - # - server_key: : Server Key portion + # - https_cert: : Server certificate + # - https_key: : Server Key portion # - self.port = BasicParameter(None, 9191) - self.server_cert = LogParameter(self._log_dir, None, "telemetryserver.crt") - self.server_key = LogParameter(self._log_dir, None, "telemetryserver.key") + self.telemetry_port = BasicParameter(None, 9191) + self.https_cert = LogParameter(self._log_dir, None, "telemetry.crt") + self.https_key = LogParameter(self._log_dir, None, "telemetry.key") def _get_new(self): """Get a new object based upon this one. 
Returns: - DaosServerTelemetryCredentials: a new DaosServerTelemetryCredentials object + DaosServerTelemetryConfig: a new DaosServerTelemetryConfig object """ - return DaosServerTelemetryCredentials(self._log_dir) + return DaosServerTelemetryConfig(self._log_dir) class DaosServerYamlParameters(YamlParameters): diff --git a/utils/certs/gen_telemetry_server_certificate.sh b/utils/certs/gen_telemetry_server_certificate.sh index 3670a92afec..f1de56da85e 100755 --- a/utils/certs/gen_telemetry_server_certificate.sh +++ b/utils/certs/gen_telemetry_server_certificate.sh @@ -52,33 +52,33 @@ subjectAltName = DNS:${HOSTNAME} function generate_server_cert () { echo "Generating Server Certificate" # Generate Private key and set its permissions - openssl genrsa -out "${CA_HOME}/telemetryserver.key" 2048 - [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetryserver.key" - chmod 0400 "${CA_HOME}/telemetryserver.key" + openssl genrsa -out "${CA_HOME}/telemetry.key" 2048 + [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetry.key" + chmod 0400 "${CA_HOME}/telemetry.key" # Generate a Certificate Signing Request (CRS) - openssl req -new -key "${CA_HOME}/telemetryserver.key" \ - -out "${CA_HOME}/telemetryserver.csr" -config "${CA_HOME}/telemetry.cnf" + openssl req -new -key "${CA_HOME}/telemetry.key" \ + -out "${CA_HOME}/telemetry.csr" -config "${CA_HOME}/telemetry.cnf" # Create Certificate from request - openssl x509 -req -in "${CA_HOME}/telemetryserver.csr" -CA "${CA_HOME}/daosTelemetryCA.crt" \ - -CAkey "${CA_HOME}/daosTelemetryCA.key" -CAcreateserial -out "${CA_HOME}/telemetryserver.crt" \ + openssl x509 -req -in "${CA_HOME}/telemetry.csr" -CA "${CA_HOME}/daosTelemetryCA.crt" \ + -CAkey "${CA_HOME}/daosTelemetryCA.key" -CAcreateserial -out "${CA_HOME}/telemetry.crt" \ -days ${DAYS} -sha256 -extfile "$CA_HOME/telemetry.cnf" -extensions v3_ext - [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetryserver.crt" - chmod 0644 
"${CA_HOME}/telemetryserver.crt" + [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetry.crt" + chmod 0644 "${CA_HOME}/telemetry.crt" echo "Required Server Certificate Files: ${CA_HOME}/daosTelemetryCA.crt - ${CA_HOME}/telemetryserver.key - ${CA_HOME}/telemetryserver.crt" + ${CA_HOME}/telemetry.key + ${CA_HOME}/telemetry.crt" } function cleanup () { - # Remove this key as it's not required after creating the telemetryserver.key + # Remove this key as it's not required after creating the telemetry.key rm -f "${CA_HOME}/daosTelemetryCA.key" - rm -f "${CA_HOME}/telemetryserver.csr" + rm -f "${CA_HOME}/telemetry.csr" rm -f "${CA_HOME}/telemetry.cnf" } diff --git a/utils/config/daos_agent.yml b/utils/config/daos_agent.yml index 304550cdfef..7315ecb3bb6 100644 --- a/utils/config/daos_agent.yml +++ b/utils/config/daos_agent.yml @@ -31,18 +31,18 @@ #telemetry_config: # # Set the client telemetry endpoint port number # # default: 9192 -# port: 9192 +# telemetry_port: 9192 # # # Enable client telemetry for all DAOS clients. # # If false, clients will need to optionally enable telemetry by setting # # the D_CLIENT_METRICS_ENABLE environment variable to true. # # default: false -# enabled: true +# telemetry_enabled: true # # # Retain client telemetry for a period of time after the client # # process exits. # # default 0 (do not retain telemetry after client exit) -# retain: 1m +# telemetry_retain: 1m # # # In order to disable transport security, uncomment and set allow_insecure # # to true. Not recommended for production configurations. @@ -50,11 +50,11 @@ # # # Server certificate for use in TLS handshakes # # DAOS client is the HTTPS server to open secure telemetry endpoint. -# server_cert: /etc/daos/certs/telemetryserver.crt +# https_cert: /etc/daos/certs/telemetry.crt # # # Key portion of Server Certificate # # DAOS client is the HTTPS server to open secure telemetry endpoint. 
-# server_key: /etc/daos/certs/telemetryserver.key +# https_key: /etc/daos/certs/telemetry.key ## Configuration for user credential management. #credential_config: diff --git a/utils/config/daos_control.yml b/utils/config/daos_control.yml index ea2da17066e..076168b35e2 100644 --- a/utils/config/daos_control.yml +++ b/utils/config/daos_control.yml @@ -39,12 +39,12 @@ # # Key portion of Admin Certificate # key: /etc/daos/certs/admin.key -## Enable Telemetry HTTP/HTTPS endpoint for remote client telemetry collection. +## Configuration for telemetry collection commands. # #telemetry_config: -# # In order to disable transport security, uncomment and set allow_insecure -# # to true. Not recommended for production configurations. -# allow_insecure: false +# # In order to enabled transport security, uncomment and set allow_insecure +# # to false. +# allow_insecure: true # # # Custom CA Root certificate for generated telemetry certs # ca_cert: /etc/daos/certs/daosTelemetryCA.crt diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 0d56afbd589..cdb2a2bd61e 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -259,18 +259,18 @@ ## Enable Telemetry HTTP/HTTPS endpoint for remote telemetry collection. 
# #telemetry_config: -# # In order to disable telemetry security, uncomment and set allow_insecure to false +# # In order to enabled telemetry security, uncomment and set allow_insecure to false # allow_insecure: true # # # Set the server telemetry endpoint port number # # default: 9191 -# port: 9191 +# telemetry_port: 9191 # # # Server certificate for use in TLS handshakes -# server_cert: /etc/daos/certs/telemetryserver.crt +# https_cert: /etc/daos/certs/telemetry.crt # # # Key portion of Server Certificate -# server_key: /etc/daos/certs/telemetryserver.key +# https_key: /etc/daos/certs/telemetry.key # # ## If desired, a set of client-side environment variables may be diff --git a/utils/config/examples/daos_server_local.yml b/utils/config/examples/daos_server_local.yml index fa797a92d2f..e23ef691ffe 100644 --- a/utils/config/examples/daos_server_local.yml +++ b/utils/config/examples/daos_server_local.yml @@ -9,7 +9,7 @@ transport_config: telemetry_config: allow_insecure: true - port: 9191 + telemetry_port: 9191 engines: - diff --git a/utils/config/examples/daos_server_mdonssd.yml b/utils/config/examples/daos_server_mdonssd.yml index 8052681fdf4..7ea5267de64 100644 --- a/utils/config/examples/daos_server_mdonssd.yml +++ b/utils/config/examples/daos_server_mdonssd.yml @@ -28,7 +28,7 @@ control_metadata: telemetry_config: allow_insecure: true - port: 9191 + telemetry_port: 9191 engines: - diff --git a/utils/config/examples/daos_server_tcp.yml b/utils/config/examples/daos_server_tcp.yml index e2fdc4af0d4..39459d412e6 100644 --- a/utils/config/examples/daos_server_tcp.yml +++ b/utils/config/examples/daos_server_tcp.yml @@ -9,7 +9,7 @@ control_log_file: /tmp/daos_server.log telemetry_config: allow_insecure: true - port: 9191 + telemetry_port: 9191 ## Transport Credentials Specifying certificates to secure communications ## diff --git a/utils/config/examples/daos_server_ucx.yml b/utils/config/examples/daos_server_ucx.yml index cd76ad6dd05..8b0b5c4c7d2 100644 --- 
a/utils/config/examples/daos_server_ucx.yml +++ b/utils/config/examples/daos_server_ucx.yml @@ -18,7 +18,7 @@ control_log_file: /tmp/daos_server.log telemetry_config: allow_insecure: true - port: 9191 + telemetry_port: 9191 ## Transport Credentials Specifying certificates to secure communications ## diff --git a/utils/config/examples/daos_server_verbs.yml b/utils/config/examples/daos_server_verbs.yml index d48a2d1330b..c5b416faa0f 100644 --- a/utils/config/examples/daos_server_verbs.yml +++ b/utils/config/examples/daos_server_verbs.yml @@ -9,7 +9,7 @@ control_log_file: /tmp/daos_server.log telemetry_config: allow_insecure: true - port: 9191 + telemetry_port: 9191 ## Transport Credentials Specifying certificates to secure communications ## From 82b37d1320662e07383aa4e315e766c605e1f1c1 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Thu, 14 Nov 2024 01:38:24 +0000 Subject: [PATCH 07/19] Code updated based on review comments. Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- src/control/cmd/dmg/auto_test.go | 1 - src/control/cmd/dmg/telemetry.go | 38 ++++++- src/control/lib/control/http.go | 48 ++++++--- src/control/lib/control/http_test.go | 5 +- src/control/lib/control/telemetry.go | 24 ++--- src/control/lib/telemetry/promexp/httpd.go | 1 - src/control/security/config.go | 28 +++-- src/control/server/config/server_test.go | 6 +- .../gen_telemetry_server_certificate.sh | 32 ++++++ src/tests/ftest/util/agent_utils.py | 2 - src/tests/ftest/util/command_utils.py | 26 +---- src/tests/ftest/util/dmg_utils_params.py | 1 - src/tests/ftest/util/launch_utils.py | 33 +----- src/tests/ftest/util/server_utils.py | 5 +- utils/certs/SConscript | 4 +- .../certs/gen_telemetry_admin_certificate.sh | 101 ------------------ .../certs/gen_telemetry_server_certificate.sh | 91 ---------------- 17 files changed, 126 insertions(+), 320 deletions(-) create mode 100755 src/tests/ftest/scripts/gen_telemetry_server_certificate.sh delete mode 100755 
utils/certs/gen_telemetry_admin_certificate.sh delete mode 100755 utils/certs/gen_telemetry_server_certificate.sh diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index b1b26eb7d7f..39b140bfdb1 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -596,7 +596,6 @@ telemetry_config: allow_insecure: true https_cert: /etc/daos/certs/telemetry.crt https_key: /etc/daos/certs/telemetry.key - ca_cert: /etc/daos/certs/daosTelemetryCA.crt core_dump_filter: 19 name: daos_server socket_dir: /var/run/daos_server diff --git a/src/control/cmd/dmg/telemetry.go b/src/control/cmd/dmg/telemetry.go index db069f3a97b..2ff1a83e598 100644 --- a/src/control/cmd/dmg/telemetry.go +++ b/src/control/cmd/dmg/telemetry.go @@ -338,16 +338,31 @@ func (cmd *metricsListCmd) Execute(args []string) error { req := new(control.MetricsListReq) req.Port = cmd.Port req.Host = host - req.AllowInsecure = cmd.cfgCmd.config.TelemetryConfig.AllowInsecure req.CaCertPath = cmd.cfgCmd.config.TelemetryConfig.CARootPath if !cmd.JSONOutputEnabled() { cmd.Info(getConnectingMsg(req.Host, req.Port)) } + // Trying Secure Mode First, It will ignore the certificate if it's not provided + // or request with the certificate. 
+ req.AllowInsecure = false + if req.CaCertPath == "" { + cmd.Debug("Trying Secure Mode (HTTPS) first, ignoring certificate") + } else { + cmd.Debug("Trying Secure Mode (HTTPS) first with certificate") + } + resp, err := control.MetricsList(cmd.MustLogCtx(), req) if err != nil { - return err + cmd.Errorf("Secure Mode (HTTPS) failed: %s", err.Error()) + //Trying Insecure Mode + req.AllowInsecure = !req.AllowInsecure + cmd.Debug("Trying Insecure Mode (HTTP)") + resp, err = control.MetricsList(cmd.MustLogCtx(), req) + if err != nil { + return err + } } if cmd.JSONOutputEnabled() { @@ -396,7 +411,6 @@ func (cmd *metricsQueryCmd) Execute(args []string) error { req := new(control.MetricsQueryReq) req.Port = cmd.Port req.Host = host - req.AllowInsecure = cmd.cfgCmd.config.TelemetryConfig.AllowInsecure req.CaCertPath = cmd.cfgCmd.config.TelemetryConfig.CARootPath req.MetricNames = common.TokenizeCommaSeparatedString(cmd.Metrics) @@ -404,9 +418,25 @@ func (cmd *metricsQueryCmd) Execute(args []string) error { cmd.Info(getConnectingMsg(req.Host, req.Port)) } + // Trying Secure Mode First, It will ignore the certificate if it's not provided + // or request with the certificate. 
+ req.AllowInsecure = false + if req.CaCertPath == "" { + cmd.Debug("Trying Secure Mode (HTTPS) first, ignoring certificate") + } else { + cmd.Debug("Trying Secure Mode (HTTPS) first with certificate") + } + resp, err := control.MetricsQuery(cmd.MustLogCtx(), req) if err != nil { - return err + cmd.Errorf("Secure Mode (HTTPS) failed: %s", err.Error()) + //Trying Insecure Mode + req.AllowInsecure = !req.AllowInsecure + cmd.Debug("Trying Insecure Mode (HTTP)") + resp, err = control.MetricsQuery(cmd.MustLogCtx(), req) + if err != nil { + return err + } } if cmd.JSONOutputEnabled() { diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index 6f4b80d135f..d5ac19e41be 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -128,9 +128,14 @@ func httpGetBodyRetry(ctx context.Context, req httpGetter) ([]byte, error) { return result, err } -// httpsGetFunc will prepare the GET requested using the certificate for secure mode +// httpsSecureGetFunc will prepare the GET requested using the certificate for secure mode // and return the http.Get -func httpsGetFunc(cert []byte) (httpGetFn, error) { +func httpsSecureGetFunc(cacertpath string) (httpGetFn, error) { + cert, err := ioutil.ReadFile(cacertpath) + if err != nil { + return nil, errors.Wrap(err, "reading CA certificate file Error") + } + caCertPool := x509.NewCertPool() result := caCertPool.AppendCertsFromPEM(cert) if !result { @@ -150,6 +155,22 @@ func httpsGetFunc(cert []byte) (httpGetFn, error) { return client.Get, nil } +// httpsInsecureGetFunc will prepare the GET requested without certificate for secure mode +// and return the http.Get +func httpsInsecureGetFunc() httpGetFn { + tlsConfig := &tls.Config{ + InsecureSkipVerify: true, + } + + tr := &http.Transport{ + TLSClientConfig: tlsConfig, + } + + client := &http.Client{Transport: tr} + + return client.Get +} + // httpGetBody executes a simple HTTP GET request to a given URL and returns the // content of the 
response body. func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration, allowInsecure bool, cacertpath *string) ([]byte, error) { @@ -165,19 +186,15 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. return nil, errors.New("nil get function") } - if !allowInsecure { - if cacertpath == nil { - return nil, errors.New("Provide the CA certificate path") - } - - cert, err := ioutil.ReadFile(*cacertpath) - if err != nil { - return nil, errors.Wrap(err, "reading CA certificate file Error") - } - - get, err = httpsGetFunc(cert) - if err != nil { - return nil, errors.Wrap(err, "https GET request failed") + if allowInsecure == false { + if cacertpath == nil || *cacertpath == "" { + get = httpsInsecureGetFunc() + } else { + var err error + get, err = httpsSecureGetFunc(*cacertpath) + if err != nil { + return nil, err + } } } @@ -186,7 +203,6 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. respChan := make(chan *http.Response) errChan := make(chan error) - go func() { httpResp, err := get(url.String()) if err != nil { diff --git a/src/control/lib/control/http_test.go b/src/control/lib/control/http_test.go index 1ed2224c62a..9efda5c06fc 100644 --- a/src/control/lib/control/http_test.go +++ b/src/control/lib/control/http_test.go @@ -198,10 +198,9 @@ func TestControl_httpGetBody(t *testing.T) { getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, - Body: newMockReadCloser("this is the body of an HTTP response"), }, nil }, - expErr: errors.New("Provide the CA certificate path"), + expErr: errors.New("Get \"//testhost\": unsupported protocol scheme"), }, "failure with body in secure mode with bad CA certificate": { url: defaultURL, @@ -210,7 +209,6 @@ func TestControl_httpGetBody(t *testing.T) { getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, - Body: newMockReadCloser("this is the body of 
an HTTP response"), }, nil }, expErr: errors.New("Get \"//testhost\": unsupported protocol scheme"), @@ -222,7 +220,6 @@ func TestControl_httpGetBody(t *testing.T) { getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, - Body: newMockReadCloser("this is the body of an HTTP response"), }, nil }, expErr: errors.New("reading CA certificate file Error: open wrongpath/notavailable.crt: no such file or directory"), diff --git a/src/control/lib/control/telemetry.go b/src/control/lib/control/telemetry.go index 9c4dea20e23..45110cf9eb5 100644 --- a/src/control/lib/control/telemetry.go +++ b/src/control/lib/control/telemetry.go @@ -14,6 +14,7 @@ import ( "strings" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/logging" "github.com/pkg/errors" pclient "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" @@ -83,10 +84,11 @@ type ( // MetricsListReq is used to request the list of metrics. MetricsListReq struct { httpReq - Host string // Host to query for telemetry data - Port uint32 // Port to use for collecting telemetry data - AllowInsecure bool // Set the https end point secure - CaCertPath string // CA Cert path for telemetry + Host string // Host to query for telemetry data + Port uint32 // Port to use for collecting telemetry data + AllowInsecure bool // Set the https end point secure + CaCertPath string // CA Cert path for telemetry + Log logging.Logger // Logging the info } // MetricsListResp contains the list of available metrics. 
@@ -109,14 +111,9 @@ func MetricsList(ctx context.Context, req *MetricsListReq) (*MetricsListResp, er return nil, errors.New("port must be specified") } - if req.AllowInsecure == false && req.CaCertPath == "" { - return nil, errors.New("Provide the CA certificate path") - } - - req.url = getMetricsURL(req.Host, req.Port, req.AllowInsecure) req.allowInsecure = req.AllowInsecure req.cacertpath = &req.CaCertPath - + req.url = getMetricsURL(req.Host, req.Port, req.allowInsecure) scraped, err := scrapeMetrics(ctx, req) if err != nil { return nil, errors.Wrap(err, "unable to list metrics") @@ -171,14 +168,9 @@ func MetricsQuery(ctx context.Context, req *MetricsQueryReq) (*MetricsQueryResp, return nil, errors.New("port must be specified") } - if req.AllowInsecure == false && req.CaCertPath == "" { - return nil, errors.New("Provide the CA certificate path") - } - - req.url = getMetricsURL(req.Host, req.Port, req.AllowInsecure) req.allowInsecure = req.AllowInsecure req.cacertpath = &req.CaCertPath - + req.url = getMetricsURL(req.Host, req.Port, req.allowInsecure) scraped, err := scrapeMetrics(ctx, req) if err != nil { return nil, errors.Wrap(err, "unable to query metrics") diff --git a/src/control/lib/telemetry/promexp/httpd.go b/src/control/lib/telemetry/promexp/httpd.go index 7597bc5005d..0f6a0920dd8 100644 --- a/src/control/lib/telemetry/promexp/httpd.go +++ b/src/control/lib/telemetry/promexp/httpd.go @@ -85,7 +85,6 @@ func StartExporter(ctx context.Context, log logging.Logger, cfg *ExporterConfig) // http listener is a blocking call go func() { log.Infof("Listening on %s", listenAddress) - log.Infof("cfg.AllowInsecure %s", cfg.AllowInsecure) if cfg.AllowInsecure { log.Infof("Prometheus web exporter started with insecure (http) mode") err := srv.ListenAndServe() diff --git a/src/control/security/config.go b/src/control/security/config.go index 42bb9c587c9..9f6a3392946 100644 --- a/src/control/security/config.go +++ b/src/control/security/config.go @@ -20,20 +20,19 @@ 
import ( ) const ( - certDir = "/etc/daos/certs/" - defaultCACert = certDir + "daosCA.crt" - defaultServerCert = certDir + "server.crt" - defaultServerKey = certDir + "server.key" - defaultAdminCert = certDir + "admin.crt" - defaultAdminKey = certDir + "admin.key" - defaultAgentCert = certDir + "agent.crt" - defaultAgentKey = certDir + "agent.key" - defaultTelemetryCert = certDir + "telemetry.crt" - defaultTelemetryKey = certDir + "telemetry.key" - defaultTelemetryCACert = certDir + "daosTelemetryCA.crt" - defaultClientCertDir = certDir + "clients" - defaultServer = "server" - defaultInsecure = false + certDir = "/etc/daos/certs/" + defaultCACert = certDir + "daosCA.crt" + defaultServerCert = certDir + "server.crt" + defaultServerKey = certDir + "server.key" + defaultAdminCert = certDir + "admin.crt" + defaultAdminKey = certDir + "admin.key" + defaultAgentCert = certDir + "agent.crt" + defaultAgentKey = certDir + "agent.key" + defaultTelemetryCert = certDir + "telemetry.crt" + defaultTelemetryKey = certDir + "telemetry.key" + defaultClientCertDir = certDir + "clients" + defaultServer = "server" + defaultInsecure = false ) // MappedClientUser represents a client user that is mapped to a uid. @@ -128,7 +127,6 @@ func DefaultClientTelemetryConfig() *TelemetryConfig { AllowInsecure: true, HttpsCert: defaultTelemetryCert, HttpsKey: defaultTelemetryKey, - CARootPath: defaultTelemetryCACert, } } diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 51076d8d10c..41a4468083a 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -250,8 +250,7 @@ func TestServerConfig_Constructed(t *testing.T) { AllowInsecure: true, Port: 9191, HttpsCert: "/etc/daos/certs/telemetry.crt", - HttpsKey: "/etc/daos/certs/telemetry.key", - CARootPath: "/etc/daos/certs/daosTelemetryCA.crt"}). + HttpsKey: "/etc/daos/certs/telemetry.key"}). WithSystemName("daos_server"). 
WithSocketDir("./.daos/daos_server"). WithFabricProvider("ofi+verbs;ofi_rxm"). @@ -426,8 +425,7 @@ func TestServerConfig_MDonSSD_Constructed(t *testing.T) { AllowInsecure: true, Port: 9191, HttpsCert: "/etc/daos/certs/telemetry.crt", - HttpsKey: "/etc/daos/certs/telemetry.key", - CARootPath: "/etc/daos/certs/daosTelemetryCA.crt"}). + HttpsKey: "/etc/daos/certs/telemetry.key"}). WithFabricProvider("ofi+tcp"). WithAccessPoints("example") diff --git a/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh b/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh new file mode 100755 index 00000000000..6d359fbecaa --- /dev/null +++ b/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# /* +# * (C) Copyright 2024 Intel Corporation. +# * +# * SPDX-License-Identifier: BSD-2-Clause-Patent +# */ + +__usage=" + +This is just an example script for testing purpose. +Please modify to use in Production environment. + +Usage: gen_telemetry_server_certificate.sh [USER] [DIR] + USER: DAOS has server and client and the certificate need the specific file permission + based on system usage. + Use daos_server if running script on server + Use daos_agent if running script on client + + DIR: Generate telemetry certificates for DAOS metrics in the [DIR]. + By default [DIR] is the current directory. 
+" +DAYS=1095 + +USER=$1 +CA_HOME="${2:-.}/" +HOSTNAME=$(hostname -s) + +openssl req -x509 -newkey rsa:4096 -keyout "${CA_HOME}/telemetry.key" -out "${CA_HOME}/telemetry.crt" -sha256 -days ${DAYS} -nodes -subj "/CN=\"${HOSTNAME}\"" +chmod 0400 "${CA_HOME}/telemetry.key" +chmod 0644 "${CA_HOME}/telemetry.crt" +chown "${USER}"."${USER}" "${CA_HOME}/telemetry.key" +chown "${USER}"."${USER}" "${CA_HOME}/telemetry.crt" diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index 416cefdbf78..f437bc36e04 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -287,8 +287,6 @@ def start(self): # Copy certificates self.manager.job.copy_certificates( get_log_file("daosCA/certs"), self._hosts) - self.manager.job.copy_telemetry_root_certificates( - get_log_file("daosTelemetryCA"), self._hosts) self.manager.job.generate_telemetry_server_certificates(self._hosts, "daos_agent") # Verify the socket directory exists when using a non-systemctl manager diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index 77da3e85249..49373e37cc9 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -1038,29 +1038,6 @@ def copy_certificates(self, source, hosts): self._command, ", ".join(names)) get_file_listing(hosts, names, self.run_user).log_output(self.log) - def copy_telemetry_root_certificates(self, source, hosts): - """Copy telemetry certificates files from the source to the destination hosts. - - Args: - source (str): source of the certificate files. - hosts (NodeSet): list of the destination hosts. 
- """ - certfiles = ["daosTelemetryCA.crt", "daosTelemetryCA.key"] - data = self.yaml.telemetry_config.get_certificate_data( - self.yaml.telemetry_config.get_attribute_names(LogParameter)) - destination = list(data.keys())[0] - - for file_name in certfiles: - src_file = os.path.join(source, file_name) - dst_file = os.path.join(destination, file_name) - self.log.debug(" %s -> %s", src_file, dst_file) - result = distribute_files( - self.log, hosts, src_file, dst_file, mkdir=False, - verbose=False, sudo=True, owner=self.certificate_owner) - if not result.passed: - self.log.info(" WARNING: %s copy telemetry cert failed on %s", - dst_file, result.failed_hosts) - def generate_telemetry_server_certificates(self, hosts, user): """Generate the telemetry certificates for the test on server/client. @@ -1075,8 +1052,7 @@ def generate_telemetry_server_certificates(self, hosts, user): if not self.yaml.telemetry_config.allow_insecure.value: certgen_dir = os.path.abspath( - os.path.join("..", "..", "..", "..", "lib64", "daos", "certgen")) - + os.path.join(os.getcwd(), "scripts")) command = os.path.join(certgen_dir, "gen_telemetry_server_certificate.sh ") command = "sudo " + command + user + " " + destination self.log.debug("Generating the telemetry certificate command %s:", command) diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index 32b7ec022b6..f1c262c924f 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -32,7 +32,6 @@ class DmgTelemetryConfig(TelemetryConfig): def __init__(self, log_dir="/tmp"): """Initialize a TelemetryConfig object.""" super().__init__("/run/dmg/telemetry_config/*", None, log_dir) - self.ca_cert = LogParameter(self._log_dir, None, "daosTelemetryCA.crt") def _get_new(self): """Get a new object based upon this one. 
diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index 7d0f06d16b6..2d33cceac05 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -425,12 +425,7 @@ def prepare(self, logger, test_log_file, test, repeat, user_create, slurm_setup, return status # Generate certificate files for the test - status = self._generate_certs(logger) - if status: - return status - - # Generate certificate files for the test - return self._generate_telemetry_certs(logger) + return self._generate_certs(logger) def execute(self, logger, test, repeat, number, sparse, fail_fast): """Run the specified test. @@ -875,32 +870,6 @@ def _generate_certs(self, logger): return 0 - def _generate_telemetry_certs(self, logger): - """Generate the certificates for the test. - - Returns: - logger (Logger): logger for the messages produced by this method - int: status code: 0 = success, 128 = failure - - """ - logger.debug("-" * 80) - logger.debug("Generating Telemetry certificate") - test_env = TestEnvironment() - certs_dir = os.path.join(test_env.log_dir, "daosTelemetryCA") - certgen_dir = os.path.abspath( - os.path.join("..", "..", "..", "..", "lib64", "daos", "certgen")) - command = os.path.join(certgen_dir, "gen_telemetry_admin_certificate.sh") - if not run_local(logger, f"/usr/bin/rm -rf {certs_dir}").passed: - message = "Error removing old Telemetry certificates" - self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) - return 128 - if not run_local(logger, f"{command} {test_env.log_dir}").passed: - message = "Error generating Telemetry certificates" - self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) - return 128 - - return 0 - def _collect_crash_files(self, logger): """Move any avocado crash files into job-results/latest/crashes. 
diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index e444b0c78aa..65cfb83c19a 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -244,10 +244,7 @@ def prepare(self, storage=True): # Copy certificates self.manager.job.copy_certificates(get_log_file("daosCA/certs"), self._hosts) - self.manager.job.copy_telemetry_root_certificates(get_log_file("daosTelemetryCA"), - self._hosts) - self.manager.job.generate_telemetry_server_certificates(self._hosts, - "daos_server") + self.manager.job.generate_telemetry_server_certificates(self._hosts, "daos_server") self._prepare_dmg_certificates() # Prepare dmg for running storage format on all server hosts diff --git a/utils/certs/SConscript b/utils/certs/SConscript index 38fb8ffd1d4..446a2059a5a 100644 --- a/utils/certs/SConscript +++ b/utils/certs/SConscript @@ -8,9 +8,7 @@ def scons(): env.Install("$PREFIX/lib64/daos/certgen", ['admin.cnf', 'agent.cnf', 'server.cnf', - 'gen_certificates.sh', - 'gen_telemetry_admin_certificate.sh', - 'gen_telemetry_server_certificate.sh']) + 'gen_certificates.sh']) if __name__ == "SCons.Script": diff --git a/utils/certs/gen_telemetry_admin_certificate.sh b/utils/certs/gen_telemetry_admin_certificate.sh deleted file mode 100755 index ced344ca0d8..00000000000 --- a/utils/certs/gen_telemetry_admin_certificate.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash -# /* -# * (C) Copyright 2024 Intel Corporation. -# * -# * SPDX-License-Identifier: BSD-2-Clause-Patent -# */ - -__usage=" -Usage: gen_telemetry_admin_certificate.sh [DIR] -Generate certificates for DAOS deployment in the [DIR]/daosTelemetryCA. -By default [DIR] is the current directory. 
-" - -function print_usage () { - >&2 echo "$__usage" -} - -# validity of root CA and keys' certificates -DAYS=1095 - -CA_HOME="${1:-.}/daosTelemetryCA" - -function setup_directories () { - mkdir -p "${CA_HOME}" - chmod 700 "${CA_HOME}" -} - -function generate_ca_cnf () { - echo " -[ ca ] -default_ca = CA_daos_telemetry - -[ CA_daos_telemetry ] -dir = ${CA_HOME} -certs = \$dir - -# Key and Certificate for the root -certificate = \$dir/daosTelemetryCA.crt -private_key = \$dir/daosTelemetryCA.key - -default_md = sha512 # SAFE Crypto Requires SHA-512 -default_days = ${DAYS} # how long to certify for -copy_extensions = copy -unique_subject = no - -[ req ] -prompt = no -distinguished_name = ca_dn -x509_extensions = ca_ext - -[ ca_dn ] -organizationName = DAOS -commonName = DAOS CA TELEMETRY - -[ ca_ext ] -keyUsage = critical,digitalSignature,nonRepudiation,keyEncipherment,keyCertSign -basicConstraints = critical,CA:true,pathlen:1 - -[ signing_policy ] -organizationName = supplied -commonName = supplied - -" > "${CA_HOME}/ca.cnf" -} - -function generate_ca_cert () { - echo "Generating Private CA Root Certificate" - # Generate Private key and set permissions - openssl genrsa -out "${CA_HOME}/daosTelemetryCA.key" 3072 - [[ $EUID -eq 0 ]] && chown root.root "${CA_HOME}/daosTelemetryCA.key" 2>/dev/null - chmod 0400 "${CA_HOME}/daosTelemetryCA.key" - # Generate CA Certificate - openssl req -new -x509 -config "${CA_HOME}/ca.cnf" -days ${DAYS} -sha512 \ - -key "${CA_HOME}/daosTelemetryCA.key" \ - -out "${CA_HOME}/daosTelemetryCA.crt" -batch - [[ $EUID -eq 0 ]] && chown root.daos_daemons "${CA_HOME}/daosTelemetryCA.crt" 2>/dev/null - chmod 0644 "${CA_HOME}/daosTelemetryCA.crt" - # Reset the the CA index - rm -f "${CA_HOME}/index.txt" "${CA_HOME}/serial.txt" - touch "${CA_HOME}/index.txt" - echo '01' > "${CA_HOME}/serial.txt" - echo "Private CA Root Certificate for Telemetry created in ${CA_HOME}" -} - -function cleanup () { - rm -f "${CA_HOME}/ca.cnf" -} - -function main () { - 
if [[ -d "$CA_HOME" ]] - then - echo "$CA_HOME already exists, exiting." - exit 1 - fi - setup_directories - generate_ca_cnf - generate_ca_cert - cleanup -} - -main diff --git a/utils/certs/gen_telemetry_server_certificate.sh b/utils/certs/gen_telemetry_server_certificate.sh deleted file mode 100755 index f1de56da85e..00000000000 --- a/utils/certs/gen_telemetry_server_certificate.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/bash -# /* -# * (C) Copyright 2024 Intel Corporation. -# * -# * SPDX-License-Identifier: BSD-2-Clause-Patent -# */ - -__usage=" - -This is just an example script for testing purpose. -Please modify to use in Production environment. - -Usage: gen_telemetry_server_certificate.sh [USER] [DIR] - USER: DAOS has server and client and the certificate need the specific file permission - based on system usage. - Use daos_server if running script on server - Use daos_agent if running script on client - - DIR: Generate telemetry certificates for DAOS metrics in the [DIR]. - By default [DIR] is the current directory. 
-" - -DAYS=1095 - -USER=$1 -CA_HOME="${2:-.}/" -HOSTNAME=$(hostname -s) - -function print_usage () { - >&2 echo "$__usage" -} - -function generate_ca_cnf () { - echo " -[req] -default_md = sha256 -prompt = no -req_extensions = v3_ext -distinguished_name = req_distinguished_name - -[req_distinguished_name] -CN = ${HOSTNAME} - -[v3_ext] -keyUsage = critical,digitalSignature,keyEncipherment -extendedKeyUsage = critical,serverAuth,clientAuth -subjectAltName = DNS:${HOSTNAME} - -" > "${CA_HOME}/telemetry.cnf" -} - -function generate_server_cert () { - echo "Generating Server Certificate" - # Generate Private key and set its permissions - openssl genrsa -out "${CA_HOME}/telemetry.key" 2048 - [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetry.key" - chmod 0400 "${CA_HOME}/telemetry.key" - - # Generate a Certificate Signing Request (CRS) - openssl req -new -key "${CA_HOME}/telemetry.key" \ - -out "${CA_HOME}/telemetry.csr" -config "${CA_HOME}/telemetry.cnf" - - # Create Certificate from request - openssl x509 -req -in "${CA_HOME}/telemetry.csr" -CA "${CA_HOME}/daosTelemetryCA.crt" \ - -CAkey "${CA_HOME}/daosTelemetryCA.key" -CAcreateserial -out "${CA_HOME}/telemetry.crt" \ - -days ${DAYS} -sha256 -extfile "$CA_HOME/telemetry.cnf" -extensions v3_ext - - [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetry.crt" - chmod 0644 "${CA_HOME}/telemetry.crt" - - echo "Required Server Certificate Files: - ${CA_HOME}/daosTelemetryCA.crt - ${CA_HOME}/telemetry.key - ${CA_HOME}/telemetry.crt" -} - -function cleanup () { - # Remove this key as it's not required after creating the telemetry.key - rm -f "${CA_HOME}/daosTelemetryCA.key" - - rm -f "${CA_HOME}/telemetry.csr" - rm -f "${CA_HOME}/telemetry.cnf" -} - -function main () { - generate_ca_cnf - generate_server_cert - cleanup -} - -main From 1329d1e88c9906974c0aadb18a4eae69442f7ac7 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Mon, 2 Dec 2024 23:16:35 +0000 Subject: [PATCH 08/19] Code updated based 
on review comments. Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- docs/admin/deployment.md | 120 ++-------------------- src/control/cmd/dmg/telemetry.go | 21 ++-- src/control/lib/control/http.go | 67 +++++------- src/control/lib/control/http_test.go | 48 +++------ src/control/lib/control/telemetry.go | 25 +++-- src/control/lib/control/telemetry_test.go | 28 ++--- src/control/security/config.go | 14 +-- src/tests/ftest/util/dmg_utils_params.py | 1 + utils/config/daos_control.yml | 4 +- 9 files changed, 86 insertions(+), 242 deletions(-) diff --git a/docs/admin/deployment.md b/docs/admin/deployment.md index 5a11d8cf41e..84b7c859ba6 100644 --- a/docs/admin/deployment.md +++ b/docs/admin/deployment.md @@ -762,119 +762,13 @@ transport_config: #### Telemetry Certificate Configuration The DAOS Telemetry framework has option to use certificates to authenticate -between server/client and admin node.A set of certificates for a given DAOS systems may be -generated by running the `gen_telemetry_admin_certificate.sh` and `gen_telemetry_server_certificate.sh` script provided with the DAOS -software if there is not an existing TLS certificate infrastructure. The -Both script uses the `openssl` tool to generate all of the -necessary files. - -##### Telemetry Admin script - -This `gen_telemetry_admin_certificate.sh` script needs to run on the system where the `dmg telemetry metrics` command is going to run or on the system where Prometheus is going to be setup for collecting metrics. 
- -```bash -$ cd /tmp/ -$ gen_telemetry_admin_certificate.sh -Generating Private CA Root Certificate -Generating RSA private key, 3072 bit long modulus (2 primes) -............................................................................................................++++ -.............++++ -e is 65537 (0x010001) -Private CA Root Certificate for Telemetry created in ./daosTelemetryCA -``` - -This will create the key and cert file - -```bash -$ ls -l /tmp/daosTelemetryCA/ -total 12 --rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:06 daosTelemetryCA.crt --r-------- 1 root root 2455 Sep 27 17:06 daosTelemetryCA.key --rw-r--r-- 1 root root 0 Sep 27 17:06 index.txt --rw-r--r-- 1 root root 3 Sep 27 17:06 serial.txt -``` - -The generated keys and certificates must then be securely distributed to all nodes for which you need to collect the DAOS metrics. - -You can copy this certificates on /etc/daos/certs/ or someother secure location - -##### Telemetry Server script - -This `gen_telemetry_server_certificate.sh` script need to run on the DAOS server/client node for which DAOS metrics needs to be gathered. - -Below files are copied from the Admin node in previous steps. - -```bash -$ ls -l /tmp/daosTelemetryCA/ -total 12 --rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:06 daosTelemetryCA.crt --r-------- 1 root root 2455 Sep 27 17:06 daosTelemetryCA.key --rw-r--r-- 1 root root 0 Sep 27 17:06 index.txt --rw-r--r-- 1 root root 3 Sep 27 17:06 serial.txt -``` - -Run this script with arguments. -First argument is the File permission you want on certificate,for example below command is run on daos client where it needs to be set as daos_agent user permission. -Second argument is optional for certificate path (By default it's in current directory). -For security reason this script will delete the CA key copied at the end which was copied from the Admin node and will create the local node certificate and key. 
- -```bash -$ cd daosTelemetryCA/ -$ gen_telemetry_server_certificate.sh daos_agent -Generating Server Certificate -Generating RSA private key, 2048 bit long modulus (2 primes) -.......................+++++ -......................................................................................................+++++ -e is 65537 (0x010001) -Signature ok -subject=CN = wolf-170 -Getting CA Private Key -Required Server Certificate Files: - .//daosTelemetryCA.crt - .//telemetry.key - .//telemetry.crt -$ ls -l -total 20 --rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:18 daosTelemetryCA.crt --rw-r--r-- 1 root root 41 Sep 27 17:19 daosTelemetryCA.srl --rw-r--r-- 1 root root 0 Sep 27 17:18 index.txt --rw-r--r-- 1 root root 3 Sep 27 17:18 serial.txt --rw-r--r-- 1 daos_agent daos_agent 1302 Sep 27 17:19 telemetry.crt --r-------- 1 daos_agent daos_agent 1675 Sep 27 17:19 telemetry.key -``` - -Below example is ran with daos_server user on server node - -```bash -$ cd daosTelemetryCA/ -$ gen_telemetry_server_certificate.sh daos_server -Generating Server Certificate -Generating RSA private key, 2048 bit long modulus (2 primes) -.................................................+++++ -.+++++ -e is 65537 (0x010001) -Signature ok -subject=CN = wolf-173 -Getting CA Private Key -Required Server Certificate Files: - .//daosTelemetryCA.crt - .//telemetry.key - .//telemetry.crt -$ ls -l -total 20 --rw-r--r-- 1 root daos_daemons 1460 Sep 27 17:24 daosTelemetryCA.crt --rw-r--r-- 1 root root 41 Sep 27 17:24 daosTelemetryCA.srl --rw-r--r-- 1 root root 0 Sep 27 17:24 index.txt --rw-r--r-- 1 root root 3 Sep 27 17:24 serial.txt --rw-r--r-- 1 daos_server daos_server 1302 Sep 27 17:24 telemetry.crt --r-------- 1 daos_server daos_server 1679 Sep 27 17:24 telemetry.key -``` - -You can copy this certificates on /etc/daos/certs/ or someother secure location +between server/client and admin node. 
+Creating of certificate is not part of DAOS scope and it is up to Admin to +generate the certificate and add it to the DAOS server and client system. #### Telemetry Yaml Example -Now you have certificate created and you can add those path in the respective yaml file. +Information on telelmetry config parameters in respective yaml file. ```yaml # /etc/daos/daos_server.yml (servers) @@ -910,9 +804,9 @@ telemetry_config: # /etc/daos/daos_control.yml (dmg/admin) telemetry_config: # To use telemetry in secure mode - allow_insecure: true - # Custom CA Root certificate for generated certs - ca_cert: /etc/daos/certs/daosTelemetryCA.crt + allow_insecure: false + # Skip the Server certificate verification. Recomendate for testing purpose only. + https_exception: true ``` ### Server Startup diff --git a/src/control/cmd/dmg/telemetry.go b/src/control/cmd/dmg/telemetry.go index b4fdab4ae82..ac03a47fdfe 100644 --- a/src/control/cmd/dmg/telemetry.go +++ b/src/control/cmd/dmg/telemetry.go @@ -198,7 +198,7 @@ type ( } tlsConfig struct { - CaFile string `yaml:"ca_file,omitempty"` + InsecureSkipVerify bool `yaml:"insecure_skip_verify,omitempty"` } scrapeConfig struct { @@ -269,7 +269,7 @@ func (cmd *telemConfigCmd) configurePrometheus() (*installInfo, error) { scheme := "" if !cmd.cfgCmd.config.TelemetryConfig.AllowInsecure { cmd.Infof("Prometheus configuration is setup as Secure (https) mode") - tc.CaFile = cmd.cfgCmd.config.TelemetryConfig.CARootPath + tc.InsecureSkipVerify = cmd.cfgCmd.config.TelemetryConfig.HttpsException scheme = "https" } else { cmd.Infof("Prometheus configuration is setup as insecure (http) mode") @@ -335,7 +335,7 @@ func (cmd *metricsListCmd) Execute(args []string) error { req := new(control.MetricsListReq) req.Port = cmd.Port req.Host = host - req.CaCertPath = cmd.cfgCmd.config.TelemetryConfig.CARootPath + req.HttpsException = cmd.cfgCmd.config.TelemetryConfig.HttpsException if !cmd.JSONOutputEnabled() { cmd.Info(getConnectingMsg(req.Host, req.Port)) @@ 
-343,11 +343,10 @@ func (cmd *metricsListCmd) Execute(args []string) error { // Trying Secure Mode First, It will ignore the certificate if it's not provided // or request with the certificate. - req.AllowInsecure = false - if req.CaCertPath == "" { - cmd.Debug("Trying Secure Mode (HTTPS) first, ignoring certificate") + if req.AllowInsecure { + cmd.Debug("Trying Secure Mode (HTTPS) with Exception") } else { - cmd.Debug("Trying Secure Mode (HTTPS) first with certificate") + cmd.Debug("Trying Secure Mode (HTTPS) with system certificate") } resp, err := control.MetricsList(cmd.MustLogCtx(), req) @@ -408,7 +407,7 @@ func (cmd *metricsQueryCmd) Execute(args []string) error { req := new(control.MetricsQueryReq) req.Port = cmd.Port req.Host = host - req.CaCertPath = cmd.cfgCmd.config.TelemetryConfig.CARootPath + req.HttpsException = cmd.cfgCmd.config.TelemetryConfig.HttpsException req.MetricNames = common.TokenizeCommaSeparatedString(cmd.Metrics) if !cmd.JSONOutputEnabled() { @@ -418,11 +417,7 @@ func (cmd *metricsQueryCmd) Execute(args []string) error { // Trying Secure Mode First, It will ignore the certificate if it's not provided // or request with the certificate. 
req.AllowInsecure = false - if req.CaCertPath == "" { - cmd.Debug("Trying Secure Mode (HTTPS) first, ignoring certificate") - } else { - cmd.Debug("Trying Secure Mode (HTTPS) first with certificate") - } + cmd.Debug("Trying Secure Mode (HTTPS) first, with system certificate") resp, err := control.MetricsQuery(cmd.MustLogCtx(), req) if err != nil { diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index d5ac19e41be..2ff1f4157f6 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -40,15 +40,16 @@ type httpGetter interface { getURL() *url.URL getBody(context.Context) ([]byte, error) getAllowInsecure() bool - getCaCertPath() *string + getHttpsException() bool } type httpReq struct { - url *url.URL - getFn httpGetFn - allowInsecure bool - cacertpath *string - getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) + url *url.URL + getFn httpGetFn + allowInsecure bool + cacertpath *string + httpsException bool + getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) } func (r *httpReq) canRetry(err error, cur uint) bool { @@ -87,8 +88,8 @@ func (r *httpReq) getAllowInsecure() bool { return r.allowInsecure } -func (r *httpReq) getCaCertPath() *string { - return r.cacertpath +func (r *httpReq) getHttpsException() bool { + return r.httpsException } func (r *httpReq) httpGetFunc() httpGetFn { @@ -103,7 +104,7 @@ func (r *httpReq) getBody(ctx context.Context) ([]byte, error) { r.getBodyFn = httpGetBody } - return r.getBodyFn(ctx, r.getURL(), r.httpGetFunc(), r.getRetryTimeout(), r.getAllowInsecure(), r.getCaCertPath()) + return r.getBodyFn(ctx, r.getURL(), r.httpGetFunc(), r.getRetryTimeout(), r.getAllowInsecure(), r.getHttpsException()) } func httpGetBodyRetry(ctx context.Context, req httpGetter) ([]byte, error) { @@ -130,36 +131,18 @@ func httpGetBodyRetry(ctx context.Context, req httpGetter) ([]byte, error) { // 
httpsSecureGetFunc will prepare the GET requested using the certificate for secure mode // and return the http.Get -func httpsSecureGetFunc(cacertpath string) (httpGetFn, error) { - cert, err := ioutil.ReadFile(cacertpath) - if err != nil { - return nil, errors.Wrap(err, "reading CA certificate file Error") - } - - caCertPool := x509.NewCertPool() - result := caCertPool.AppendCertsFromPEM(cert) - if !result { - return nil, errors.New("failed to parse root certificate") +func httpsSecureGetFunc(httpsException bool) (httpGetFn, error) { + rootCAs, _ := x509.SystemCertPool() + if rootCAs == nil { + return nil, errors.New("Failed to load system root certificates") } tlsConfig := &tls.Config{ - RootCAs: caCertPool, - } - - tr := &http.Transport{ - TLSClientConfig: tlsConfig, + RootCAs: rootCAs, } - client := &http.Client{Transport: tr} - - return client.Get, nil -} - -// httpsInsecureGetFunc will prepare the GET requested without certificate for secure mode -// and return the http.Get -func httpsInsecureGetFunc() httpGetFn { - tlsConfig := &tls.Config{ - InsecureSkipVerify: true, + if httpsException { + tlsConfig.InsecureSkipVerify = true } tr := &http.Transport{ @@ -168,12 +151,12 @@ func httpsInsecureGetFunc() httpGetFn { client := &http.Client{Transport: tr} - return client.Get + return client.Get, nil } // httpGetBody executes a simple HTTP GET request to a given URL and returns the // content of the response body. -func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration, allowInsecure bool, cacertpath *string) ([]byte, error) { +func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration, allowInsecure bool, httpsException bool) ([]byte, error) { if url == nil { return nil, errors.New("nil URL") } @@ -187,14 +170,10 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. 
} if allowInsecure == false { - if cacertpath == nil || *cacertpath == "" { - get = httpsInsecureGetFunc() - } else { - var err error - get, err = httpsSecureGetFunc(*cacertpath) - if err != nil { - return nil, err - } + var err error + get, err = httpsSecureGetFunc(httpsException) + if err != nil { + return nil, err } } diff --git a/src/control/lib/control/http_test.go b/src/control/lib/control/http_test.go index 9efda5c06fc..c9dfb6015d7 100644 --- a/src/control/lib/control/http_test.go +++ b/src/control/lib/control/http_test.go @@ -125,18 +125,16 @@ func TestControl_httpGetBody(t *testing.T) { defaultURL := &url.URL{Host: "testhost"} defaultAllowInsecure := true falseAllowInsecure := false - badCertPerm := "../../security/testdata/certs/badperms.crt" - badCertPath := "wrongpath/notavailable.crt" for name, tc := range map[string]struct { - url *url.URL - timeout time.Duration - cancelCtx bool - getFn httpGetFn - allowInsecure bool - caCertPath *string - expResult []byte - expErr error + url *url.URL + timeout time.Duration + cancelCtx bool + getFn httpGetFn + allowInsecure bool + httpsException bool + expResult []byte + expErr error }{ "nil url": { expErr: errors.New("nil URL"), @@ -202,28 +200,6 @@ func TestControl_httpGetBody(t *testing.T) { }, expErr: errors.New("Get \"//testhost\": unsupported protocol scheme"), }, - "failure with body in secure mode with bad CA certificate": { - url: defaultURL, - allowInsecure: falseAllowInsecure, - caCertPath: &badCertPerm, - getFn: func(_ string) (*http.Response, error) { - return &http.Response{ - StatusCode: http.StatusOK, - }, nil - }, - expErr: errors.New("Get \"//testhost\": unsupported protocol scheme"), - }, - "failure with body in secure mode with bad CA certificate path": { - url: defaultURL, - allowInsecure: falseAllowInsecure, - caCertPath: &badCertPath, - getFn: func(_ string) (*http.Response, error) { - return &http.Response{ - StatusCode: http.StatusOK, - }, nil - }, - expErr: errors.New("reading CA 
certificate file Error: open wrongpath/notavailable.crt: no such file or directory"), - }, "reading body fails": { url: defaultURL, allowInsecure: defaultAllowInsecure, @@ -276,7 +252,7 @@ func TestControl_httpGetBody(t *testing.T) { tc.timeout = time.Second } - result, err := httpGetBody(ctx, tc.url, tc.getFn, tc.timeout, tc.allowInsecure, tc.caCertPath) + result, err := httpGetBody(ctx, tc.url, tc.getFn, tc.timeout, tc.allowInsecure, tc.httpsException) test.CmpErr(t, tc.expErr, err) if diff := cmp.Diff(tc.expResult, result); diff != "" { @@ -294,7 +270,7 @@ type mockHTTPGetter struct { getBodyErr error getBodyCalled uint getBodyFailures uint - caCertPath *string + httpsException bool } func (r *mockHTTPGetter) canRetry(err error, cur uint) bool { @@ -325,8 +301,8 @@ func (r *mockHTTPGetter) getAllowInsecure() bool { return true } -func (r *mockHTTPGetter) getCaCertPath() *string { - return r.caCertPath +func (r *mockHTTPGetter) getHttpsException() bool { + return true } func (r *mockHTTPGetter) getBody(ctx context.Context) ([]byte, error) { diff --git a/src/control/lib/control/telemetry.go b/src/control/lib/control/telemetry.go index 45110cf9eb5..108453584d6 100644 --- a/src/control/lib/control/telemetry.go +++ b/src/control/lib/control/telemetry.go @@ -84,11 +84,11 @@ type ( // MetricsListReq is used to request the list of metrics. MetricsListReq struct { httpReq - Host string // Host to query for telemetry data - Port uint32 // Port to use for collecting telemetry data - AllowInsecure bool // Set the https end point secure - CaCertPath string // CA Cert path for telemetry - Log logging.Logger // Logging the info + Host string // Host to query for telemetry data + Port uint32 // Port to use for collecting telemetry data + AllowInsecure bool // Set the https end point secure + HttpsException bool // Use the Https with Exception (Insecure) + Log logging.Logger // Logging the info } // MetricsListResp contains the list of available metrics. 
@@ -112,7 +112,7 @@ func MetricsList(ctx context.Context, req *MetricsListReq) (*MetricsListResp, er } req.allowInsecure = req.AllowInsecure - req.cacertpath = &req.CaCertPath + req.httpsException = req.HttpsException req.url = getMetricsURL(req.Host, req.Port, req.allowInsecure) scraped, err := scrapeMetrics(ctx, req) if err != nil { @@ -140,12 +140,11 @@ type ( // MetricsQueryReq is used to query telemetry values. MetricsQueryReq struct { httpReq - Host string // host to query for telemetry data - Port uint32 // port to use for collecting telemetry data - AllowInsecure bool // Set the https end point secure - CaCertPath string // CA Cert path for telemetry - - MetricNames []string // if empty, collects all metrics + Host string // host to query for telemetry data + Port uint32 // port to use for collecting telemetry data + AllowInsecure bool // Set the https end point secure + HttpsException bool // Use the Https with Exception (Insecure) + MetricNames []string // if empty, collects all metrics } // MetricsQueryResp contains the list of telemetry values per host. 
@@ -169,7 +168,7 @@ func MetricsQuery(ctx context.Context, req *MetricsQueryReq) (*MetricsQueryResp, } req.allowInsecure = req.AllowInsecure - req.cacertpath = &req.CaCertPath + req.httpsException = req.HttpsException req.url = getMetricsURL(req.Host, req.Port, req.allowInsecure) scraped, err := scrapeMetrics(ctx, req) if err != nil { diff --git a/src/control/lib/control/telemetry_test.go b/src/control/lib/control/telemetry_test.go index 8da1b59a4e9..af0528be5f5 100644 --- a/src/control/lib/control/telemetry_test.go +++ b/src/control/lib/control/telemetry_test.go @@ -118,10 +118,10 @@ func newTestPBHistogram(numBuckets int) *pclient.Metric { return metric } -func mockScrapeFnSuccess(t *testing.T, metricFam ...*pclient.MetricFamily) func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { +func mockScrapeFnSuccess(t *testing.T, metricFam ...*pclient.MetricFamily) func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { t.Helper() - return func(_ context.Context, _ *url.URL, _ httpGetFn, _ time.Duration, _ bool, _ *string) ([]byte, error) { + return func(_ context.Context, _ *url.URL, _ httpGetFn, _ time.Duration, _ bool, _ bool) ([]byte, error) { var b strings.Builder for _, mf := range metricFam { _, err := expfmt.MetricFamilyToText(&b, mf) @@ -147,12 +147,12 @@ func TestControl_scrapeMetrics(t *testing.T) { for name, tc := range map[string]struct { req httpGetter - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) expResult pbMetricMap expErr error }{ "check scrape params": { - scrapeFn: func(_ context.Context, url *url.URL, getter httpGetFn, timeout time.Duration, allowInsecure bool, caCertPath *string) ([]byte, error) { + scrapeFn: func(_ context.Context, url *url.URL, getter httpGetFn, timeout time.Duration, allowInsecure bool, httpsException bool) 
([]byte, error) { test.AssertEqual(t, testURL.Scheme, url.Scheme, "") test.AssertEqual(t, testURL.Host, url.Host, "") test.AssertEqual(t, testURL.Path, url.Path, "") @@ -166,19 +166,19 @@ func TestControl_scrapeMetrics(t *testing.T) { expResult: pbMetricMap{}, }, "HTTP scrape error": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), }, "scrape returns no content": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { return []byte{}, nil }, expResult: pbMetricMap{}, }, "scrape returns bad content": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { return []byte("

Hello world

"), nil }, expErr: errors.New("parsing error"), @@ -217,7 +217,7 @@ func TestControl_MetricsList(t *testing.T) { } for name, tc := range map[string]struct { - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) req *MetricsListReq expResp *MetricsListResp expErr error @@ -241,7 +241,7 @@ func TestControl_MetricsList(t *testing.T) { Port: 1066, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), @@ -252,7 +252,7 @@ func TestControl_MetricsList(t *testing.T) { Port: 8888, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { return []byte{}, nil }, expResp: &MetricsListResp{ @@ -284,7 +284,7 @@ func TestControl_MetricsList(t *testing.T) { } { t.Run(name, func(t *testing.T) { if tc.scrapeFn == nil { - tc.scrapeFn = func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { + tc.scrapeFn = func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { return nil, nil } } @@ -432,7 +432,7 @@ func TestControl_MetricsQuery(t *testing.T) { } for name, tc := range map[string]struct { - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) req *MetricsQueryReq expResp *MetricsQueryResp expErr error @@ -456,7 +456,7 @@ func TestControl_MetricsQuery(t *testing.T) { Port: 1066, AllowInsecure: true, }, - scrapeFn: 
func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), @@ -467,7 +467,7 @@ func TestControl_MetricsQuery(t *testing.T) { Port: 8888, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, *string) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { return []byte{}, nil }, expResp: &MetricsQueryResp{ diff --git a/src/control/security/config.go b/src/control/security/config.go index 9f6a3392946..485e7ffdf87 100644 --- a/src/control/security/config.go +++ b/src/control/security/config.go @@ -110,13 +110,13 @@ type TransportConfig struct { // TelemetryConfig contains all the information on whether or not to use // secure endpoint for telemetry and their location if their use is specified. 
type TelemetryConfig struct { - Port int `yaml:"telemetry_port,omitempty"` - AllowInsecure bool `yaml:"allow_insecure,omitempty"` - Enabled bool `yaml:"telemetry_enabled,omitempty"` - Retain time.Duration `yaml:"telemetry_retain,omitempty"` - HttpsCert string `yaml:"https_cert,omitempty"` - HttpsKey string `yaml:"https_key,omitempty"` - CARootPath string `yaml:"ca_cert,omitempty"` + Port int `yaml:"telemetry_port,omitempty"` + AllowInsecure bool `yaml:"allow_insecure,omitempty"` + Enabled bool `yaml:"telemetry_enabled,omitempty"` + Retain time.Duration `yaml:"telemetry_retain,omitempty"` + HttpsCert string `yaml:"https_cert,omitempty"` + HttpsKey string `yaml:"https_key,omitempty"` + HttpsException bool `yaml:"https_exception,omitempty"` } // DefaultClientTelemetryConfig provides a default telemetry config disabling diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index f1c262c924f..7faa37a0f40 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -32,6 +32,7 @@ class DmgTelemetryConfig(TelemetryConfig): def __init__(self, log_dir="/tmp"): """Initialize a TelemetryConfig object.""" super().__init__("/run/dmg/telemetry_config/*", None, log_dir) + self.https_exception = BasicParameter(None, True) def _get_new(self): """Get a new object based upon this one. diff --git a/utils/config/daos_control.yml b/utils/config/daos_control.yml index 076168b35e2..cd0566d41c2 100644 --- a/utils/config/daos_control.yml +++ b/utils/config/daos_control.yml @@ -46,5 +46,5 @@ # # to false. # allow_insecure: true # -# # Custom CA Root certificate for generated telemetry certs -# ca_cert: /etc/daos/certs/daosTelemetryCA.crt +# # Skip the Server certificate verification. Recommended for testing purpose only. 
+# https_exception: true From f67aed2b56aa4b6246b66865f5aab6cca6cc6e4e Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Fri, 13 Dec 2024 22:04:56 +0000 Subject: [PATCH 09/19] Code updated based on review comments. Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- src/control/lib/control/http.go | 1 - src/tests/ftest/config_file_gen.py | 15 ++++++++------- src/tests/ftest/server/storage_tiers.py | 4 ++-- src/tests/ftest/util/agent_utils.py | 4 ++-- src/tests/ftest/util/agent_utils_params.py | 10 +++++----- src/tests/ftest/util/command_utils_base.py | 8 ++++---- src/tests/ftest/util/dmg_utils.py | 4 ++-- src/tests/ftest/util/dmg_utils_params.py | 12 ++++++------ src/tests/ftest/util/launch_utils.py | 1 - src/tests/ftest/util/server_utils.py | 4 ++-- src/tests/ftest/util/server_utils_params.py | 10 +++++----- 11 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index 2ff1f4157f6..8a89cbaf61a 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -47,7 +47,6 @@ type httpReq struct { url *url.URL getFn httpGetFn allowInsecure bool - cacertpath *string httpsException bool getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) } diff --git a/src/tests/ftest/config_file_gen.py b/src/tests/ftest/config_file_gen.py index 9ab409ac533..c030abc9f9e 100755 --- a/src/tests/ftest/config_file_gen.py +++ b/src/tests/ftest/config_file_gen.py @@ -12,13 +12,14 @@ import sys from argparse import ArgumentParser, RawDescriptionHelpFormatter -from util.agent_utils_params import (DaosAgentTelemetryConfig, DaosAgentTransportCredentials, +from util.agent_utils_params import (DaosAgentTelemetryCredentials, DaosAgentTransportCredentials, DaosAgentYamlParameters) from util.command_utils_base import CommonConfig -from util.dmg_utils_params import DmgTelemetryConfig, DmgTransportCredentials, DmgYamlParameters +from 
util.dmg_utils_params import (DmgTelemetryCredentials, DmgTransportCredentials, + DmgYamlParameters) from util.exception_utils import CommandFailure -from util.server_utils_params import (DaosServerTelemetryConfig, DaosServerTransportCredentials, - DaosServerYamlParameters) +from util.server_utils_params import (DaosServerTelemetryCredentials, + DaosServerTransportCredentials, DaosServerYamlParameters) def generate_agent_config(args): @@ -33,7 +34,7 @@ def generate_agent_config(args): """ common_cfg = CommonConfig(args.group_name, DaosAgentTransportCredentials()) config = DaosAgentYamlParameters(args.agent_file, common_cfg) - config.telemetry_config = DaosAgentTelemetryConfig() + config.telemetry_config = DaosAgentTelemetryCredentials() # Update the configuration file access points config.other_params.access_points.value = args.node_list.split(",") return create_config(args, config) @@ -51,7 +52,7 @@ def generate_server_config(args): """ common_cfg = CommonConfig(args.group_name, DaosServerTransportCredentials()) config = DaosServerYamlParameters(args.server_file, common_cfg) - config.telemetry_config = DaosServerTelemetryConfig() + config.telemetry_config = DaosServerTelemetryCredentials() config.engine_params[0].storage.storage_tiers[0].storage_class.value = "ram" config.engine_params[0].storage.storage_tiers[0].scm_mount.value = "/mnt/daos" config.engine_params[0].storage.storage_tiers[0].scm_size.value = 0 @@ -72,7 +73,7 @@ def generate_dmg_config(args): """ config = DmgYamlParameters( args.dmg_file, args.group_name, DmgTransportCredentials()) - config.telemetry_config = DmgTelemetryConfig() + config.telemetry_config = DmgTelemetryCredentials() # Update the configuration file hostlist config.hostlist.value = args.node_list.split(",") return create_config(args, config) diff --git a/src/tests/ftest/server/storage_tiers.py b/src/tests/ftest/server/storage_tiers.py index 536c1c52baf..ad946baaa72 100644 --- a/src/tests/ftest/server/storage_tiers.py +++ 
b/src/tests/ftest/server/storage_tiers.py @@ -8,7 +8,7 @@ import yaml from apricot import TestWithServers from command_utils_base import CommonConfig -from server_utils import (DaosServerTelemetryConfig, DaosServerTransportCredentials, +from server_utils import (DaosServerTelemetryCredentials, DaosServerTransportCredentials, DaosServerYamlParameters) @@ -68,7 +68,7 @@ def test_tiers(self): common_config = CommonConfig("daos_server", DaosServerTransportCredentials()) config = DaosServerYamlParameters(None, common_config) - config.telemetry_config = DaosServerTelemetryConfig() + config.telemetry_config = DaosServerTelemetryCredentials() config.namespace = self.server_config_namespace config.get_params(self) data = config.get_yaml_data() diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index f437bc36e04..3fdd421b9dc 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -7,7 +7,7 @@ import re import socket -from agent_utils_params import (DaosAgentTelemetryConfig, DaosAgentTransportCredentials, +from agent_utils_params import (DaosAgentTelemetryCredentials, DaosAgentTransportCredentials, DaosAgentYamlParameters) from ClusterShell.NodeSet import NodeSet from command_utils import CommandWithSubCommand, SubprocessManager, YamlCommand @@ -54,7 +54,7 @@ def get_agent_command(group, cert_dir, bin_dir, config_file, run_user, config_te transport_config = DaosAgentTransportCredentials(cert_dir) common_config = CommonConfig(group, transport_config) config = DaosAgentYamlParameters(config_file, common_config) - config.telemetry_config = DaosAgentTelemetryConfig(cert_dir) + config.telemetry_config = DaosAgentTelemetryCredentials(cert_dir) command = DaosAgentCommand(bin_dir, config, run_user=run_user) if config_temp: # Setup the DaosAgentCommand to write the config file data to the diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index 
b6a04bf874f..0a5ac57d539 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -5,7 +5,7 @@ """ import os -from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, +from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, TransportCredentials, YamlParameters) @@ -33,12 +33,12 @@ def _get_new(self): return DaosAgentTransportCredentials(self._log_dir) -class DaosAgentTelemetryConfig(TelemetryConfig): +class DaosAgentTelemetryCredentials(TelemetryCredentials): # pylint: disable=too-few-public-methods """Telemetry credentials listing certificates for secure communication.""" def __init__(self, log_dir=os.path.join(os.sep, "tmp")): - """Initialize a TelemetryConfig object.""" + """Initialize a TelemetryCredentials object.""" super().__init__("/run/agent_config/telemetry_config/*", None, log_dir) self.telemetry_port = BasicParameter(None, 9192) @@ -51,9 +51,9 @@ def _get_new(self): """Get a new object based upon this one. Returns: - DaosServerTelemetryConfig: a new DaosServerTelemetryConfig object + DaosServerTelemetryCredentials: a new DaosServerTelemetryCredentials object """ - return DaosAgentTelemetryConfig(self._log_dir) + return DaosAgentTelemetryCredentials(self._log_dir) class DaosAgentYamlParameters(YamlParameters): diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index 5b39ca6015a..20a0caea1de 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -799,11 +799,11 @@ def _get_new(self): return TransportCredentials(self.namespace, self.title, self._log_dir) -class TelemetryConfig(YamlParameters): +class TelemetryCredentials(YamlParameters): """Telemetry credentials listing certificates for secure communication.""" def __init__(self, namespace, title, log_dir): - """Initialize a TelemetryConfig object. + """Initialize a TelemetryCredentials object. 
Args: namespace (str): yaml namespace (path to parameters) @@ -864,9 +864,9 @@ def _get_new(self): """Get a new object based upon this one. Returns: - TelemetryConfig: a new TelemetryConfig object + TelemetryCredentials: a new TelemetryCredentials object """ - return TelemetryConfig(self.namespace, self.title, self._log_dir) + return TelemetryCredentials(self.namespace, self.title, self._log_dir) class CommonConfig(YamlParameters): diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index d935a2dfda3..55f4d8d6705 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -10,7 +10,7 @@ from pwd import getpwuid from dmg_utils_base import DmgCommandBase -from dmg_utils_params import DmgTelemetryConfig, DmgTransportCredentials, DmgYamlParameters +from dmg_utils_params import DmgTelemetryCredentials, DmgTransportCredentials, DmgYamlParameters from exception_utils import CommandFailure from general_utils import dict_to_str, get_numeric_list @@ -39,7 +39,7 @@ def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hos """ transport_config = DmgTransportCredentials(cert_dir) - telemetry_config = DmgTelemetryConfig(cert_dir) + telemetry_config = DmgTelemetryCredentials(cert_dir) config = DmgYamlParameters(config_file, group, transport_config, telemetry_config) command = DmgCommand(bin_dir, config, hostlist_suffix) if config_temp: diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index 7faa37a0f40..6f928a96796 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -4,7 +4,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ -from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, +from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, TransportCredentials, YamlParameters) @@ -26,11 +26,11 @@ def _get_new(self): return 
DmgTransportCredentials(self._log_dir) -class DmgTelemetryConfig(TelemetryConfig): +class DmgTelemetryCredentials(TelemetryCredentials): """Telemetry credentials listing certificates for secure communication.""" def __init__(self, log_dir="/tmp"): - """Initialize a TelemetryConfig object.""" + """Initialize a TelemetryCredentials object.""" super().__init__("/run/dmg/telemetry_config/*", None, log_dir) self.https_exception = BasicParameter(None, True) @@ -38,9 +38,9 @@ def _get_new(self): """Get a new object based upon this one. Returns: - DmgTelemetryConfig: a new DmgTelemetryConfig object + DmgTelemetryCredentials: a new DmgTelemetryCredentials object """ - return DmgTelemetryConfig(self._log_dir) + return DmgTelemetryCredentials(self._log_dir) class DmgYamlParameters(YamlParameters): @@ -54,7 +54,7 @@ def __init__(self, filename, name, transport, telemetry=None): name (str): The DAOS system name. transport (DmgTransportCredentials): dmg security configuration settings. - telemetry (DmgTelemetryConfig): dmg telemetry + telemetry (DmgTelemetryCredentials): dmg telemetry configuration settings. 
""" super().__init__("/run/dmg/*", filename, None, transport) diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index 2d33cceac05..0f7284c50ef 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -867,7 +867,6 @@ def _generate_certs(self, logger): message = "Error generating certificates" self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) return 128 - return 0 def _collect_crash_files(self, logger): diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 65cfb83c19a..245a115085f 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -21,7 +21,7 @@ from host_utils import get_local_host from run_utils import run_remote, stop_processes from server_utils_base import DaosServerCommand, DaosServerInformation, ServerFailed -from server_utils_params import (DaosServerTelemetryConfig, DaosServerTransportCredentials, +from server_utils_params import (DaosServerTelemetryCredentials, DaosServerTransportCredentials, DaosServerYamlParameters) from user_utils import get_chown_command @@ -46,7 +46,7 @@ def get_server_command(group, cert_dir, bin_dir, config_file, config_temp=None): transport_config = DaosServerTransportCredentials(cert_dir) common_config = CommonConfig(group, transport_config) config = DaosServerYamlParameters(config_file, common_config) - config.telemetry_config = DaosServerTelemetryConfig(cert_dir) + config.telemetry_config = DaosServerTelemetryCredentials(cert_dir) command = DaosServerCommand(bin_dir, config, None) diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 55ffc606733..68e90afeab6 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -5,7 +5,7 @@ """ import os -from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, +from 
command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, TransportCredentials, YamlParameters) MAX_STORAGE_TIERS = 5 @@ -57,12 +57,12 @@ def _get_new(self): return DaosServerTransportCredentials(self._log_dir) -class DaosServerTelemetryConfig(TelemetryConfig): +class DaosServerTelemetryCredentials(TelemetryCredentials): # pylint: disable=too-few-public-methods """Telemetry credentials listing certificates for secure communication.""" def __init__(self, log_dir=os.path.join(os.sep, "tmp")): - """Initialize a DaosServerTelemetryConfig object.""" + """Initialize a DaosServerTelemetryCredentials object.""" super().__init__("/run/server_config/telemetry_config/*", None, log_dir) # Additional daos_server telemetry credential parameters: @@ -78,9 +78,9 @@ def _get_new(self): """Get a new object based upon this one. Returns: - DaosServerTelemetryConfig: a new DaosServerTelemetryConfig object + DaosServerTelemetryCredentials: a new DaosServerTelemetryCredentials object """ - return DaosServerTelemetryConfig(self._log_dir) + return DaosServerTelemetryCredentials(self._log_dir) class DaosServerYamlParameters(YamlParameters): From 599ec1eda6c98f74f732b014c34fe413e2edde50 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Fri, 13 Dec 2024 22:53:10 +0000 Subject: [PATCH 10/19] Revert "Code updated based on review comments." This reverts commit f67aed2b56aa4b6246b66865f5aab6cca6cc6e4e. 
Signed-off-by: Samir Raval --- src/control/lib/control/http.go | 1 + src/tests/ftest/config_file_gen.py | 15 +++++++-------- src/tests/ftest/server/storage_tiers.py | 4 ++-- src/tests/ftest/util/agent_utils.py | 4 ++-- src/tests/ftest/util/agent_utils_params.py | 10 +++++----- src/tests/ftest/util/command_utils_base.py | 8 ++++---- src/tests/ftest/util/dmg_utils.py | 4 ++-- src/tests/ftest/util/dmg_utils_params.py | 12 ++++++------ src/tests/ftest/util/launch_utils.py | 1 + src/tests/ftest/util/server_utils.py | 4 ++-- src/tests/ftest/util/server_utils_params.py | 10 +++++----- 11 files changed, 37 insertions(+), 36 deletions(-) diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index 8a89cbaf61a..2ff1f4157f6 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -47,6 +47,7 @@ type httpReq struct { url *url.URL getFn httpGetFn allowInsecure bool + cacertpath *string httpsException bool getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) } diff --git a/src/tests/ftest/config_file_gen.py b/src/tests/ftest/config_file_gen.py index c030abc9f9e..9ab409ac533 100755 --- a/src/tests/ftest/config_file_gen.py +++ b/src/tests/ftest/config_file_gen.py @@ -12,14 +12,13 @@ import sys from argparse import ArgumentParser, RawDescriptionHelpFormatter -from util.agent_utils_params import (DaosAgentTelemetryCredentials, DaosAgentTransportCredentials, +from util.agent_utils_params import (DaosAgentTelemetryConfig, DaosAgentTransportCredentials, DaosAgentYamlParameters) from util.command_utils_base import CommonConfig -from util.dmg_utils_params import (DmgTelemetryCredentials, DmgTransportCredentials, - DmgYamlParameters) +from util.dmg_utils_params import DmgTelemetryConfig, DmgTransportCredentials, DmgYamlParameters from util.exception_utils import CommandFailure -from util.server_utils_params import (DaosServerTelemetryCredentials, - DaosServerTransportCredentials, 
DaosServerYamlParameters) +from util.server_utils_params import (DaosServerTelemetryConfig, DaosServerTransportCredentials, + DaosServerYamlParameters) def generate_agent_config(args): @@ -34,7 +33,7 @@ def generate_agent_config(args): """ common_cfg = CommonConfig(args.group_name, DaosAgentTransportCredentials()) config = DaosAgentYamlParameters(args.agent_file, common_cfg) - config.telemetry_config = DaosAgentTelemetryCredentials() + config.telemetry_config = DaosAgentTelemetryConfig() # Update the configuration file access points config.other_params.access_points.value = args.node_list.split(",") return create_config(args, config) @@ -52,7 +51,7 @@ def generate_server_config(args): """ common_cfg = CommonConfig(args.group_name, DaosServerTransportCredentials()) config = DaosServerYamlParameters(args.server_file, common_cfg) - config.telemetry_config = DaosServerTelemetryCredentials() + config.telemetry_config = DaosServerTelemetryConfig() config.engine_params[0].storage.storage_tiers[0].storage_class.value = "ram" config.engine_params[0].storage.storage_tiers[0].scm_mount.value = "/mnt/daos" config.engine_params[0].storage.storage_tiers[0].scm_size.value = 0 @@ -73,7 +72,7 @@ def generate_dmg_config(args): """ config = DmgYamlParameters( args.dmg_file, args.group_name, DmgTransportCredentials()) - config.telemetry_config = DmgTelemetryCredentials() + config.telemetry_config = DmgTelemetryConfig() # Update the configuration file hostlist config.hostlist.value = args.node_list.split(",") return create_config(args, config) diff --git a/src/tests/ftest/server/storage_tiers.py b/src/tests/ftest/server/storage_tiers.py index ad946baaa72..536c1c52baf 100644 --- a/src/tests/ftest/server/storage_tiers.py +++ b/src/tests/ftest/server/storage_tiers.py @@ -8,7 +8,7 @@ import yaml from apricot import TestWithServers from command_utils_base import CommonConfig -from server_utils import (DaosServerTelemetryCredentials, DaosServerTransportCredentials, +from server_utils import 
(DaosServerTelemetryConfig, DaosServerTransportCredentials, DaosServerYamlParameters) @@ -68,7 +68,7 @@ def test_tiers(self): common_config = CommonConfig("daos_server", DaosServerTransportCredentials()) config = DaosServerYamlParameters(None, common_config) - config.telemetry_config = DaosServerTelemetryCredentials() + config.telemetry_config = DaosServerTelemetryConfig() config.namespace = self.server_config_namespace config.get_params(self) data = config.get_yaml_data() diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index 3fdd421b9dc..f437bc36e04 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -7,7 +7,7 @@ import re import socket -from agent_utils_params import (DaosAgentTelemetryCredentials, DaosAgentTransportCredentials, +from agent_utils_params import (DaosAgentTelemetryConfig, DaosAgentTransportCredentials, DaosAgentYamlParameters) from ClusterShell.NodeSet import NodeSet from command_utils import CommandWithSubCommand, SubprocessManager, YamlCommand @@ -54,7 +54,7 @@ def get_agent_command(group, cert_dir, bin_dir, config_file, run_user, config_te transport_config = DaosAgentTransportCredentials(cert_dir) common_config = CommonConfig(group, transport_config) config = DaosAgentYamlParameters(config_file, common_config) - config.telemetry_config = DaosAgentTelemetryCredentials(cert_dir) + config.telemetry_config = DaosAgentTelemetryConfig(cert_dir) command = DaosAgentCommand(bin_dir, config, run_user=run_user) if config_temp: # Setup the DaosAgentCommand to write the config file data to the diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index 0a5ac57d539..b6a04bf874f 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -5,7 +5,7 @@ """ import os -from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, +from command_utils_base import 
(BasicParameter, LogParameter, TelemetryConfig, TransportCredentials, YamlParameters) @@ -33,12 +33,12 @@ def _get_new(self): return DaosAgentTransportCredentials(self._log_dir) -class DaosAgentTelemetryCredentials(TelemetryCredentials): +class DaosAgentTelemetryConfig(TelemetryConfig): # pylint: disable=too-few-public-methods """Telemetry credentials listing certificates for secure communication.""" def __init__(self, log_dir=os.path.join(os.sep, "tmp")): - """Initialize a TelemetryCredentials object.""" + """Initialize a TelemetryConfig object.""" super().__init__("/run/agent_config/telemetry_config/*", None, log_dir) self.telemetry_port = BasicParameter(None, 9192) @@ -51,9 +51,9 @@ def _get_new(self): """Get a new object based upon this one. Returns: - DaosServerTelemetryCredentials: a new DaosServerTelemetryCredentials object + DaosServerTelemetryConfig: a new DaosServerTelemetryConfig object """ - return DaosAgentTelemetryCredentials(self._log_dir) + return DaosAgentTelemetryConfig(self._log_dir) class DaosAgentYamlParameters(YamlParameters): diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index 20a0caea1de..5b39ca6015a 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -799,11 +799,11 @@ def _get_new(self): return TransportCredentials(self.namespace, self.title, self._log_dir) -class TelemetryCredentials(YamlParameters): +class TelemetryConfig(YamlParameters): """Telemetry credentials listing certificates for secure communication.""" def __init__(self, namespace, title, log_dir): - """Initialize a TelemetryCredentials object. + """Initialize a TelemetryConfig object. Args: namespace (str): yaml namespace (path to parameters) @@ -864,9 +864,9 @@ def _get_new(self): """Get a new object based upon this one. 
Returns: - TelemetryCredentials: a new TelemetryCredentials object + TelemetryConfig: a new TelemetryConfig object """ - return TelemetryCredentials(self.namespace, self.title, self._log_dir) + return TelemetryConfig(self.namespace, self.title, self._log_dir) class CommonConfig(YamlParameters): diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 55f4d8d6705..d935a2dfda3 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -10,7 +10,7 @@ from pwd import getpwuid from dmg_utils_base import DmgCommandBase -from dmg_utils_params import DmgTelemetryCredentials, DmgTransportCredentials, DmgYamlParameters +from dmg_utils_params import DmgTelemetryConfig, DmgTransportCredentials, DmgYamlParameters from exception_utils import CommandFailure from general_utils import dict_to_str, get_numeric_list @@ -39,7 +39,7 @@ def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hos """ transport_config = DmgTransportCredentials(cert_dir) - telemetry_config = DmgTelemetryCredentials(cert_dir) + telemetry_config = DmgTelemetryConfig(cert_dir) config = DmgYamlParameters(config_file, group, transport_config, telemetry_config) command = DmgCommand(bin_dir, config, hostlist_suffix) if config_temp: diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index 6f928a96796..7faa37a0f40 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -4,7 +4,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ -from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, +from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, TransportCredentials, YamlParameters) @@ -26,11 +26,11 @@ def _get_new(self): return DmgTransportCredentials(self._log_dir) -class DmgTelemetryCredentials(TelemetryCredentials): +class DmgTelemetryConfig(TelemetryConfig): """Telemetry credentials 
listing certificates for secure communication.""" def __init__(self, log_dir="/tmp"): - """Initialize a TelemetryCredentials object.""" + """Initialize a TelemetryConfig object.""" super().__init__("/run/dmg/telemetry_config/*", None, log_dir) self.https_exception = BasicParameter(None, True) @@ -38,9 +38,9 @@ def _get_new(self): """Get a new object based upon this one. Returns: - DmgTelemetryCredentials: a new DmgTelemetryCredentials object + DmgTelemetryConfig: a new DmgTelemetryConfig object """ - return DmgTelemetryCredentials(self._log_dir) + return DmgTelemetryConfig(self._log_dir) class DmgYamlParameters(YamlParameters): @@ -54,7 +54,7 @@ def __init__(self, filename, name, transport, telemetry=None): name (str): The DAOS system name. transport (DmgTransportCredentials): dmg security configuration settings. - telemetry (DmgTelemetryCredentials): dmg telemetry + telemetry (DmgTelemetryConfig): dmg telemetry configuration settings. """ super().__init__("/run/dmg/*", filename, None, transport) diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index 0f7284c50ef..2d33cceac05 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -867,6 +867,7 @@ def _generate_certs(self, logger): message = "Error generating certificates" self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) return 128 + return 0 def _collect_crash_files(self, logger): diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 245a115085f..65cfb83c19a 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -21,7 +21,7 @@ from host_utils import get_local_host from run_utils import run_remote, stop_processes from server_utils_base import DaosServerCommand, DaosServerInformation, ServerFailed -from server_utils_params import (DaosServerTelemetryCredentials, DaosServerTransportCredentials, +from server_utils_params import 
(DaosServerTelemetryConfig, DaosServerTransportCredentials, DaosServerYamlParameters) from user_utils import get_chown_command @@ -46,7 +46,7 @@ def get_server_command(group, cert_dir, bin_dir, config_file, config_temp=None): transport_config = DaosServerTransportCredentials(cert_dir) common_config = CommonConfig(group, transport_config) config = DaosServerYamlParameters(config_file, common_config) - config.telemetry_config = DaosServerTelemetryCredentials(cert_dir) + config.telemetry_config = DaosServerTelemetryConfig(cert_dir) command = DaosServerCommand(bin_dir, config, None) diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 68e90afeab6..55ffc606733 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -5,7 +5,7 @@ """ import os -from command_utils_base import (BasicParameter, LogParameter, TelemetryCredentials, +from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, TransportCredentials, YamlParameters) MAX_STORAGE_TIERS = 5 @@ -57,12 +57,12 @@ def _get_new(self): return DaosServerTransportCredentials(self._log_dir) -class DaosServerTelemetryCredentials(TelemetryCredentials): +class DaosServerTelemetryConfig(TelemetryConfig): # pylint: disable=too-few-public-methods """Telemetry credentials listing certificates for secure communication.""" def __init__(self, log_dir=os.path.join(os.sep, "tmp")): - """Initialize a DaosServerTelemetryCredentials object.""" + """Initialize a DaosServerTelemetryConfig object.""" super().__init__("/run/server_config/telemetry_config/*", None, log_dir) # Additional daos_server telemetry credential parameters: @@ -78,9 +78,9 @@ def _get_new(self): """Get a new object based upon this one. 
Returns: - DaosServerTelemetryCredentials: a new DaosServerTelemetryCredentials object + DaosServerTelemetryConfig: a new DaosServerTelemetryConfig object """ - return DaosServerTelemetryCredentials(self._log_dir) + return DaosServerTelemetryConfig(self._log_dir) class DaosServerYamlParameters(YamlParameters): From e7833f87d415218f0ff2d0a6d68a818c4c200e94 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Fri, 13 Dec 2024 22:54:38 +0000 Subject: [PATCH 11/19] Code modified based on review comments Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- src/control/lib/control/http.go | 1 - 1 file changed, 1 deletion(-) diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index 2ff1f4157f6..8a89cbaf61a 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -47,7 +47,6 @@ type httpReq struct { url *url.URL getFn httpGetFn allowInsecure bool - cacertpath *string httpsException bool getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) } From 8186949160f119f6a6f9413c65f212524e020a41 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Tue, 7 Jan 2025 16:29:54 +0000 Subject: [PATCH 12/19] Updated based on review comments. Features: control telemetry Required-githooks: true Signed-off-by: Samir Raval --- src/tests/ftest/util/command_utils.py | 6 +++--- src/tests/ftest/util/command_utils_base.py | 22 +++++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index ac9f94449d4..2b4371998d2 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2018-2024 Intel Corporation. + (C) Copyright 2018-2025 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -1049,7 +1049,7 @@ def generate_telemetry_server_certificates(self, hosts, user): Args: hosts (NodeSet): list of the destination hosts. - user (User): User permission set on telemetry certificate file. + user (str): User permission set on telemetry certificate file. For server, it's daos_server and for client it's daos_agent. """ data = self.yaml.telemetry_config.get_certificate_data( @@ -1060,7 +1060,7 @@ def generate_telemetry_server_certificates(self, hosts, user): certgen_dir = os.path.abspath( os.path.join(os.getcwd(), "scripts")) command = os.path.join(certgen_dir, "gen_telemetry_server_certificate.sh ") - command = "sudo " + command + user + " " + destination + command = command_as_user(command + user + " " + destination, "root") self.log.debug("Generating the telemetry certificate command %s:", command) result = run_remote(self.log, hosts, command, 30) if not result.passed: diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index ff2c1499b7e..e85479969ae 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2020-2025 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -829,7 +829,6 @@ def get_yaml_data(self): """ yaml_data = super().get_yaml_data() - # Convert the boolean value into a string if self.title is not None: yaml_data[self.title]["allow_insecure"] = self.allow_insecure.value else: @@ -849,15 +848,16 @@ def get_certificate_data(self, name_list): """ data = {} - if not self.allow_insecure.value: - for name in name_list: - value = getattr(self, name).value - if isinstance(value, str): - dir_name, file_name = os.path.split(value) - if dir_name not in data: - data[dir_name] = [file_name] - else: - data[dir_name].append(file_name) + if self.allow_insecure.value: + return data + for name in name_list: + value = getattr(self, name).value + if isinstance(value, str): + dir_name, file_name = os.path.split(value) + if dir_name not in data: + data[dir_name] = [file_name] + else: + data[dir_name].append(file_name) return data def _get_new(self): From 17800cbf1595fb07d993a6f40de43d117c678b4d Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Tue, 7 Jan 2025 17:56:06 +0000 Subject: [PATCH 13/19] Copyright fix. Features: control telemetry Signed-off-by: Samir Raval --- src/tests/ftest/util/command_utils.py | 3 ++- src/tests/ftest/util/command_utils_base.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index 2b4371998d2..dae615dc12e 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -1,5 +1,6 @@ """ - (C) Copyright 2018-2025 Intel Corporation. + (C) Copyright 2018-2024 Intel Corporation. 
+ (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index e85479969ae..e289ebf9d45 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -1,5 +1,6 @@ """ - (C) Copyright 2020-2025 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ From 5abf1aa1633b7626c9e60656baa5f54099e871ee Mon Sep 17 00:00:00 2001 From: Samir raval Date: Fri, 21 Feb 2025 16:17:14 -0800 Subject: [PATCH 14/19] Code modified based on review comments Features: control telemetry Signed-off-by: Samir Raval --- docs/admin/deployment.md | 14 +-- src/control/cmd/daos_agent/config.go | 41 ++++--- src/control/cmd/daos_agent/config_test.go | 8 +- src/control/cmd/daos_agent/telemetry.go | 9 +- src/control/cmd/dmg/auto_test.go | 4 +- src/control/cmd/dmg/main.go | 1 - src/control/cmd/dmg/telemetry.go | 36 ++----- src/control/lib/control/http.go | 43 +++----- src/control/lib/control/http_test.go | 72 ++++--------- src/control/lib/control/telemetry.go | 20 ++-- src/control/lib/control/telemetry_test.go | 28 ++--- src/control/lib/telemetry/promexp/httpd.go | 2 +- src/control/security/config.go | 20 ++-- src/control/server/config/server_test.go | 12 +-- src/control/server/telemetry.go | 9 +- .../ftest/control/dmg_telemetry_basic.yaml | 11 +- .../gen_telemetry_admin_certificate.sh | 101 ++++++++++++++++++ .../gen_telemetry_server_certificate.sh | 69 +++++++++++- .../telemetry/basic_client_telemetry.yaml | 12 ++- src/tests/ftest/util/agent_utils.py | 14 ++- src/tests/ftest/util/agent_utils_params.py | 2 - src/tests/ftest/util/command_utils.py | 52 ++++++--- src/tests/ftest/util/command_utils_base.py | 15 +-- src/tests/ftest/util/dmg_utils_params.py | 1 - 
src/tests/ftest/util/launch_utils.py | 44 +++++++- src/tests/ftest/util/server_utils.py | 12 ++- src/tests/ftest/util/server_utils_params.py | 2 - src/tests/ftest/util/telemetry_test_base.py | 65 +++++++++++ utils/config/daos_agent.yml | 4 - utils/config/daos_control.yml | 10 -- utils/config/daos_server.yml | 3 - utils/config/examples/daos_server_local.yml | 1 - utils/config/examples/daos_server_mdonssd.yml | 1 - utils/config/examples/daos_server_tcp.yml | 1 - utils/config/examples/daos_server_ucx.yml | 1 - utils/config/examples/daos_server_verbs.yml | 1 - 36 files changed, 465 insertions(+), 276 deletions(-) create mode 100755 src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh diff --git a/docs/admin/deployment.md b/docs/admin/deployment.md index bddd2edd1d8..585bb737e8c 100644 --- a/docs/admin/deployment.md +++ b/docs/admin/deployment.md @@ -759,12 +759,10 @@ transport_config: key: /etc/daos/certs/admin.key ``` -#### Telemetry Certificate Configuration +#### Telemetry Endpoint Configuration -The DAOS Telemetry framework has option to use certificates to authenticate -between server/client and admin node. -Creating of certificate is not part of DAOS scope and it is up to Admin to -generate the certificate and add it to the DAOS server and client system. +DAOS telemetry is accessed via an HTTP endpoint. This endpoint may be secured with over-the-wire transport security by configuring an HTTPS certificate and key. This must be a standard HTTPS certificate. DAOS server/agent/admin authentication certificates *cannot* be reused for this purpose. +If the server has an existing HTTPS certificate for its domain, it may be reused for the telemetry endpoint. Otherwise the administrator must acquire a certificate from an accepted Certificate Authority (CA). #### Telemetry Yaml Example @@ -773,8 +771,6 @@ Information on telelmetry config parameters in respective yaml file. 
```yaml # /etc/daos/daos_server.yml (servers) telemetry_config: - # To use telemetry in secure mode - allow_insecure: false # Set the server telemetry endpoint port number port: 9191 # Server certificate for use in TLS handshakes @@ -786,8 +782,6 @@ telemetry_config: ```yaml # /etc/daos/daos_agent.yml (clients) telemetry_config: - # To use telemetry in secure mode - allow_insecure: false # Enable client telemetry for all DAOS clients. enabled: true # Set the client telemetry endpoint port number @@ -805,8 +799,6 @@ telemetry_config: telemetry_config: # To use telemetry in secure mode allow_insecure: false - # Skip the Server certificate verification. Recomendate for testing purpose only. - https_exception: true ``` ### Server Startup diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go index 61b4e00a1e9..374b97d1ae1 100644 --- a/src/control/cmd/daos_agent/config.go +++ b/src/control/cmd/daos_agent/config.go @@ -40,6 +40,13 @@ func (rm refreshMinutes) Duration() time.Duration { return time.Duration(rm) } +// Support Old config options. +type LegacyConfig struct { + TelemetryPort int `yaml:"telemetry_port,omitempty"` + TelemetryEnabled bool `yaml:"telemetry_enabled,omitempty"` + TelemetryRetain time.Duration `yaml:"telemetry_retain,omitempty"` +} + // Config defines the agent configuration. type Config struct { SystemName string `yaml:"name"` @@ -59,10 +66,7 @@ type Config struct { FabricInterfaces []*NUMAFabricConfig `yaml:"fabric_ifaces,omitempty"` ProviderIdx uint // TODO SRS-31: Enable with multiprovider functionality TelemetryConfig *security.TelemetryConfig `yaml:"telemetry_config"` - // Support Old config options. - TelemetryPort int `yaml:"telemetry_port,omitempty"` - TelemetryEnabled bool `yaml:"telemetry_enabled,omitempty"` - TelemetryRetain time.Duration `yaml:"telemetry_retain,omitempty"` + Legacy LegacyConfig `yaml:",inline"` } // Validate performs basic validation of the configuration. 
@@ -76,16 +80,19 @@ func (c *Config) Validate() error { } // Support Old config options and copy it to the underline new structure value. - if c.TelemetryRetain > 0 { - c.TelemetryConfig.Retain = c.TelemetryRetain + if c.Legacy.TelemetryRetain > 0 { + c.TelemetryConfig.Retain = c.Legacy.TelemetryRetain + fmt.Println(" WARNING - Deprecated:telemetry_retain options can be move under telemetry_config") } - if c.TelemetryPort != 0 { - c.TelemetryConfig.Port = c.TelemetryPort + if c.Legacy.TelemetryPort != 0 { + c.TelemetryConfig.Port = c.Legacy.TelemetryPort + fmt.Println(" WARNING - Deprecated:telemetry_port options can be move under telemetry_config") } - if c.TelemetryEnabled { - c.TelemetryConfig.Enabled = c.TelemetryEnabled + if c.Legacy.TelemetryEnabled { + c.TelemetryConfig.Enabled = c.Legacy.TelemetryEnabled + fmt.Println(" WARNING - Deprecated:telemetry_enabled options can be move under telemetry_config") } if c.TelemetryConfig.Retain > 0 && c.TelemetryConfig.Port == 0 { @@ -100,6 +107,14 @@ func (c *Config) Validate() error { return errors.New("cannot specify both exclude_fabric_ifaces and include_fabric_ifaces") } + if len(c.TelemetryConfig.HttpsCert) == 0 && len(c.TelemetryConfig.HttpsKey) > 0 { + return errors.New("For secure mode, https_cert and https_key both required") + } + + if len(c.TelemetryConfig.HttpsCert) > 0 && len(c.TelemetryConfig.HttpsKey) == 0 { + return errors.New("For secure mode, https_cert and https_key both required") + } + return nil } @@ -140,12 +155,6 @@ func LoadConfig(cfgPath string) (*Config, error) { return nil, errors.Wrap(err, "agent config validation failed") } - if !cfg.TelemetryConfig.AllowInsecure { - if cfg.TelemetryConfig.HttpsCert == "" || cfg.TelemetryConfig.HttpsKey == "" { - return nil, errors.New("For secure mode, https_cert and https_key required under telemetry_config") - } - } - return cfg, nil } diff --git a/src/control/cmd/daos_agent/config_test.go b/src/control/cmd/daos_agent/config_test.go index 
34a83ba2dbc..24a1a1f3744 100644 --- a/src/control/cmd/daos_agent/config_test.go +++ b/src/control/cmd/daos_agent/config_test.go @@ -134,8 +134,8 @@ control_log_mask: debug transport_config: allow_insecure: true telemetry_config: - allow_insecure: false https_cert: "" + https_key: "valid_key" `) telemetryWithoutHttpsKey := test.CreateTestFile(t, dir, ` @@ -148,7 +148,7 @@ control_log_mask: debug transport_config: allow_insecure: true telemetry_config: - allow_insecure: false + https_cert: "valid_crt" https_key: "" `) @@ -182,11 +182,11 @@ telemetry_config: }, "telemetry with secure mode with no server certificate": { path: telemetryWithoutHttpsCert, - expErr: errors.New("For secure mode, https_cert and https_key required under telemetry_config"), + expErr: errors.New("For secure mode, https_cert and https_key both required"), }, "telemetry with secure mode with no server key": { path: telemetryWithoutHttpsKey, - expErr: errors.New("For secure mode, https_cert and https_key required under telemetry_config"), + expErr: errors.New("For secure mode, https_cert and https_key both required"), }, "without optional items": { path: withoutOptCfg, diff --git a/src/control/cmd/daos_agent/telemetry.go b/src/control/cmd/daos_agent/telemetry.go index 60bd83d0b33..20ae56e0988 100644 --- a/src/control/cmd/daos_agent/telemetry.go +++ b/src/control/cmd/daos_agent/telemetry.go @@ -17,11 +17,10 @@ import ( func startPrometheusExporter(ctx context.Context, log logging.Logger, cs *promexp.ClientSource, cfg *Config) (func(), error) { expCfg := &promexp.ExporterConfig{ - Port: cfg.TelemetryConfig.Port, - Title: "DAOS Client Telemetry", - AllowInsecure: cfg.TelemetryConfig.AllowInsecure, - HttpsCert: cfg.TelemetryConfig.HttpsCert, - HttpsKey: cfg.TelemetryConfig.HttpsKey, + Port: cfg.TelemetryConfig.Port, + Title: "DAOS Client Telemetry", + HttpsCert: cfg.TelemetryConfig.HttpsCert, + HttpsKey: cfg.TelemetryConfig.HttpsKey, Register: func(ctx context.Context, log logging.Logger) error { c, 
err := promexp.NewClientCollector(ctx, log, cs, &promexp.CollectorOpts{ RetainDuration: cfg.TelemetryConfig.Retain, diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index 2c9b5701e2b..bbdd99500f4 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -596,9 +596,7 @@ disable_hugepages: false control_log_mask: INFO control_log_file: /tmp/daos_server.log telemetry_config: - allow_insecure: true - https_cert: /etc/daos/certs/telemetry.crt - https_key: /etc/daos/certs/telemetry.key + telemetry_port: 9191 core_dump_filter: 19 name: daos_server socket_dir: /var/run/daos_server diff --git a/src/control/cmd/dmg/main.go b/src/control/cmd/dmg/main.go index e3bd8425663..c88845a304c 100644 --- a/src/control/cmd/dmg/main.go +++ b/src/control/cmd/dmg/main.go @@ -262,7 +262,6 @@ and access control settings, along with system wide operations.` if opts.Insecure { ctlCfg.TransportConfig.AllowInsecure = true - ctlCfg.TelemetryConfig.AllowInsecure = true } if err := ctlCfg.TransportConfig.PreLoadCertData(); err != nil { return errors.Wrap(err, "Unable to load Certificate Data") diff --git a/src/control/cmd/dmg/telemetry.go b/src/control/cmd/dmg/telemetry.go index 5806e44cef4..2e3fb5d8f31 100644 --- a/src/control/cmd/dmg/telemetry.go +++ b/src/control/cmd/dmg/telemetry.go @@ -196,17 +196,12 @@ type ( Targets []string `yaml:"targets,omitempty"` } - tlsConfig struct { - InsecureSkipVerify bool `yaml:"insecure_skip_verify,omitempty"` - } - scrapeConfig struct { JobName string `yaml:"job_name"` ScrapeInterval time.Duration `yaml:"scrape_interval,omitempty"` ScrapeTimeout time.Duration `yaml:"scrape_timeout,omitempty"` StaticConfigs []*staticConfig `yaml:"static_configs,omitempty"` Scheme string `yaml:"scheme,omitempty"` - TlsConfig tlsConfig `yaml:"tls_config,omitempty"` } promCfg struct { @@ -264,11 +259,9 @@ func (cmd *telemConfigCmd) configurePrometheus() (*installInfo, error) { return nil, err } - tc := tlsConfig{} scheme 
:= "" - if !cmd.cfgCmd.config.TelemetryConfig.AllowInsecure { + if cmd.cfgCmd.config.TelemetryConfig.HttpsCert != "" && cmd.cfgCmd.config.TelemetryConfig.HttpsKey != "" { cmd.Infof("Prometheus configuration is setup as Secure (https) mode") - tc.InsecureSkipVerify = cmd.cfgCmd.config.TelemetryConfig.HttpsException scheme = "https" } else { cmd.Infof("Prometheus configuration is setup as insecure (http) mode") @@ -280,7 +273,6 @@ func (cmd *telemConfigCmd) configurePrometheus() (*installInfo, error) { ScrapeInterval: 5 * time.Second, StaticConfigs: []*staticConfig{sc}, Scheme: scheme, - TlsConfig: tc, }, } @@ -334,25 +326,17 @@ func (cmd *metricsListCmd) Execute(args []string) error { req := new(control.MetricsListReq) req.Port = cmd.Port req.Host = host - req.HttpsException = cmd.cfgCmd.config.TelemetryConfig.HttpsException if !cmd.JSONOutputEnabled() { cmd.Info(getConnectingMsg(req.Host, req.Port)) } - // Trying Secure Mode First, It will ignore the certificate if it's not provided - // or request with the certificate. 
- if req.AllowInsecure { - cmd.Debug("Trying Secure Mode (HTTPS) with Exception") - } else { - cmd.Debug("Trying Secure Mode (HTTPS) with system certificate") - } - + // Try Secure Mode resp, err := control.MetricsList(cmd.MustLogCtx(), req) if err != nil { - cmd.Errorf("Secure Mode (HTTPS) failed: %s", err.Error()) - //Trying Insecure Mode - req.AllowInsecure = !req.AllowInsecure + cmd.Debugf("Secure Mode (HTTPS) failed: %s", err.Error()) + // Trying Insecure Mode + req.AllowInsecure = true cmd.Debug("Trying Insecure Mode (HTTP)") resp, err = control.MetricsList(cmd.MustLogCtx(), req) if err != nil { @@ -406,23 +390,17 @@ func (cmd *metricsQueryCmd) Execute(args []string) error { req := new(control.MetricsQueryReq) req.Port = cmd.Port req.Host = host - req.HttpsException = cmd.cfgCmd.config.TelemetryConfig.HttpsException req.MetricNames = common.TokenizeCommaSeparatedString(cmd.Metrics) if !cmd.JSONOutputEnabled() { cmd.Info(getConnectingMsg(req.Host, req.Port)) } - // Trying Secure Mode First, It will ignore the certificate if it's not provided - // or request with the certificate. 
- req.AllowInsecure = false - cmd.Debug("Trying Secure Mode (HTTPS) first, with system certificate") - resp, err := control.MetricsQuery(cmd.MustLogCtx(), req) if err != nil { - cmd.Errorf("Secure Mode (HTTPS) failed: %s", err.Error()) + cmd.Debugf("Secure Mode (HTTPS) failed: %s", err.Error()) //Trying Insecure Mode - req.AllowInsecure = !req.AllowInsecure + req.AllowInsecure = true cmd.Debug("Trying Insecure Mode (HTTP)") resp, err = control.MetricsQuery(cmd.MustLogCtx(), req) if err != nil { diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index 929719c90f5..b1de84ca731 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -39,16 +39,13 @@ type httpGetter interface { retryer getURL() *url.URL getBody(context.Context) ([]byte, error) - getAllowInsecure() bool - getHttpsException() bool } type httpReq struct { - url *url.URL - getFn httpGetFn - allowInsecure bool - httpsException bool - getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) + url *url.URL + getFn httpGetFn + allowInsecure bool + getBodyFn func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) } func (r *httpReq) canRetry(err error, cur uint) bool { @@ -87,14 +84,15 @@ func (r *httpReq) getAllowInsecure() bool { return r.allowInsecure } -func (r *httpReq) getHttpsException() bool { - return r.httpsException -} - func (r *httpReq) httpGetFunc() httpGetFn { if r.getFn == nil { r.getFn = http.Get } + + if r.allowInsecure == false { + r.getFn = httpsSecureGetFunc() + } + return r.getFn } @@ -103,7 +101,7 @@ func (r *httpReq) getBody(ctx context.Context) ([]byte, error) { r.getBodyFn = httpGetBody } - return r.getBodyFn(ctx, r.getURL(), r.httpGetFunc(), r.getRetryTimeout(), r.getAllowInsecure(), r.getHttpsException()) + return r.getBodyFn(ctx, r.getURL(), r.httpGetFunc(), r.getRetryTimeout()) } func httpGetBodyRetry(ctx context.Context, req httpGetter) ([]byte, error) { @@ -130,32 
+128,25 @@ func httpGetBodyRetry(ctx context.Context, req httpGetter) ([]byte, error) { // httpsSecureGetFunc will prepare the GET requested using the certificate for secure mode // and return the http.Get -func httpsSecureGetFunc(httpsException bool) (httpGetFn, error) { +func httpsSecureGetFunc() httpGetFn { rootCAs, _ := x509.SystemCertPool() - if rootCAs == nil { - return nil, errors.New("Failed to load system root certificates") - } tlsConfig := &tls.Config{ RootCAs: rootCAs, } - if httpsException { - tlsConfig.InsecureSkipVerify = true - } - tr := &http.Transport{ TLSClientConfig: tlsConfig, } client := &http.Client{Transport: tr} - return client.Get, nil + return client.Get } // httpGetBody executes a simple HTTP GET request to a given URL and returns the // content of the response body. -func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration, allowInsecure bool, httpsException bool) ([]byte, error) { +func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time.Duration) ([]byte, error) { if url == nil { return nil, errors.New("nil URL") } @@ -168,14 +159,6 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. 
return nil, errors.New("nil get function") } - if allowInsecure == false { - var err error - get, err = httpsSecureGetFunc(httpsException) - if err != nil { - return nil, err - } - } - httpCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() diff --git a/src/control/lib/control/http_test.go b/src/control/lib/control/http_test.go index c9dfb6015d7..4318b9248dc 100644 --- a/src/control/lib/control/http_test.go +++ b/src/control/lib/control/http_test.go @@ -123,43 +123,35 @@ func newErrMockReadCloser(err error) *mockReadCloser { func TestControl_httpGetBody(t *testing.T) { defaultURL := &url.URL{Host: "testhost"} - defaultAllowInsecure := true - falseAllowInsecure := false for name, tc := range map[string]struct { - url *url.URL - timeout time.Duration - cancelCtx bool - getFn httpGetFn - allowInsecure bool - httpsException bool - expResult []byte - expErr error + url *url.URL + timeout time.Duration + cancelCtx bool + getFn httpGetFn + expResult []byte + expErr error }{ "nil url": { expErr: errors.New("nil URL"), }, "empty URL": { - url: &url.URL{}, - allowInsecure: defaultAllowInsecure, - expErr: errors.New("host address is required"), + url: &url.URL{}, + expErr: errors.New("host address is required"), }, "nil getFn": { - url: defaultURL, - allowInsecure: defaultAllowInsecure, - expErr: errors.New("nil get function"), + url: defaultURL, + expErr: errors.New("nil get function"), }, "getFn error": { - url: defaultURL, - allowInsecure: defaultAllowInsecure, + url: defaultURL, getFn: func(_ string) (*http.Response, error) { return nil, errors.New("mock getFn") }, expErr: errors.New("mock getFn"), }, "http.Response error": { - url: defaultURL, - allowInsecure: defaultAllowInsecure, + url: defaultURL, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusNotFound, @@ -169,8 +161,7 @@ func TestControl_httpGetBody(t *testing.T) { expErr: errors.New("HTTP response error: 404 Not Found"), }, "empty body": { - url: 
defaultURL, - allowInsecure: defaultAllowInsecure, + url: defaultURL, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -180,8 +171,7 @@ func TestControl_httpGetBody(t *testing.T) { expResult: []byte{}, }, "success with body": { - url: defaultURL, - allowInsecure: defaultAllowInsecure, + url: defaultURL, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -190,19 +180,8 @@ func TestControl_httpGetBody(t *testing.T) { }, expResult: []byte("this is the body of an HTTP response"), }, - "failure with body in secure mode without CA certificate path": { - url: defaultURL, - allowInsecure: falseAllowInsecure, - getFn: func(_ string) (*http.Response, error) { - return &http.Response{ - StatusCode: http.StatusOK, - }, nil - }, - expErr: errors.New("Get \"//testhost\": unsupported protocol scheme"), - }, "reading body fails": { - url: defaultURL, - allowInsecure: defaultAllowInsecure, + url: defaultURL, getFn: func(_ string) (*http.Response, error) { return &http.Response{ StatusCode: http.StatusOK, @@ -212,9 +191,8 @@ func TestControl_httpGetBody(t *testing.T) { expErr: errors.New("reading HTTP response body: mock Read"), }, "request times out": { - url: defaultURL, - allowInsecure: defaultAllowInsecure, - timeout: 5 * time.Millisecond, + url: defaultURL, + timeout: 5 * time.Millisecond, getFn: func(_ string) (*http.Response, error) { time.Sleep(1 * time.Second) return &http.Response{ @@ -225,9 +203,8 @@ func TestControl_httpGetBody(t *testing.T) { expErr: HTTPReqTimedOut(defaultURL.String()), }, "request canceled": { - url: defaultURL, - allowInsecure: defaultAllowInsecure, - cancelCtx: true, + url: defaultURL, + cancelCtx: true, getFn: func(_ string) (*http.Response, error) { time.Sleep(1 * time.Second) return &http.Response{ @@ -252,7 +229,7 @@ func TestControl_httpGetBody(t *testing.T) { tc.timeout = time.Second } - result, err := httpGetBody(ctx, tc.url, tc.getFn, 
tc.timeout, tc.allowInsecure, tc.httpsException) + result, err := httpGetBody(ctx, tc.url, tc.getFn, tc.timeout) test.CmpErr(t, tc.expErr, err) if diff := cmp.Diff(tc.expResult, result); diff != "" { @@ -270,7 +247,6 @@ type mockHTTPGetter struct { getBodyErr error getBodyCalled uint getBodyFailures uint - httpsException bool } func (r *mockHTTPGetter) canRetry(err error, cur uint) bool { @@ -297,14 +273,6 @@ func (r *mockHTTPGetter) getURL() *url.URL { } } -func (r *mockHTTPGetter) getAllowInsecure() bool { - return true -} - -func (r *mockHTTPGetter) getHttpsException() bool { - return true -} - func (r *mockHTTPGetter) getBody(ctx context.Context) ([]byte, error) { r.getBodyCalled++ if r.getBodyCalled <= r.getBodyFailures { diff --git a/src/control/lib/control/telemetry.go b/src/control/lib/control/telemetry.go index 108453584d6..ad513e8251f 100644 --- a/src/control/lib/control/telemetry.go +++ b/src/control/lib/control/telemetry.go @@ -84,11 +84,10 @@ type ( // MetricsListReq is used to request the list of metrics. MetricsListReq struct { httpReq - Host string // Host to query for telemetry data - Port uint32 // Port to use for collecting telemetry data - AllowInsecure bool // Set the https end point secure - HttpsException bool // Use the Https with Exception (Insecure) - Log logging.Logger // Logging the info + Host string // Host to query for telemetry data + Port uint32 // Port to use for collecting telemetry data + AllowInsecure bool // Set the https end point secure + Log logging.Logger // Logging the info } // MetricsListResp contains the list of available metrics. @@ -112,7 +111,6 @@ func MetricsList(ctx context.Context, req *MetricsListReq) (*MetricsListResp, er } req.allowInsecure = req.AllowInsecure - req.httpsException = req.HttpsException req.url = getMetricsURL(req.Host, req.Port, req.allowInsecure) scraped, err := scrapeMetrics(ctx, req) if err != nil { @@ -140,11 +138,10 @@ type ( // MetricsQueryReq is used to query telemetry values. 
MetricsQueryReq struct { httpReq - Host string // host to query for telemetry data - Port uint32 // port to use for collecting telemetry data - AllowInsecure bool // Set the https end point secure - HttpsException bool // Use the Https with Exception (Insecure) - MetricNames []string // if empty, collects all metrics + Host string // host to query for telemetry data + Port uint32 // port to use for collecting telemetry data + AllowInsecure bool // Set the https end point secure + MetricNames []string // if empty, collects all metrics } // MetricsQueryResp contains the list of telemetry values per host. @@ -168,7 +165,6 @@ func MetricsQuery(ctx context.Context, req *MetricsQueryReq) (*MetricsQueryResp, } req.allowInsecure = req.AllowInsecure - req.httpsException = req.HttpsException req.url = getMetricsURL(req.Host, req.Port, req.allowInsecure) scraped, err := scrapeMetrics(ctx, req) if err != nil { diff --git a/src/control/lib/control/telemetry_test.go b/src/control/lib/control/telemetry_test.go index af0528be5f5..0864111746e 100644 --- a/src/control/lib/control/telemetry_test.go +++ b/src/control/lib/control/telemetry_test.go @@ -118,10 +118,10 @@ func newTestPBHistogram(numBuckets int) *pclient.Metric { return metric } -func mockScrapeFnSuccess(t *testing.T, metricFam ...*pclient.MetricFamily) func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { +func mockScrapeFnSuccess(t *testing.T, metricFam ...*pclient.MetricFamily) func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { t.Helper() - return func(_ context.Context, _ *url.URL, _ httpGetFn, _ time.Duration, _ bool, _ bool) ([]byte, error) { + return func(_ context.Context, _ *url.URL, _ httpGetFn, _ time.Duration) ([]byte, error) { var b strings.Builder for _, mf := range metricFam { _, err := expfmt.MetricFamilyToText(&b, mf) @@ -147,12 +147,12 @@ func TestControl_scrapeMetrics(t *testing.T) { for name, tc := range map[string]struct { req httpGetter - 
scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) expResult pbMetricMap expErr error }{ "check scrape params": { - scrapeFn: func(_ context.Context, url *url.URL, getter httpGetFn, timeout time.Duration, allowInsecure bool, httpsException bool) ([]byte, error) { + scrapeFn: func(_ context.Context, url *url.URL, getter httpGetFn, timeout time.Duration) ([]byte, error) { test.AssertEqual(t, testURL.Scheme, url.Scheme, "") test.AssertEqual(t, testURL.Host, url.Host, "") test.AssertEqual(t, testURL.Path, url.Path, "") @@ -166,19 +166,19 @@ func TestControl_scrapeMetrics(t *testing.T) { expResult: pbMetricMap{}, }, "HTTP scrape error": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), }, "scrape returns no content": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { return []byte{}, nil }, expResult: pbMetricMap{}, }, "scrape returns bad content": { - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { return []byte("

Hello world

"), nil }, expErr: errors.New("parsing error"), @@ -217,7 +217,7 @@ func TestControl_MetricsList(t *testing.T) { } for name, tc := range map[string]struct { - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) req *MetricsListReq expResp *MetricsListResp expErr error @@ -241,7 +241,7 @@ func TestControl_MetricsList(t *testing.T) { Port: 1066, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), @@ -252,7 +252,7 @@ func TestControl_MetricsList(t *testing.T) { Port: 8888, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { return []byte{}, nil }, expResp: &MetricsListResp{ @@ -284,7 +284,7 @@ func TestControl_MetricsList(t *testing.T) { } { t.Run(name, func(t *testing.T) { if tc.scrapeFn == nil { - tc.scrapeFn = func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { + tc.scrapeFn = func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { return nil, nil } } @@ -432,7 +432,7 @@ func TestControl_MetricsQuery(t *testing.T) { } for name, tc := range map[string]struct { - scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) + scrapeFn func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) req *MetricsQueryReq expResp *MetricsQueryResp expErr error @@ -456,7 +456,7 @@ func TestControl_MetricsQuery(t *testing.T) { Port: 1066, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { + 
scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { return nil, errors.New("mock scrape") }, expErr: errors.New("mock scrape"), @@ -467,7 +467,7 @@ func TestControl_MetricsQuery(t *testing.T) { Port: 8888, AllowInsecure: true, }, - scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration, bool, bool) ([]byte, error) { + scrapeFn: func(context.Context, *url.URL, httpGetFn, time.Duration) ([]byte, error) { return []byte{}, nil }, expResp: &MetricsQueryResp{ diff --git a/src/control/lib/telemetry/promexp/httpd.go b/src/control/lib/telemetry/promexp/httpd.go index 0f6a0920dd8..2d92b925d2f 100644 --- a/src/control/lib/telemetry/promexp/httpd.go +++ b/src/control/lib/telemetry/promexp/httpd.go @@ -85,7 +85,7 @@ func StartExporter(ctx context.Context, log logging.Logger, cfg *ExporterConfig) // http listener is a blocking call go func() { log.Infof("Listening on %s", listenAddress) - if cfg.AllowInsecure { + if cfg.HttpsCert == "" && cfg.HttpsKey == "" { log.Infof("Prometheus web exporter started with insecure (http) mode") err := srv.ListenAndServe() log.Infof("Prometheus web exporter stopped: %s", err.Error()) diff --git a/src/control/security/config.go b/src/control/security/config.go index 485e7ffdf87..2aa7d7fed18 100644 --- a/src/control/security/config.go +++ b/src/control/security/config.go @@ -110,23 +110,21 @@ type TransportConfig struct { // TelemetryConfig contains all the information on whether or not to use // secure endpoint for telemetry and their location if their use is specified. 
type TelemetryConfig struct { - Port int `yaml:"telemetry_port,omitempty"` - AllowInsecure bool `yaml:"allow_insecure,omitempty"` - Enabled bool `yaml:"telemetry_enabled,omitempty"` - Retain time.Duration `yaml:"telemetry_retain,omitempty"` - HttpsCert string `yaml:"https_cert,omitempty"` - HttpsKey string `yaml:"https_key,omitempty"` - HttpsException bool `yaml:"https_exception,omitempty"` + Port int `yaml:"telemetry_port,omitempty"` + Enabled bool `yaml:"telemetry_enabled,omitempty"` + Retain time.Duration `yaml:"telemetry_retain,omitempty"` + HttpsCert string `yaml:"https_cert,omitempty"` + HttpsKey string `yaml:"https_key,omitempty"` } // DefaultClientTelemetryConfig provides a default telemetry config disabling // certificate usage and specifying certificates located under /etc/daos/certs. func DefaultClientTelemetryConfig() *TelemetryConfig { return &TelemetryConfig{ - Enabled: false, - AllowInsecure: true, - HttpsCert: defaultTelemetryCert, - HttpsKey: defaultTelemetryKey, + Port: 9191, + Enabled: false, + HttpsCert: "", + HttpsKey: "", } } diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index b3b03dffcb1..c72ae07ec3d 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -250,10 +250,9 @@ func TestServerConfig_Constructed(t *testing.T) { WithHelperLogFile("/tmp/daos_server_helper.log"). WithFirmwareHelperLogFile("/tmp/daos_firmware_helper.log"). WithTelemetryConfig(&security.TelemetryConfig{ - AllowInsecure: true, - Port: 9191, - HttpsCert: "/etc/daos/certs/telemetry.crt", - HttpsKey: "/etc/daos/certs/telemetry.key"}). + Port: 9191, + HttpsCert: "/etc/daos/certs/telemetry.crt", + HttpsKey: "/etc/daos/certs/telemetry.key"}). WithSystemName("daos_server"). WithSocketDir("./.daos/daos_server"). WithFabricProvider("ofi+verbs;ofi_rxm"). @@ -425,10 +424,7 @@ func TestServerConfig_MDonSSD_Constructed(t *testing.T) { }). WithControlLogFile("/tmp/daos_server.log"). 
WithTelemetryConfig(&security.TelemetryConfig{ - AllowInsecure: true, - Port: 9191, - HttpsCert: "/etc/daos/certs/telemetry.crt", - HttpsKey: "/etc/daos/certs/telemetry.key"}). + Port: 9191}). WithFabricProvider("ofi+tcp"). WithMgmtSvcReplicas("example1", "example2", "example3") diff --git a/src/control/server/telemetry.go b/src/control/server/telemetry.go index 28848f6879e..b25eeb2b8f2 100644 --- a/src/control/server/telemetry.go +++ b/src/control/server/telemetry.go @@ -70,11 +70,10 @@ func regPromEngineSources(ctx context.Context, log logging.Logger, engines []Eng func startPrometheusExporter(ctx context.Context, srv *server) (func(), error) { expCfg := &promexp.ExporterConfig{ - Port: srv.cfg.TelemetryConfig.Port, - Title: "DAOS Engine Telemetry", - AllowInsecure: srv.cfg.TelemetryConfig.AllowInsecure, - HttpsCert: srv.cfg.TelemetryConfig.HttpsCert, - HttpsKey: srv.cfg.TelemetryConfig.HttpsKey, + Port: srv.cfg.TelemetryConfig.Port, + Title: "DAOS Engine Telemetry", + HttpsCert: srv.cfg.TelemetryConfig.HttpsCert, + HttpsKey: srv.cfg.TelemetryConfig.HttpsKey, Register: func(ctx context.Context, log logging.Logger) error { return regPromEngineSources(ctx, srv.log, srv.harness.Instances()) }, diff --git a/src/tests/ftest/control/dmg_telemetry_basic.yaml b/src/tests/ftest/control/dmg_telemetry_basic.yaml index f9c41e9eafb..894113cf5b7 100644 --- a/src/tests/ftest/control/dmg_telemetry_basic.yaml +++ b/src/tests/ftest/control/dmg_telemetry_basic.yaml @@ -4,6 +4,8 @@ hosts: timeouts: test_telemetry_list: 60 test_container_telemetry: 230 +setup: + start_servers_once: False server_config: name: daos_server engines_per_host: 1 @@ -17,7 +19,11 @@ server_config: scm_mount: /mnt/daos system_ram_reserved: 1 telemetry_config: - allow_insecure: false + secure_mode: !mux + case1: + telemetry_secure_mode: true + case2: + telemetry_secure_mode: false pool: scm_size: 2G container: @@ -25,6 +31,3 @@ container: test: container_qty: 5 open_close_qty: 3 -dmg: - telemetry_config: - 
allow_insecure: false diff --git a/src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh b/src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh new file mode 100755 index 00000000000..ecf8f04a0cc --- /dev/null +++ b/src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh @@ -0,0 +1,101 @@ +#!/bin/bash +# /* +# * (C) Copyright 2024 Intel Corporation. +# * +# * SPDX-License-Identifier: BSD-2-Clause-Patent +# */ + +__usage=" +Usage: gen_telemetry_admin_certificate.sh [DIR] +Generate certificates for DAOS deployment in the [DIR]/daosTelemetryCA. +By default [DIR] is the current directory. +" + +function print_usage () { + >&2 echo "$__usage" +} + +# validity of root CA and keys' certificates +DAYS=1095 + +CA_HOME="${1:-.}/daosTelemetryCA" + +function setup_directories () { + mkdir -p "${CA_HOME}" + chmod 700 "${CA_HOME}" +} + +function generate_ca_cnf () { + echo " +[ ca ] +default_ca = CA_daos_telemetry + +[ CA_daos_telemetry ] +dir = ${CA_HOME} +certs = \$dir + +# Key and Certificate for the root +certificate = \$dir/daosTelemetryCA.crt +private_key = \$dir/daosTelemetryCA.key + +default_md = sha512 # SAFE Crypto Requires SHA-512 +default_days = ${DAYS} # how long to certify for +copy_extensions = copy +unique_subject = no + +[ req ] +prompt = no +distinguished_name = ca_dn +x509_extensions = ca_ext + +[ ca_dn ] +organizationName = DAOS +commonName = DAOS CA TELEMETRY + +[ ca_ext ] +keyUsage = critical,digitalSignature,nonRepudiation,keyEncipherment,keyCertSign +basicConstraints = critical,CA:true,pathlen:1 + +[ signing_policy ] +organizationName = supplied +commonName = supplied + +" > "${CA_HOME}/ca.cnf" +} + +function generate_ca_cert () { + echo "Generating Private CA Root Certificate" + # Generate Private key and set permissions + openssl genrsa -out "${CA_HOME}/daosTelemetryCA.key" 3072 + [[ $EUID -eq 0 ]] && chown root.root "${CA_HOME}/daosTelemetryCA.key" 2>/dev/null + chmod 0400 "${CA_HOME}/daosTelemetryCA.key" + # Generate CA Certificate + 
openssl req -new -x509 -config "${CA_HOME}/ca.cnf" -days ${DAYS} -sha512 \ + -key "${CA_HOME}/daosTelemetryCA.key" \ + -out "${CA_HOME}/daosTelemetryCA.crt" -batch + [[ $EUID -eq 0 ]] && chown root.daos_daemons "${CA_HOME}/daosTelemetryCA.crt" 2>/dev/null + chmod 0644 "${CA_HOME}/daosTelemetryCA.crt" + # Reset the CA index + rm -f "${CA_HOME}/index.txt" "${CA_HOME}/serial.txt" + touch "${CA_HOME}/index.txt" + echo '01' > "${CA_HOME}/serial.txt" + echo "Private CA Root Certificate for Telemetry created in ${CA_HOME}" +} + +function cleanup () { + rm -f "${CA_HOME}/ca.cnf" +} + +function main () { + if [[ -d "$CA_HOME" ]] + then + echo "$CA_HOME already exists, exiting." + exit 1 + fi + setup_directories + generate_ca_cnf + generate_ca_cert + cleanup +} + +main \ No newline at end of file diff --git a/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh b/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh index 6d359fbecaa..ef1ea0578d1 100755 --- a/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh +++ b/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh @@ -19,14 +19,73 @@ Usage: gen_telemetry_server_certificate.sh [USER] [DIR] DIR: Generate telemetry certificates for DAOS metrics in the [DIR]. By default [DIR] is the current directory. 
" + DAYS=1095 USER=$1 CA_HOME="${2:-.}/" HOSTNAME=$(hostname -s) -openssl req -x509 -newkey rsa:4096 -keyout "${CA_HOME}/telemetry.key" -out "${CA_HOME}/telemetry.crt" -sha256 -days ${DAYS} -nodes -subj "/CN=\"${HOSTNAME}\"" -chmod 0400 "${CA_HOME}/telemetry.key" -chmod 0644 "${CA_HOME}/telemetry.crt" -chown "${USER}"."${USER}" "${CA_HOME}/telemetry.key" -chown "${USER}"."${USER}" "${CA_HOME}/telemetry.crt" +function print_usage () { + >&2 echo "$__usage" +} + +function generate_ca_cnf () { + echo " +[req] +default_md = sha256 +prompt = no +req_extensions = v3_ext +distinguished_name = req_distinguished_name + +[req_distinguished_name] +CN = ${HOSTNAME} + +[v3_ext] +keyUsage = critical,digitalSignature,keyEncipherment +extendedKeyUsage = critical,serverAuth,clientAuth +subjectAltName = DNS:${HOSTNAME} + +" > "${CA_HOME}/telemetry.cnf" +} + +function generate_server_cert () { + echo "Generating Server Certificate" + # Generate Private key and set its permissions + openssl genrsa -out "${CA_HOME}/telemetry.key" 2048 + [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetry.key" + chmod 0400 "${CA_HOME}/telemetry.key" + + # Generate a Certificate Signing Request (CSR) + openssl req -new -key "${CA_HOME}/telemetry.key" \ + -out "${CA_HOME}/telemetry.csr" -config "${CA_HOME}/telemetry.cnf" + + # Create Certificate from request + openssl x509 -req -in "${CA_HOME}/telemetry.csr" -CA "${CA_HOME}/daosTelemetryCA.crt" \ + -CAkey "${CA_HOME}/daosTelemetryCA.key" -CAcreateserial -out "${CA_HOME}/telemetry.crt" \ + -days ${DAYS} -sha256 -extfile "$CA_HOME/telemetry.cnf" -extensions v3_ext + + [[ $EUID -eq 0 ]] && chown "${USER}"."${USER}" "${CA_HOME}/telemetry.crt" + chmod 0644 "${CA_HOME}/telemetry.crt" + + echo "Required Server Certificate Files: + ${CA_HOME}/daosTelemetryCA.crt + ${CA_HOME}/telemetry.key + ${CA_HOME}/telemetry.crt" +} + +function cleanup () { + # Remove this key as it's not required after creating the telemetry.key + rm -f 
"${CA_HOME}/daosTelemetryCA.key" + + rm -f "${CA_HOME}/telemetry.csr" + rm -f "${CA_HOME}/telemetry.cnf" +} + +function main () { + generate_ca_cnf + generate_server_cert + cleanup +} + +main \ No newline at end of file diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.yaml b/src/tests/ftest/telemetry/basic_client_telemetry.yaml index 71c6c361cd9..ff001c95105 100644 --- a/src/tests/ftest/telemetry/basic_client_telemetry.yaml +++ b/src/tests/ftest/telemetry/basic_client_telemetry.yaml @@ -4,6 +4,9 @@ hosts: timeout: 180 +setup: + start_agents_once: False + server_config: name: daos_server engines_per_host: 1 @@ -19,10 +22,14 @@ server_config: agent_config: telemetry_config: - allow_insecure: false telemetry_port: 9191 telemetry_retain: 30s telemetry_enabled: true + secure_mode: !mux + case1: + telemetry_secure_mode: true + case2: + telemetry_secure_mode: false pool: scm_size: 2G @@ -46,6 +53,3 @@ ior_write: ior_read: <<: *ior_base flags: "-v -r -R -G 1" -dmg: - telemetry_config: - allow_insecure: false diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index f437bc36e04..df86eda6145 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -262,6 +262,9 @@ def __init__(self, group, bin_dir, cert_dir, config_file, run_user, config_temp= # Support disabling verifying the socket directory (runtime_dir) for tests self.verify_socket_dir = True + # Set the certificate dir + self.telemetry_certificate_dir = cert_dir + def _set_hosts(self, hosts, path, slots): """Set the hosts used to execute the daos command. 
@@ -287,8 +290,6 @@ def start(self): # Copy certificates self.manager.job.copy_certificates( get_log_file("daosCA/certs"), self._hosts) - self.manager.job.generate_telemetry_server_certificates(self._hosts, "daos_agent") - # Verify the socket directory exists when using a non-systemctl manager if self.verify_socket_dir: self.verify_socket_directory(self.manager.job.certificate_owner) @@ -392,3 +393,12 @@ def get_socket_dir(self): str: the socket directory """ return self.get_config_value("runtime_dir") + + def prepare_telemetry_certificate(self): + """Prepare Telemetry certificate""" + self.manager.job.copy_telemetry_root_certificates(get_log_file("daosTelemetryCA"), + self.telemetry_certificate_dir, + self._hosts) + self.manager.job.generate_telemetry_server_certificates(self._hosts, + "daos_agent", + self.telemetry_certificate_dir) diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index 889c8debf48..59b995437b0 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -44,8 +44,6 @@ def __init__(self, log_dir=os.path.join(os.sep, "tmp")): self.telemetry_port = BasicParameter(None, 9192) self.telemetry_enabled = BasicParameter(None) self.telemetry_retain = BasicParameter(None) - self.https_cert = LogParameter(self._log_dir, None, "telemetry.crt") - self.https_key = LogParameter(self._log_dir, None, "telemetry.key") def _get_new(self): """Get a new object based upon this one. 
diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index dae615dc12e..df0cfbb9f2c 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -1045,27 +1045,47 @@ def copy_certificates(self, source, hosts): self._command, ", ".join(names)) get_file_listing(hosts, names, self.run_user).log_output(self.log) - def generate_telemetry_server_certificates(self, hosts, user): + def copy_telemetry_root_certificates(self, source, destination, hosts): + """Copy telemetry certificates files from the source to the destination hosts. + Args: + source (str) : source of the certificate files. + destination (str): copy file destination dir. + hosts (NodeSet): list of the destination hosts. + """ + certfiles = ["daosTelemetryCA.crt", "daosTelemetryCA.key"] + + for file_name in certfiles: + src_file = os.path.join(source, file_name) + dst_file = os.path.join(destination, file_name) + self.log.debug(" %s -> %s", src_file, dst_file) + result = distribute_files( + self.log, hosts, src_file, dst_file, mkdir=False, + verbose=False, sudo=True, owner=self.certificate_owner) + if not result.passed: + self.log.info(" WARNING: %s copy telemetry cert failed on %s", + dst_file, result.failed_hosts) + + def generate_telemetry_server_certificates(self, hosts, user, destination): """Generate the telemetry certificates for the test on server/client. Args: hosts (NodeSet): list of the destination hosts. - user (str): User permission set on telemetry certificate file. + user (User): User permission set on telemetry certificate file. For server, it's daos_server and for client it's daos_agent. 
- """ - data = self.yaml.telemetry_config.get_certificate_data( - self.yaml.telemetry_config.get_attribute_names(LogParameter)) - destination = list(data.keys())[0] - - if not self.yaml.telemetry_config.allow_insecure.value: - certgen_dir = os.path.abspath( - os.path.join(os.getcwd(), "scripts")) - command = os.path.join(certgen_dir, "gen_telemetry_server_certificate.sh ") - command = command_as_user(command + user + " " + destination, "root") - self.log.debug("Generating the telemetry certificate command %s:", command) - result = run_remote(self.log, hosts, command, 30) - if not result.passed: - self.log.info(" WARNING: command %s failed", command) + destination (str): Generate telemetry certificates in to directory. + + Raises: + CommandFailure: if there is an error running script on remote machine. + """ + certgen_dir = os.path.abspath( + os.path.join(os.getcwd(), "scripts")) + command = os.path.join(certgen_dir, "gen_telemetry_server_certificate.sh ") + command = "sudo " + command + user + " " + destination + self.log.debug("Generating the telemetry certificate command %s:", command) + result = run_remote(self.log, hosts, command, 30) + if not result.passed: + raise CommandFailure( + f"ERROR: Failed to generate the secure certificate {result.failed_hosts}") def copy_configuration(self, hosts): """Copy the yaml configuration file to the hosts. 
diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index e289ebf9d45..d5197cbbbd3 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -814,9 +814,6 @@ def __init__(self, namespace, title, log_dir): """ super().__init__(namespace, None, title) self._log_dir = log_dir - default_insecure = str(os.environ.get("DAOS_TEST_INSECURE_MODE", True)) - default_insecure = default_insecure.lower() == "true" - self.allow_insecure = BasicParameter(None, default_insecure) self.telemetry_port = BasicParameter(None, 9191) self.telemetry_retain = None self.telemetry_enabled = None @@ -829,28 +826,18 @@ def get_yaml_data(self): """ yaml_data = super().get_yaml_data() - - if self.title is not None: - yaml_data[self.title]["allow_insecure"] = self.allow_insecure.value - else: - yaml_data["allow_insecure"] = self.allow_insecure.value - return yaml_data def get_certificate_data(self, name_list): """Get certificate data by name_list. - Args: name_list (list): list of certificate attribute names. - Returns: data (dict): a dictionary of parameter directory name keys and value. - """ data = {} - if self.allow_insecure.value: - return data + for name in name_list: value = getattr(self, name).value if isinstance(value, str): diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index 7faa37a0f40..f1c262c924f 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -32,7 +32,6 @@ class DmgTelemetryConfig(TelemetryConfig): def __init__(self, log_dir="/tmp"): """Initialize a TelemetryConfig object.""" super().__init__("/run/dmg/telemetry_config/*", None, log_dir) - self.https_exception = BasicParameter(None, True) def _get_new(self): """Get a new object based upon this one. 
diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index 2d33cceac05..715dceb4a60 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2022-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -425,7 +426,12 @@ def prepare(self, logger, test_log_file, test, repeat, user_create, slurm_setup, return status # Generate certificate files for the test - return self._generate_certs(logger) + status = self._generate_certs(logger) + if status: + return status + + # Generate telemetry certificate files for the test + return self._generate_telemetry_certs(logger) def execute(self, logger, test, repeat, number, sparse, fail_fast): """Run the specified test. @@ -870,6 +876,42 @@ def _generate_certs(self, logger): return 0 + def _generate_telemetry_certs(self, logger): + """Generate the telemetry certificates for the test and + copy to system default certificate location + Returns: + logger (Logger): logger for the messages produced by this method + int: status code: 0 = success, 128 = failure + """ + logger.debug("-" * 80) + logger.debug("Generating telemetry certificate") + test_env = TestEnvironment() + certs_dir = os.path.join(test_env.log_dir, "daosTelemetryCA") + certgen_dir = os.path.abspath(os.path.join(os.getcwd(), "scripts")) + command = os.path.join(certgen_dir, "gen_telemetry_admin_certificate.sh") + if not run_local(logger, f"/usr/bin/rm -rf {certs_dir}").passed: + message = "Error removing old telemetry certificates" + self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) + return 128 + if not run_local(logger, f"{command} {test_env.log_dir}").passed: + message = "Error generating telemetry certificates" + self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) + return 128 + + from_dir = os.path.join(certs_dir, "daosTelemetryCA.crt") + 
to_dir = "/etc/pki/ca-trust/source/anchors/" + if not run_local(logger, f"sudo cp -rf {from_dir} {to_dir}").passed: + message = "Copy telemetry certificate failed" + self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) + return 128 + + if not run_local(logger, "sudo update-ca-trust").passed: + message = "Error running update-ca-trust command" + self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) + return 128 + + return 0 + def _collect_crash_files(self, logger): """Move any avocado crash files into job-results/latest/crashes. diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index cd284a39a2a..cd9b568db7d 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -143,6 +143,8 @@ def __init__(self, group, bin_dir, # defined in the self.manager.job.yaml object. self._external_yaml_data = None + self.telemetry_certificate_dir = svr_cert_dir + @property def engines(self): """Get the total number of engines. @@ -227,6 +229,15 @@ def _prepare_dmg_hostlist(self, hosts=None): hosts = self._hosts self.dmg.hostlist = hosts + def prepare_telemetry_certificate(self): + """Prepare Telemetry certificate""" + self.manager.job.copy_telemetry_root_certificates(get_log_file("daosTelemetryCA"), + self.telemetry_certificate_dir, + self._hosts) + self.manager.job.generate_telemetry_server_certificates(self._hosts, + "daos_server", + self.telemetry_certificate_dir) + def prepare(self, storage=True): """Prepare to start daos_server. 
@@ -245,7 +256,6 @@ def prepare(self, storage=True): # Copy certificates self.manager.job.copy_certificates(get_log_file("daosCA/certs"), self._hosts) - self.manager.job.generate_telemetry_server_certificates(self._hosts, "daos_server") self._prepare_dmg_certificates() # Prepare dmg for running storage format on all server hosts diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 050a6cdc17a..4a73ca8a491 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -71,8 +71,6 @@ def __init__(self, log_dir=os.path.join(os.sep, "tmp")): # - https_key: : Server Key portion # self.telemetry_port = BasicParameter(None, 9191) - self.https_cert = LogParameter(self._log_dir, None, "telemetry.crt") - self.https_key = LogParameter(self._log_dir, None, "telemetry.key") def _get_new(self): """Get a new object based upon this one. diff --git a/src/tests/ftest/util/telemetry_test_base.py b/src/tests/ftest/util/telemetry_test_base.py index 6a2389935f7..571f296b336 100644 --- a/src/tests/ftest/util/telemetry_test_base.py +++ b/src/tests/ftest/util/telemetry_test_base.py @@ -1,8 +1,11 @@ """ (C) Copyright 2021-2024 Intel Corporation. +(C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ +import os + from apricot import TestWithServers from telemetry_utils import ClientTelemetryUtils, TelemetryUtils @@ -24,6 +27,10 @@ def setUp(self): self.telemetry = TelemetryUtils( self.get_dmg_command(), self.server_managers[0].hosts) + # Setup Secure Server mode + if self.params.get("telemetry_secure_mode", '/run/server_config/*'): + self.secure_server_telemetry_setup() + def compare_lists(self, expected, actual, indent, prefix, description): """Compare two lists. 
@@ -264,6 +271,33 @@ def sum_values(metric_out): return total + def secure_server_telemetry_setup(self): + """ Setup secure server certificate for telemetry.""" + self.log.info("Secure Server Telemetry Setup start") + + # Create the Certificate + self.server_managers[0].prepare_telemetry_certificate() + yaml_data = self.server_managers[0].manager.job.yaml.get_yaml_data() + + # Update the certificate in yaml dictionary. + https_cert = os.path.join(self.server_managers[0].telemetry_certificate_dir, + "telemetry.crt") + https_key = os.path.join(self.server_managers[0].telemetry_certificate_dir, + "telemetry.key") + yaml_data["telemetry_config"].update({"https_cert": https_cert}) + yaml_data["telemetry_config"].update({"https_key": https_key}) + + # Update the current yaml file. + self.server_managers[0].manager.job.create_yaml_file(yaml_data) + + # Restart the DAOS servers + self.log.info("Stop DAOS servers") + self.server_managers[0].manager.stop() + self.log.info("Start daos_server and detect the DAOS I/O engine message") + self.server_managers[0].restart(hosts=self.hostlist_servers) + + self.log.info("Secure Server Telemetry Setup End") + class TestWithClientTelemetry(TestWithTelemetry): """Test client telemetry metrics. 
@@ -276,6 +310,10 @@ def setUp(self): self.telemetry = ClientTelemetryUtils( self.get_dmg_command(), self.server_managers[0].hosts, self.hostlist_clients) + # Setup Secure Agent mode + if self.params.get("telemetry_secure_mode", '/run/agent_config/*'): + self.secure_client_telemetry_setup() + def verify_client_telemetry_list(self, with_pools=False): """Verify the dmg telemetry metrics list command output.""" # Define a list of expected telemetry metrics names @@ -296,3 +334,30 @@ def verify_client_telemetry_list(self, with_pools=False): self.fail("\n".join(errors)) self.log.info("Test PASSED") + + def secure_client_telemetry_setup(self): + """ Setup secure client certificate for telemetry.""" + self.log.info("Secure Client Telemetry Setup start") + + # Create the Certificate + self.agent_managers[0].prepare_telemetry_certificate() + yaml_data = self.agent_managers[0].manager.job.yaml.get_yaml_data() + + # Update the certificate in yaml dictionary. + https_cert = os.path.join(self.agent_managers[0].telemetry_certificate_dir, + "telemetry.crt") + https_key = os.path.join(self.agent_managers[0].telemetry_certificate_dir, + "telemetry.key") + yaml_data["telemetry_config"].update({"https_cert": https_cert}) + yaml_data["telemetry_config"].update({"https_key": https_key}) + + # Update the current yaml file. + self.agent_managers[0].manager.job.create_yaml_file(yaml_data) + + # Restart the DAOS Agent + self.log.info("Stop DAOS agents") + self.agent_managers[0].stop() + self.log.info("Start DAOS agents") + self.agent_managers[0].start() + + self.log.info("Secure Client Telemetry Setup End") diff --git a/utils/config/daos_agent.yml b/utils/config/daos_agent.yml index f260a910f7d..72be13d0ad6 100644 --- a/utils/config/daos_agent.yml +++ b/utils/config/daos_agent.yml @@ -44,10 +44,6 @@ # # default 0 (do not retain telemetry after client exit) # telemetry_retain: 1m # -# # In order to disable transport security, uncomment and set allow_insecure -# # to true. 
Not recommended for production configurations. -# allow_insecure: false -# # # Server certificate for use in TLS handshakes # # DAOS client is the HTTPS server to open secure telemetry endpoint. # https_cert: /etc/daos/certs/telemetry.crt diff --git a/utils/config/daos_control.yml b/utils/config/daos_control.yml index cd0566d41c2..5a236052fd7 100644 --- a/utils/config/daos_control.yml +++ b/utils/config/daos_control.yml @@ -38,13 +38,3 @@ # cert: /etc/daos/certs/admin.crt # # Key portion of Admin Certificate # key: /etc/daos/certs/admin.key - -## Configuration for telemetry collection commands. -# -#telemetry_config: -# # In order to enabled transport security, uncomment and set allow_insecure -# # to false. -# allow_insecure: true -# -# # Skip the Server certificate verification. Recommended for testing purpose only. -# https_exception: true diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 24b2ff3ccd3..f0521b83d14 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -263,9 +263,6 @@ ## Enable Telemetry HTTP/HTTPS endpoint for remote telemetry collection. 
# #telemetry_config: -# # In order to enabled telemetry security, uncomment and set allow_insecure to false -# allow_insecure: true -# # # Set the server telemetry endpoint port number # # default: 9191 # telemetry_port: 9191 diff --git a/utils/config/examples/daos_server_local.yml b/utils/config/examples/daos_server_local.yml index 237f9f27799..0322720e15f 100644 --- a/utils/config/examples/daos_server_local.yml +++ b/utils/config/examples/daos_server_local.yml @@ -8,7 +8,6 @@ transport_config: allow_insecure: true telemetry_config: - allow_insecure: true telemetry_port: 9191 engines: diff --git a/utils/config/examples/daos_server_mdonssd.yml b/utils/config/examples/daos_server_mdonssd.yml index 1b46daf7351..d108802ee7a 100644 --- a/utils/config/examples/daos_server_mdonssd.yml +++ b/utils/config/examples/daos_server_mdonssd.yml @@ -30,7 +30,6 @@ control_metadata: # key: /etc/daos/certs/server.key telemetry_config: - allow_insecure: true telemetry_port: 9191 engines: diff --git a/utils/config/examples/daos_server_tcp.yml b/utils/config/examples/daos_server_tcp.yml index 475baed1483..6b05b68bafa 100644 --- a/utils/config/examples/daos_server_tcp.yml +++ b/utils/config/examples/daos_server_tcp.yml @@ -11,7 +11,6 @@ control_log_mask: DEBUG control_log_file: /tmp/daos_server.log telemetry_config: - allow_insecure: true telemetry_port: 9191 ## Transport Credentials Specifying certificates to secure communications diff --git a/utils/config/examples/daos_server_ucx.yml b/utils/config/examples/daos_server_ucx.yml index 9f29dbb5664..616bb3970f7 100644 --- a/utils/config/examples/daos_server_ucx.yml +++ b/utils/config/examples/daos_server_ucx.yml @@ -20,7 +20,6 @@ control_log_mask: INFO control_log_file: /tmp/daos_server.log telemetry_config: - allow_insecure: true telemetry_port: 9191 ## Transport Credentials Specifying certificates to secure communications diff --git a/utils/config/examples/daos_server_verbs.yml b/utils/config/examples/daos_server_verbs.yml index 
fc199e52234..db4f732eb1a 100644 --- a/utils/config/examples/daos_server_verbs.yml +++ b/utils/config/examples/daos_server_verbs.yml @@ -11,7 +11,6 @@ control_log_mask: INFO control_log_file: /tmp/daos_server.log telemetry_config: - allow_insecure: true telemetry_port: 9191 ## Transport Credentials Specifying certificates to secure communications From 45dd6857627c2a8f2daba7d036662f0fbcbd428f Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Sat, 22 Feb 2025 18:42:38 +0000 Subject: [PATCH 15/19] Few minor update based on review comments. Updated Copyright on files based on Ci warning. Features: control telemetry Signed-off-by: Samir Raval --- src/control/cmd/daos_agent/config.go | 1 + src/control/cmd/daos_agent/config_test.go | 1 + src/control/cmd/daos_agent/infocache.go | 1 + src/control/cmd/daos_agent/infocache_test.go | 1 + src/control/cmd/daos_agent/telemetry.go | 1 + src/control/cmd/dmg/auto_test.go | 1 + src/control/cmd/dmg/telemetry.go | 1 + src/control/lib/control/config.go | 1 + src/control/lib/control/http.go | 1 + src/control/lib/control/http_test.go | 1 + src/tests/ftest/scripts/gen_telemetry_server_certificate.sh | 4 ++-- src/tests/ftest/util/command_utils.py | 2 +- 12 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/control/cmd/daos_agent/config.go b/src/control/cmd/daos_agent/config.go index 374b97d1ae1..ba0d946955d 100644 --- a/src/control/cmd/daos_agent/config.go +++ b/src/control/cmd/daos_agent/config.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/cmd/daos_agent/config_test.go b/src/control/cmd/daos_agent/config_test.go index 24a1a1f3744..e79f42933ef 100644 --- a/src/control/cmd/daos_agent/config_test.go +++ b/src/control/cmd/daos_agent/config_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/cmd/daos_agent/infocache.go b/src/control/cmd/daos_agent/infocache.go index 4fce2b47200..5030ceb1104 100644 --- a/src/control/cmd/daos_agent/infocache.go +++ b/src/control/cmd/daos_agent/infocache.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/cmd/daos_agent/infocache_test.go b/src/control/cmd/daos_agent/infocache_test.go index 300dd8232db..7343af4a689 100644 --- a/src/control/cmd/daos_agent/infocache_test.go +++ b/src/control/cmd/daos_agent/infocache_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/cmd/daos_agent/telemetry.go b/src/control/cmd/daos_agent/telemetry.go index 20ae56e0988..f9f5f08d55b 100644 --- a/src/control/cmd/daos_agent/telemetry.go +++ b/src/control/cmd/daos_agent/telemetry.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index bbdd99500f4..3064e0652a5 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/cmd/dmg/telemetry.go b/src/control/cmd/dmg/telemetry.go index 2e3fb5d8f31..6b6c7acc217 100644 --- a/src/control/cmd/dmg/telemetry.go +++ b/src/control/cmd/dmg/telemetry.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/lib/control/config.go b/src/control/lib/control/config.go index e4a5452b841..5fea677bf6c 100644 --- a/src/control/lib/control/config.go +++ b/src/control/lib/control/config.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index b1de84ca731..790ade741cc 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/lib/control/http_test.go b/src/control/lib/control/http_test.go index 4318b9248dc..c14d4c0bff6 100644 --- a/src/control/lib/control/http_test.go +++ b/src/control/lib/control/http_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh b/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh index ef1ea0578d1..695af13df11 100755 --- a/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh +++ b/src/tests/ftest/scripts/gen_telemetry_server_certificate.sh @@ -1,14 +1,14 @@ #!/bin/bash # /* # * (C) Copyright 2024 Intel Corporation. 
+# * (C) Copyright 2025 Hewlett Packard Enterprise Development LP # * # * SPDX-License-Identifier: BSD-2-Clause-Patent # */ __usage=" -This is just an example script for testing purpose. -Please modify to use in Production environment. +This script generates a simple self-signed DAOS telemetry HTTPS certificate for use by the DAOS ftest framework. Usage: gen_telemetry_server_certificate.sh [USER] [DIR] USER: DAOS has server and client and the certificate need the specific file permission diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index df0cfbb9f2c..30b25147a99 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -1070,7 +1070,7 @@ def generate_telemetry_server_certificates(self, hosts, user, destination): Args: hosts (NodeSet): list of the destination hosts. - user (User): User permission set on telemetry certificate file. + user (str): User permission set on telemetry certificate file. For server, it's daos_server and for client it's daos_agent. destination (str): Generate telemetry certificates in to directory. From 587354d09dae02de03043be119d323e27d119b7c Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Sat, 22 Feb 2025 18:51:50 +0000 Subject: [PATCH 16/19] Updated Copyright on files based on Ci warning. 
Features: control telemetry Signed-off-by: Samir Raval --- src/control/lib/control/telemetry.go | 1 + src/control/lib/control/telemetry_test.go | 1 + src/control/lib/telemetry/promexp/httpd.go | 1 + src/control/lib/telemetry/promexp/httpd_test.go | 1 + src/control/security/config.go | 1 + src/control/server/config/server.go | 1 + src/control/server/config/server_test.go | 1 + src/control/server/server_utils.go | 1 + src/control/server/telemetry.go | 1 + src/tests/ftest/config_file_gen.py | 1 + src/tests/ftest/control/dmg_telemetry_basic.py | 1 + src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh | 3 ++- src/tests/ftest/server/storage_tiers.py | 1 + src/tests/ftest/telemetry/basic_client_telemetry.py | 1 + src/tests/ftest/util/agent_utils.py | 1 + src/tests/ftest/util/agent_utils_params.py | 1 + src/tests/ftest/util/dmg_utils.py | 1 + src/tests/ftest/util/dmg_utils_params.py | 1 + src/tests/ftest/util/server_utils.py | 1 + src/tests/ftest/util/server_utils_params.py | 1 + 20 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/control/lib/control/telemetry.go b/src/control/lib/control/telemetry.go index ad513e8251f..5051c80b61e 100644 --- a/src/control/lib/control/telemetry.go +++ b/src/control/lib/control/telemetry.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/lib/control/telemetry_test.go b/src/control/lib/control/telemetry_test.go index 0864111746e..cf52dcc5c3a 100644 --- a/src/control/lib/control/telemetry_test.go +++ b/src/control/lib/control/telemetry_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/lib/telemetry/promexp/httpd.go b/src/control/lib/telemetry/promexp/httpd.go index 2d92b925d2f..50e1b5a8717 100644 --- a/src/control/lib/telemetry/promexp/httpd.go +++ b/src/control/lib/telemetry/promexp/httpd.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/lib/telemetry/promexp/httpd_test.go b/src/control/lib/telemetry/promexp/httpd_test.go index 8504b4e47c9..1d57ff5dd62 100644 --- a/src/control/lib/telemetry/promexp/httpd_test.go +++ b/src/control/lib/telemetry/promexp/httpd_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/security/config.go b/src/control/security/config.go index 2aa7d7fed18..d92958087be 100644 --- a/src/control/security/config.go +++ b/src/control/security/config.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index 63eb9814426..b3ea3a163fa 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index c72ae07ec3d..e0f6e8ab7a2 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index 288e74ec139..5838af285cf 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/server/telemetry.go b/src/control/server/telemetry.go index b25eeb2b8f2..e8ac73003ae 100644 --- a/src/control/server/telemetry.go +++ b/src/control/server/telemetry.go @@ -1,5 +1,6 @@ // // (C) Copyright 2018-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/tests/ftest/config_file_gen.py b/src/tests/ftest/config_file_gen.py index d5b4f72ee2e..43c662cda9b 100755 --- a/src/tests/ftest/config_file_gen.py +++ b/src/tests/ftest/config_file_gen.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """ (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/control/dmg_telemetry_basic.py b/src/tests/ftest/control/dmg_telemetry_basic.py index 45cb8472392..0809a7a5868 100644 --- a/src/tests/ftest/control/dmg_telemetry_basic.py +++ b/src/tests/ftest/control/dmg_telemetry_basic.py @@ -1,5 +1,6 @@ """ (C) Copyright 2021-2024 Intel Corporation. +(C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh b/src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh index ecf8f04a0cc..ee1fc9dcf8a 100755 --- a/src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh +++ b/src/tests/ftest/scripts/gen_telemetry_admin_certificate.sh @@ -1,6 +1,7 @@ #!/bin/bash # /* # * (C) Copyright 2024 Intel Corporation. +# * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. # * # * SPDX-License-Identifier: BSD-2-Clause-Patent # */ @@ -98,4 +99,4 @@ function main () { cleanup } -main \ No newline at end of file +main diff --git a/src/tests/ftest/server/storage_tiers.py b/src/tests/ftest/server/storage_tiers.py index 536c1c52baf..3ee1dc08ab0 100644 --- a/src/tests/ftest/server/storage_tiers.py +++ b/src/tests/ftest/server/storage_tiers.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.py b/src/tests/ftest/telemetry/basic_client_telemetry.py index 5d0236dbb7a..256f1b86614 100644 --- a/src/tests/ftest/telemetry/basic_client_telemetry.py +++ b/src/tests/ftest/telemetry/basic_client_telemetry.py @@ -1,5 +1,6 @@ """ (C) Copyright 2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index df86eda6145..a06d6c4aa96 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2019-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index 59b995437b0..ee372a91519 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index aad42e662a8..5904042bba1 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index f1c262c924f..fbc49653066 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index cd9b568db7d..afb53a57a70 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent """ diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 4a73ca8a491..d059c43ebec 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. 
+ (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent """ From fbfa2ed3c0874d6d7dbdaeab6a2826d272e1d6e1 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Sun, 23 Feb 2025 04:51:09 +0000 Subject: [PATCH 17/19] Code fixed based on Ci result. Removed the unwanted code for dmg telemetry. Features: control telemetry Signed-off-by: Samir Raval --- src/control/cmd/dmg/telemetry.go | 13 +++------- src/tests/ftest/config_file_gen.py | 5 ++-- src/tests/ftest/util/agent_utils_params.py | 5 +--- src/tests/ftest/util/command_utils_base.py | 4 +--- src/tests/ftest/util/dmg_utils.py | 7 +++--- src/tests/ftest/util/dmg_utils_params.py | 28 +++------------------- 6 files changed, 13 insertions(+), 49 deletions(-) diff --git a/src/control/cmd/dmg/telemetry.go b/src/control/cmd/dmg/telemetry.go index 6b6c7acc217..1760ca64ec0 100644 --- a/src/control/cmd/dmg/telemetry.go +++ b/src/control/cmd/dmg/telemetry.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2024 Intel Corporation. +// (C) Copyright 2019-2025 Intel Corporation. // (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -260,20 +260,13 @@ func (cmd *telemConfigCmd) configurePrometheus() (*installInfo, error) { return nil, err } - scheme := "" - if cmd.cfgCmd.config.TelemetryConfig.HttpsCert != "" && cmd.cfgCmd.config.TelemetryConfig.HttpsKey != "" { - cmd.Infof("Prometheus configuration is setup as Secure (https) mode") - scheme = "https" - } else { - cmd.Infof("Prometheus configuration is setup as insecure (http) mode") - } - + cmd.Infof("WARNING: By default, Prometheus configuration will be created for insecure (http) mode") + cmd.Infof("Configures the protocol scheme for secure mode in config file. 
[scheme: = https]") cfg.ScrapeConfigs = []*scrapeConfig{ { JobName: "daos", ScrapeInterval: 5 * time.Second, StaticConfigs: []*staticConfig{sc}, - Scheme: scheme, }, } diff --git a/src/tests/ftest/config_file_gen.py b/src/tests/ftest/config_file_gen.py index 43c662cda9b..d0960fcff66 100755 --- a/src/tests/ftest/config_file_gen.py +++ b/src/tests/ftest/config_file_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ - (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2020-2025 Intel Corporation. (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent @@ -16,7 +16,7 @@ from util.agent_utils_params import (DaosAgentTelemetryConfig, DaosAgentTransportCredentials, DaosAgentYamlParameters) from util.command_utils_base import CommonConfig -from util.dmg_utils_params import DmgTelemetryConfig, DmgTransportCredentials, DmgYamlParameters +from util.dmg_utils_params import DmgTransportCredentials, DmgYamlParameters from util.exception_utils import CommandFailure from util.server_utils_params import (DaosServerTelemetryConfig, DaosServerTransportCredentials, DaosServerYamlParameters) @@ -73,7 +73,6 @@ def generate_dmg_config(args): """ config = DmgYamlParameters( args.dmg_file, args.group_name, DmgTransportCredentials()) - config.telemetry_config = DmgTelemetryConfig() # Update the configuration file hostlist config.hostlist.value = args.node_list.split(",") return create_config(args, config) diff --git a/src/tests/ftest/util/agent_utils_params.py b/src/tests/ftest/util/agent_utils_params.py index ee372a91519..72314343fcf 100644 --- a/src/tests/ftest/util/agent_utils_params.py +++ b/src/tests/ftest/util/agent_utils_params.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2020-2025 Intel Corporation. (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
SPDX-License-Identifier: BSD-2-Clause-Patent @@ -105,9 +105,6 @@ def __init__(self, filename, common_yaml): self.exclude_fabric_ifaces = BasicParameter(None) self.cache_expiration = BasicParameter(None) self.disable_caching = BasicParameter(None) - self.telemetry_port = BasicParameter(None) - self.telemetry_enabled = BasicParameter(None) - self.telemetry_retain = BasicParameter(None) self.access_points = BasicParameter(None, ["localhost"]) def update_log_file(self, name): diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index d5197cbbbd3..614eab4f6f2 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2020-2025 Intel Corporation. (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent @@ -815,8 +815,6 @@ def __init__(self, namespace, title, log_dir): super().__init__(namespace, None, title) self._log_dir = log_dir self.telemetry_port = BasicParameter(None, 9191) - self.telemetry_retain = None - self.telemetry_enabled = None def get_yaml_data(self): """Convert the parameters into a dictionary to use to write a yaml file. diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 2632126c2bf..8a22fc14c3f 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2018-2024 Intel Corporation. + (C) Copyright 2018-2025 Intel Corporation. (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
SPDX-License-Identifier: BSD-2-Clause-Patent @@ -11,7 +11,7 @@ from pwd import getpwuid from dmg_utils_base import DmgCommandBase -from dmg_utils_params import DmgTelemetryConfig, DmgTransportCredentials, DmgYamlParameters +from dmg_utils_params import DmgTransportCredentials, DmgYamlParameters from exception_utils import CommandFailure from general_utils import dict_to_str, get_numeric_list @@ -40,8 +40,7 @@ def get_dmg_command(group, cert_dir, bin_dir, config_file, config_temp=None, hos """ transport_config = DmgTransportCredentials(cert_dir) - telemetry_config = DmgTelemetryConfig(cert_dir) - config = DmgYamlParameters(config_file, group, transport_config, telemetry_config) + config = DmgYamlParameters(config_file, group, transport_config) command = DmgCommand(bin_dir, config, hostlist_suffix) if config_temp: # Setup the DaosServerCommand to write the config file data to the diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index fbc49653066..5d4afa573f1 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -1,12 +1,11 @@ """ - (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2020-2025 Intel Corporation. (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ -from command_utils_base import (BasicParameter, LogParameter, TelemetryConfig, - TransportCredentials, YamlParameters) +from command_utils_base import BasicParameter, LogParameter, TransportCredentials, YamlParameters class DmgTransportCredentials(TransportCredentials): @@ -27,26 +26,10 @@ def _get_new(self): return DmgTransportCredentials(self._log_dir) -class DmgTelemetryConfig(TelemetryConfig): - """Telemetry credentials listing certificates for secure communication.""" - - def __init__(self, log_dir="/tmp"): - """Initialize a TelemetryConfig object.""" - super().__init__("/run/dmg/telemetry_config/*", None, log_dir) - - def _get_new(self): - """Get a new object based upon this one. - - Returns: - DmgTelemetryConfig: a new DmgTelemetryConfig object - """ - return DmgTelemetryConfig(self._log_dir) - - class DmgYamlParameters(YamlParameters): """Defines the dmg configuration yaml parameters.""" - def __init__(self, filename, name, transport, telemetry=None): + def __init__(self, filename, name, transport): """Initialize a DmgYamlParameters object. Args: @@ -54,8 +37,6 @@ def __init__(self, filename, name, transport, telemetry=None): name (str): The DAOS system name. transport (DmgTransportCredentials): dmg security configuration settings. - telemetry (DmgTelemetryConfig): dmg telemetry - configuration settings. """ super().__init__("/run/dmg/*", filename, None, transport) @@ -77,9 +58,6 @@ def __init__(self, filename, name, transport, telemetry=None): self.hostlist = BasicParameter(None, "localhost") self.port = BasicParameter(None, 10001) - if telemetry is not None: - self.telemetry_config = telemetry - def _get_new(self): """Get a new object based upon this one. From e44a62642c299675e01c22007ea38cdbf828951c Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Sun, 23 Feb 2025 17:20:51 +0000 Subject: [PATCH 18/19] Test Fixed based on Ci results and running again. 
Features: control telemetry Signed-off-by: Samir Raval --- src/tests/ftest/telemetry/dfs_client_telemetry.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tests/ftest/telemetry/dfs_client_telemetry.yaml b/src/tests/ftest/telemetry/dfs_client_telemetry.yaml index e0dd33d1f87..e074fc792e9 100644 --- a/src/tests/ftest/telemetry/dfs_client_telemetry.yaml +++ b/src/tests/ftest/telemetry/dfs_client_telemetry.yaml @@ -18,9 +18,10 @@ server_config: system_ram_reserved: 1 agent_config: - telemetry_port: 9191 - telemetry_retain: 30s - telemetry_enabled: true + telemetry_config: + telemetry_port: 9191 + telemetry_retain: 30s + telemetry_enabled: true pool: scm_size: 2G From 22d7f3dbc72300347ec4a68c67093f94500e8427 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Tue, 25 Feb 2025 18:28:39 +0000 Subject: [PATCH 19/19] Code modified based on review comments. Features: control telemetry Signed-off-by: Samir Raval --- src/tests/ftest/util/agent_utils.py | 10 ++++------ src/tests/ftest/util/command_utils.py | 9 ++++++--- src/tests/ftest/util/command_utils_base.py | 10 ---------- src/tests/ftest/util/dmg_utils.py | 2 +- src/tests/ftest/util/dmg_utils_params.py | 2 +- src/tests/ftest/util/launch_utils.py | 7 +++++-- src/tests/ftest/util/server_utils.py | 10 ++++------ 7 files changed, 21 insertions(+), 29 deletions(-) diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index a06d6c4aa96..3504f28edfc 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -397,9 +397,7 @@ def get_socket_dir(self): def prepare_telemetry_certificate(self): """Prepare Telemetry certificate""" - self.manager.job.copy_telemetry_root_certificates(get_log_file("daosTelemetryCA"), - self.telemetry_certificate_dir, - self._hosts) - self.manager.job.generate_telemetry_server_certificates(self._hosts, - "daos_agent", - self.telemetry_certificate_dir) + self.manager.job.copy_telemetry_root_certificates( + 
get_log_file("daosTelemetryCA"), self.telemetry_certificate_dir, self._hosts) + self.manager.job.generate_telemetry_server_certificates( + self._hosts, "daos_agent", self.telemetry_certificate_dir) diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index f8d29bd1749..8e3e3588476 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -1051,6 +1051,9 @@ def copy_telemetry_root_certificates(self, source, destination, hosts): source (str) : source of the certificate files. destination (str): copy file destination dir. hosts (NodeSet): list of the destination hosts. + + Raises: + CommandFailure: if there is an error copying certificate. """ certfiles = ["daosTelemetryCA.crt", "daosTelemetryCA.key"] @@ -1062,8 +1065,8 @@ def copy_telemetry_root_certificates(self, source, destination, hosts): self.log, hosts, src_file, dst_file, mkdir=False, verbose=False, sudo=True, owner=self.certificate_owner) if not result.passed: - self.log.info(" WARNING: %s copy telemetry cert failed on %s", - dst_file, result.failed_hosts) + raise CommandFailure( + f"Error: copy telemetry cert file {dst_file} failed on {result.failed_hosts}") def generate_telemetry_server_certificates(self, hosts, user, destination): """Generate the telemetry certificates for the test on server/client. 
@@ -1080,7 +1083,7 @@ def generate_telemetry_server_certificates(self, hosts, user, destination): certgen_dir = os.path.abspath( os.path.join(os.getcwd(), "scripts")) command = os.path.join(certgen_dir, "gen_telemetry_server_certificate.sh ") - command = "sudo " + command + user + " " + destination + command = command_as_user(command + user + " " + destination, "root") self.log.debug("Generating the telemetry certificate command %s:", command) result = run_remote(self.log, hosts, command, 30) if not result.passed: diff --git a/src/tests/ftest/util/command_utils_base.py b/src/tests/ftest/util/command_utils_base.py index 614eab4f6f2..a0ee1fcec8b 100644 --- a/src/tests/ftest/util/command_utils_base.py +++ b/src/tests/ftest/util/command_utils_base.py @@ -816,16 +816,6 @@ def __init__(self, namespace, title, log_dir): self._log_dir = log_dir self.telemetry_port = BasicParameter(None, 9191) - def get_yaml_data(self): - """Convert the parameters into a dictionary to use to write a yaml file. - - Returns: - dict: a dictionary of parameter name keys and values - - """ - yaml_data = super().get_yaml_data() - return yaml_data - def get_certificate_data(self, name_list): """Get certificate data by name_list. Args: diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 8a22fc14c3f..94b0eda7e21 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2018-2025 Intel Corporation. + (C) Copyright 2018-2024 Intel Corporation. (C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent diff --git a/src/tests/ftest/util/dmg_utils_params.py b/src/tests/ftest/util/dmg_utils_params.py index 5d4afa573f1..9f6c764cf92 100644 --- a/src/tests/ftest/util/dmg_utils_params.py +++ b/src/tests/ftest/util/dmg_utils_params.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2025 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. 
(C) Copyright 2025 Hewlett Packard Enterprise Development LP. SPDX-License-Identifier: BSD-2-Clause-Patent diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index 8892e24819f..9ca715bfdf6 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -878,9 +878,12 @@ def _generate_certs(self, logger): def _generate_telemetry_certs(self, logger): """Generate the telemetry certificates for the test and - copy to system default certificate location - Returns: + copy to system default certificate location + + Args: logger (Logger): logger for the messages produced by this method + + Returns: int: status code: 0 = success, 128 = failure """ logger.debug("-" * 80) diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 9581de26526..0931c256230 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -232,12 +232,10 @@ def _prepare_dmg_hostlist(self, hosts=None): def prepare_telemetry_certificate(self): """Prepare Telemetry certificate""" - self.manager.job.copy_telemetry_root_certificates(get_log_file("daosTelemetryCA"), - self.telemetry_certificate_dir, - self._hosts) - self.manager.job.generate_telemetry_server_certificates(self._hosts, - "daos_server", - self.telemetry_certificate_dir) + self.manager.job.copy_telemetry_root_certificates( + get_log_file("daosTelemetryCA"), self.telemetry_certificate_dir, self._hosts) + self.manager.job.generate_telemetry_server_certificates( + self._hosts, "daos_server", self.telemetry_certificate_dir) def prepare(self, storage=True): """Prepare to start daos_server.