Skip to content

Commit

Permalink
feat(config): allow config dir to be passed as argument
Browse files Browse the repository at this point in the history
This commit allows the previously hard-coded default config directory to
be passed as an argument. This allows different configurations to be
stored separately and makes it quick to switch between them
(especially during development).

The commit also
* simplifies global config initialization by ensuring it is initialised
  when kepler's main function is executed, failing with an error if that
  step fails.

* cleans up the use of the config object to read CGroup info by creating
  a `realSystem` struct that handles this functionality.

Signed-off-by: Sunil Thaha <[email protected]>
  • Loading branch information
sthaha committed Nov 18, 2024
1 parent f434c66 commit 26cba61
Show file tree
Hide file tree
Showing 21 changed files with 142 additions and 102 deletions.
38 changes: 22 additions & 16 deletions cmd/exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ const (

// AppConfig holds the configuration info for the application.
type AppConfig struct {
BaseDir string
Address string
MetricsPath string
EnableGPU bool
Expand All @@ -69,21 +70,22 @@ type AppConfig struct {

func newAppConfig() *AppConfig {
// Initialize flags
_config := &AppConfig{}
flag.StringVar(&_config.Address, "address", "0.0.0.0:8888", "bind address")
flag.StringVar(&_config.MetricsPath, "metrics-path", "/metrics", "metrics path")
flag.BoolVar(&_config.EnableGPU, "enable-gpu", false, "whether enable gpu (need to have libnvidia-ml installed)")
flag.BoolVar(&_config.EnableEBPFCgroupID, "enable-cgroup-id", true, "whether enable eBPF to collect cgroup id")
flag.BoolVar(&_config.ExposeHardwareCounterMetrics, "expose-hardware-counter-metrics", true, "whether expose hardware counter as prometheus metrics")
flag.BoolVar(&_config.EnableMSR, "enable-msr", false, "whether MSR is allowed to obtain energy data")
flag.StringVar(&_config.Kubeconfig, "kubeconfig", "", "absolute path to the kubeconfig file, if empty we use the in-cluster configuration")
flag.BoolVar(&_config.ApiserverEnabled, "apiserver", true, "if apiserver is disabled, we collect pod information from kubelet")
flag.StringVar(&_config.RedfishCredFilePath, "redfish-cred-file-path", "", "path to the redfish credential file")
flag.BoolVar(&_config.ExposeEstimatedIdlePower, "expose-estimated-idle-power", false, "Whether to expose the estimated idle power as a metric")
flag.StringVar(&_config.MachineSpecFilePath, "machine-spec", "", "path to the machine spec file in json format")
flag.BoolVar(&_config.DisablePowerMeter, "disable-power-meter", false, "whether manually disable power meter read and forcefully apply the estimator for node powers")

return _config
cfg := &AppConfig{}
flag.StringVar(&cfg.BaseDir, "config-dir", config.BaseDir, "path to config base directory")
flag.StringVar(&cfg.Address, "address", "0.0.0.0:8888", "bind address")
flag.StringVar(&cfg.MetricsPath, "metrics-path", "/metrics", "metrics path")
flag.BoolVar(&cfg.EnableGPU, "enable-gpu", false, "whether enable gpu (need to have libnvidia-ml installed)")
flag.BoolVar(&cfg.EnableEBPFCgroupID, "enable-cgroup-id", true, "whether enable eBPF to collect cgroup id")
flag.BoolVar(&cfg.ExposeHardwareCounterMetrics, "expose-hardware-counter-metrics", true, "whether expose hardware counter as prometheus metrics")
flag.BoolVar(&cfg.EnableMSR, "enable-msr", false, "whether MSR is allowed to obtain energy data")
flag.StringVar(&cfg.Kubeconfig, "kubeconfig", "", "absolute path to the kubeconfig file, if empty we use the in-cluster configuration")
flag.BoolVar(&cfg.ApiserverEnabled, "apiserver", true, "if apiserver is disabled, we collect pod information from kubelet")
flag.StringVar(&cfg.RedfishCredFilePath, "redfish-cred-file-path", "", "path to the redfish credential file")
flag.BoolVar(&cfg.ExposeEstimatedIdlePower, "expose-estimated-idle-power", false, "Whether to expose the estimated idle power as a metric")
flag.StringVar(&cfg.MachineSpecFilePath, "machine-spec", "", "path to the machine spec file in json format")
flag.BoolVar(&cfg.DisablePowerMeter, "disable-power-meter", false, "whether manually disable power meter read and forcefully apply the estimator for node powers")

return cfg
}

func healthProbe(w http.ResponseWriter, req *http.Request) {
Expand All @@ -99,7 +101,11 @@ func main() {
klog.InitFlags(nil)
appConfig := newAppConfig() // Initialize appConfig and define flags
flag.Parse() // Parse command-line flags
config.GetConfig() // Initialize the configuration

if _, err := config.Initialize(appConfig.BaseDir); err != nil {
klog.Fatalf("Failed to initialize config: %v", err)
}

klog.Infof("Kepler running on version: %s", build.Version)

registry := metrics.GetRegistry()
Expand Down
6 changes: 6 additions & 0 deletions pkg/collector/metric_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,13 @@ func newMockCollector(mockAttacher bpf.Exporter) *Collector {

var _ = Describe("Test Collector Unit", func() {

BeforeEach(func() {
_, err := config.Initialize(".")
Expect(err).NotTo(HaveOccurred())
})

It("Get container power", func() {

bpfExporter := bpf.NewMockExporter(bpf.DefaultSupportedMetrics())
metricCollector := newMockCollector(bpfExporter)
// The default estimator model is the ratio
Expand Down
5 changes: 5 additions & 0 deletions pkg/collector/stats/container_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ import (

var _ = Describe("Test Container Metric", func() {

BeforeEach(func() {
_, err := config.Initialize(".")
Expect(err).NotTo(HaveOccurred())
})

It("Test ResetDeltaValues", func() {
SetMockedCollectorMetrics()
c := NewContainerStats("containerA", "podA", "test", "containerIDA")
Expand Down
3 changes: 3 additions & 0 deletions pkg/collector/stats/node_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ var _ = Describe("Test Node Metric", func() {
)

BeforeEach(func() {
_, err := config.Initialize(".")
Expect(err).NotTo(HaveOccurred())

SetMockedCollectorMetrics()
processMetrics = CreateMockedProcessStats(2)
nodeMetrics = CreateMockedNodeStats()
Expand Down
5 changes: 5 additions & 0 deletions pkg/collector/stats/process_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ import (

var _ = Describe("ProcessMetric", func() {

BeforeEach(func() {
_, err := config.Initialize(".")
Expect(err).NotTo(HaveOccurred())
})

It("Test ResetDeltaValues", func() {
SetMockedCollectorMetrics()
metrics := CreateMockedProcessStats(1)
Expand Down
4 changes: 3 additions & 1 deletion pkg/collector/stats/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import (

var _ = Describe("Stats", func() {
It("Test InitAvailableParamAndMetrics", func() {
config.GetConfig()
_, err := config.Initialize(".")
Expect(err).NotTo(HaveOccurred())

config.SetEnabledHardwareCounterMetrics(false)
exp := []string{}
Expect(len(GetProcessFeatureNames()) >= len(exp)).To(BeTrue())
Expand Down
1 change: 0 additions & 1 deletion pkg/collector/stats/test_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ const (
// SetMockedCollectorMetrics adds all metrics to a process; otherwise it will not create the right usageMetric with all elements. The usageMetric is used in the Prediction Power Models.
// TODO: do not use a fixed usageMetric array in the power models; structured data is more desirable.
func SetMockedCollectorMetrics() {
config.GetConfig()
if gpu := acc.GetActiveAcceleratorByType(config.GPU); gpu != nil {
err := gpu.Device().Init() // create structure instances that will be accessed to create a processMetric
klog.Fatalln(err)
Expand Down
Loading

0 comments on commit 26cba61

Please sign in to comment.