Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow specifying Collector's own Resource in the config #5402

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@
### 💡 Enhancements 💡

- Update OTLP to v0.17.0 (#5335)
- Add optional min/max fields to histograms (#5399)
- Add optional min/max fields to histograms (#5399)
- User-defined Resource attributes can be specified under `service.telemetry.resource`
configuration key and will be included as metric lables for own telemetry.
If `service.instance.id` is not specified it will be auto-generated. Previously
`service.instance.id` was always auto-generated, so the default of the new
behavior matches the old behavior. (#5402)

### 🧰 Bug fixes 🧰

Expand Down
7 changes: 7 additions & 0 deletions config/moved_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ type Service struct {
type ServiceTelemetry struct {
Logs ServiceTelemetryLogs `mapstructure:"logs"`
Metrics ServiceTelemetryMetrics `mapstructure:"metrics"`

// Resource specifies user-defined attributes to include with all emitted telemetry.
// For metrics the attributes become Prometheus labels.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not 100% true, only if the exporter is prometheus.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I can just delete this line to void misleading?

// Note that some attributes are added automatically (e.g. service.version) even
// if they are not specified here. In order to suppress such attributes the
// attribute must be specified in this map with null YAML value (nil string pointer).
Resource map[string]*string `mapstructure:"resource"`
}

// ServiceTelemetryLogs defines the configurable settings for service telemetry logs.
Expand Down
173 changes: 146 additions & 27 deletions service/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ import (

"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/config"
"go.opentelemetry.io/collector/config/mapconverter/overwritepropertiesmapconverter"
"go.opentelemetry.io/collector/internal/testcomponents"
"go.opentelemetry.io/collector/internal/testutil"
"go.opentelemetry.io/collector/internal/version"
"go.opentelemetry.io/collector/service/featuregate"
)

Expand Down Expand Up @@ -178,7 +178,111 @@ func TestCollectorFailedShutdown(t *testing.T) {
assert.Equal(t, Closed, col.GetState())
}

func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter) {
// mapConverter applies extraMap of config settings. Useful for overriding the config
// for testing purposes. Keys must use "::" delimiter between levels.
type mapConverter struct {
extraMap map[string]*string
}

func (m mapConverter) Convert(ctx context.Context, cfgMap *config.Map) error {
for k, v := range m.extraMap {
cfgMap.Set(k, v)
}
return nil
}

type labelState int

const (
labelNotPresent labelState = iota
labelSpecificValue
labelAnyValue
)

type labelValue struct {
label string
state labelState
}

type ownMetricsTestCase struct {
name string
userDefinedResource map[string]*string
expectedLabels map[string]labelValue
}

var testResourceAttrValue = "resource_attr_test_value"
var testInstanceID = "test_instance_id"
var testServiceVersion = "2022-05-20"

var ownMetricsTestCases = []ownMetricsTestCase{
{
name: "no resource",
userDefinedResource: nil,
// All labels added to all collector metrics by default are listed below.
// These labels are hard coded here in order to avoid inadvertent changes:
// at this point changing labels should be treated as a breaking changing
// and requires a good justification. The reason is that changes to metric
// names or labels can break alerting, dashboards, etc that are used to
// monitor the Collector in production deployments.
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelAnyValue},
"service_version": {label: version.Version, state: labelSpecificValue},
},
},
{
name: "resource with custom attr",
userDefinedResource: map[string]*string{
"custom_resource_attr": &testResourceAttrValue,
},
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelAnyValue},
"service_version": {label: version.Version, state: labelSpecificValue},
"custom_resource_attr": {label: "resource_attr_test_value", state: labelSpecificValue},
},
},
{
name: "override service.instance.id",
userDefinedResource: map[string]*string{
"service.instance.id": &testInstanceID,
},
expectedLabels: map[string]labelValue{
"service_instance_id": {label: "test_instance_id", state: labelSpecificValue},
"service_version": {label: version.Version, state: labelSpecificValue},
},
},
{
name: "suppress service.instance.id",
userDefinedResource: map[string]*string{
"service.instance.id": nil, // nil value in config is used to suppress attributes.
},
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelNotPresent},
"service_version": {label: version.Version, state: labelSpecificValue},
},
},
{
name: "override service.version",
userDefinedResource: map[string]*string{
"service.version": &testServiceVersion,
},
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelAnyValue},
"service_version": {label: "2022-05-20", state: labelSpecificValue},
},
},
{
name: "suppress service.version",
userDefinedResource: map[string]*string{
"service.version": nil, // nil value in config is used to suppress attributes.
},
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelAnyValue},
"service_version": {state: labelNotPresent},
},
},
}

func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter, tc ownMetricsTestCase) {
factories, err := testcomponents.NewDefaultFactories()
require.NoError(t, err)
var once sync.Once
Expand All @@ -194,10 +298,19 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter
cfgSet := newDefaultConfigProviderSettings([]string{
filepath.Join("testdata", "otelcol-config.yaml"),
})

// Prepare config properties to be merged with the main config.
extraCfgAsProps := map[string]*string{
// Set the metrics address to expose own metrics on.
"service::telemetry::metrics::address": &metricsAddr,
}
// Also include resource attributes under the service.telemetry.resource key.
for k, v := range tc.userDefinedResource {
extraCfgAsProps["service::telemetry::resource::"+k] = v
}

cfgSet.MapConverters = append([]config.MapConverter{
overwritepropertiesmapconverter.New(
[]string{"service.telemetry.metrics.address=" + metricsAddr},
)},
mapConverter{extraCfgAsProps}},
cfgSet.MapConverters...,
)
cfgProvider, err := NewConfigProvider(cfgSet)
Expand All @@ -220,16 +333,7 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter
assert.Equal(t, col.telemetry.Logger, col.GetLogger())
assert.True(t, loggingHookCalled)

// All labels added to all collector metrics by default are listed below.
// These labels are hard coded here in order to avoid inadvertent changes:
// at this point changing labels should be treated as a breaking changing
// and requires a good justification. The reason is that changes to metric
// names or labels can break alerting, dashboards, etc that are used to
// monitor the Collector in production deployments.
mandatoryLabels := []string{
"service_instance_id",
}
assertMetrics(t, metricsAddr, mandatoryLabels)
assertMetrics(t, metricsAddr, tc.expectedLabels)

assertZPages(t)
col.signalsChannel <- syscall.SIGTERM
Expand All @@ -239,15 +343,23 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter
}

func TestCollectorStartWithOpenCensusMetrics(t *testing.T) {
testCollectorStartHelper(t, newColTelemetry(featuregate.NewRegistry()))
for _, tc := range ownMetricsTestCases {
t.Run(tc.name, func(t *testing.T) {
testCollectorStartHelper(t, newColTelemetry(featuregate.NewRegistry()), tc)
})
}
}

func TestCollectorStartWithOpenTelemetryMetrics(t *testing.T) {
colTel := newColTelemetry(featuregate.NewRegistry())
colTel.registry.Apply(map[string]bool{
useOtelForInternalMetricsfeatureGateID: true,
})
testCollectorStartHelper(t, colTel)
for _, tc := range ownMetricsTestCases {
t.Run(tc.name, func(t *testing.T) {
colTel := newColTelemetry(featuregate.NewRegistry())
colTel.registry.Apply(map[string]bool{
useOtelForInternalMetricsfeatureGateID: true,
})
testCollectorStartHelper(t, colTel, tc)
})
}
}

func TestCollectorShutdownBeforeRun(t *testing.T) {
Expand Down Expand Up @@ -316,7 +428,7 @@ func (tel *mockColTelemetry) shutdown() error {
return errors.New("err1")
}

func assertMetrics(t *testing.T, metricsAddr string, mandatoryLabels []string) {
func assertMetrics(t *testing.T, metricsAddr string, expectedLabels map[string]labelValue) {
client := &http.Client{}
resp, err := client.Get("http://" + metricsAddr + "/metrics")
require.NoError(t, err)
Expand All @@ -341,14 +453,21 @@ func assertMetrics(t *testing.T, metricsAddr string, mandatoryLabels []string) {
metricName[:len(prefix)+1]+"...")

for _, metric := range metricFamily.Metric {
var labelNames []string
labelMap := map[string]string{}
for _, labelPair := range metric.Label {
labelNames = append(labelNames, *labelPair.Name)
labelMap[*labelPair.Name] = *labelPair.Value
}

for _, mandatoryLabel := range mandatoryLabels {
// require is used here so test fails with a single message.
require.Contains(t, labelNames, mandatoryLabel, "mandatory label %q not present", mandatoryLabel)
for k, v := range expectedLabels {
switch v.state {
case labelNotPresent:
_, present := labelMap[k]
assert.False(t, present, "label %q must not be present", k)
case labelSpecificValue:
require.Equal(t, v.label, labelMap[k], "mandatory label %q value mismatch", k)
case labelAnyValue:
assert.NotEmpty(t, labelMap[k], "mandatory label %q not present", k)
}
}
}
}
Expand Down
39 changes: 29 additions & 10 deletions service/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ func (tel *colTelemetry) init(col *Collector) error {
func (tel *colTelemetry) initOnce(col *Collector) error {
logger := col.telemetry.Logger
cfg := col.service.config.Telemetry
resource := cfg.Resource

level := cfg.Metrics.Level
metricsAddr := cfg.Metrics.Address
Expand All @@ -109,8 +110,29 @@ func (tel *colTelemetry) initOnce(col *Collector) error {

logger.Info("Setting up own telemetry...")

instanceUUID, _ := uuid.NewRandom()
instanceID := instanceUUID.String()
// Construct telemetry attributes from resource attributes.
telAttrs := map[string]string{}
for k, v := range resource {
// nil value indicates that the attribute should not be included in the telemetry.
if v != nil {
telAttrs[k] = *v
}
}

if _, ok := resource[semconv.AttributeServiceInstanceID]; !ok {
// AttributeServiceInstanceID is not specified in the config. Auto-generate one.
instanceUUID, _ := uuid.NewRandom()
instanceID := instanceUUID.String()
telAttrs[semconv.AttributeServiceInstanceID] = instanceID
}

if AddCollectorVersionTag {
tigrannajaryan marked this conversation as resolved.
Show resolved Hide resolved
if _, ok := resource[semconv.AttributeServiceVersion]; !ok {
// AttributeServiceVersion is not specified in the config. Use the actual
// build version.
telAttrs[semconv.AttributeServiceVersion] = version.Version
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On non-core distros, this will show latest, right? We probably want to explicitly pass a version on the Collector settings to avoid this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't verified, but I think you are right. This PR didn't change the behavior.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, I missed that this was not introduced by this PR. I will make a note to reproduce and open an issue then. We probably want to get rid of version.Version entirely and leave passing the version to the builder instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is another semi-related issue: for Agent Management the Supervisor needs a way to fetch the Collector's version.

If we are going to change how versions are embedded in the Collector I want to use the opportunity to see if we can make it fetchable by the Supervisor. A few options:

  • Have a manifest file with version number recorded in it and include the manifest in Collector's installation packages.
  • Try to fetch version number from executable directly (as a symbol?). Not very desirable since it requires platform specific code.
  • Read from CLI by using --version flag (not very reliable since the output format is not precisely defined).
  • Something else?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I submitted a separate issue for this #5438

}
}

var pe http.Handler
if tel.registry.IsEnabled(useOtelForInternalMetricsfeatureGateID) {
Expand All @@ -120,7 +142,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error {
}
pe = otelHandler
} else {
ocHandler, err := tel.initOpenCensus(col, instanceID)
ocHandler, err := tel.initOpenCensus(col, telAttrs)
if err != nil {
return err
}
Expand All @@ -131,8 +153,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error {
"Serving Prometheus metrics",
zap.String(zapKeyTelemetryAddress, metricsAddr),
zap.String(zapKeyTelemetryLevel, level.String()),
zap.String(semconv.AttributeServiceInstanceID, instanceID),
zap.String(semconv.AttributeServiceVersion, version.Version),
zap.Any("Resource", resource),
)

mux := http.NewServeMux()
Expand All @@ -152,7 +173,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error {
return nil
}

func (tel *colTelemetry) initOpenCensus(col *Collector, instanceID string) (http.Handler, error) {
func (tel *colTelemetry) initOpenCensus(col *Collector, telAttrs map[string]string) (http.Handler, error) {
processMetricsViews, err := telemetry2.NewProcessMetricsViews(getBallastSize(col.service.host))
if err != nil {
return nil, err
Expand All @@ -178,10 +199,8 @@ func (tel *colTelemetry) initOpenCensus(col *Collector, instanceID string) (http

opts.ConstLabels = make(map[string]string)

opts.ConstLabels[sanitizePrometheusKey(semconv.AttributeServiceInstanceID)] = instanceID

if AddCollectorVersionTag {
opts.ConstLabels[sanitizePrometheusKey(semconv.AttributeServiceVersion)] = version.Version
for k, v := range telAttrs {
opts.ConstLabels[sanitizePrometheusKey(k)] = v
}

pe, err := prometheus.NewExporter(opts)
Expand Down