Skip to content

Commit

Permalink
Allow specifying Collector's own Resource in the config (#5402)
Browse files Browse the repository at this point in the history
Resolves #5398

This adds a sub-section in the `service.telemetry` config section where any
Resource attributes can be specified.

For example we can have the following in the config:

service:
  telemetry:
    resource:
      service.instance.id: 01G3EN4NW306AFVGQT5ZYC0GEK
      service.namespace: onlineshop
      deployment.environment: production
  • Loading branch information
tigrannajaryan authored May 26, 2022
1 parent 79b7835 commit 87abc21
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 38 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@
### 💡 Enhancements 💡

- Update OTLP to v0.17.0 (#5335)
- Add optional min/max fields to histograms (#5399)
- Add optional min/max fields to histograms (#5399)
- User-defined Resource attributes can be specified under `service.telemetry.resource`
configuration key and will be included as metric lables for own telemetry.
If `service.instance.id` is not specified it will be auto-generated. Previously
`service.instance.id` was always auto-generated, so the default of the new
behavior matches the old behavior. (#5402)

## v0.51.0 Beta

Expand Down
7 changes: 7 additions & 0 deletions config/moved_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ type Service struct {
type ServiceTelemetry struct {
Logs ServiceTelemetryLogs `mapstructure:"logs"`
Metrics ServiceTelemetryMetrics `mapstructure:"metrics"`

// Resource specifies user-defined attributes to include with all emitted telemetry.
// For metrics the attributes become Prometheus labels.
// Note that some attributes are added automatically (e.g. service.version) even
// if they are not specified here. In order to suppress such attributes the
// attribute must be specified in this map with null YAML value (nil string pointer).
Resource map[string]*string `mapstructure:"resource"`
}

// ServiceTelemetryLogs defines the configurable settings for service telemetry logs.
Expand Down
173 changes: 146 additions & 27 deletions service/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ import (

"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/config"
"go.opentelemetry.io/collector/config/mapconverter/overwritepropertiesmapconverter"
"go.opentelemetry.io/collector/internal/testcomponents"
"go.opentelemetry.io/collector/internal/testutil"
"go.opentelemetry.io/collector/internal/version"
"go.opentelemetry.io/collector/service/featuregate"
)

Expand Down Expand Up @@ -178,7 +178,111 @@ func TestCollectorFailedShutdown(t *testing.T) {
assert.Equal(t, Closed, col.GetState())
}

func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter) {
// mapConverter applies extraMap of config settings. Useful for overriding the config
// for testing purposes. Keys must use "::" delimiter between levels.
type mapConverter struct {
extraMap map[string]*string
}

func (m mapConverter) Convert(ctx context.Context, cfgMap *config.Map) error {
for k, v := range m.extraMap {
cfgMap.Set(k, v)
}
return nil
}

type labelState int

const (
labelNotPresent labelState = iota
labelSpecificValue
labelAnyValue
)

type labelValue struct {
label string
state labelState
}

type ownMetricsTestCase struct {
name string
userDefinedResource map[string]*string
expectedLabels map[string]labelValue
}

var testResourceAttrValue = "resource_attr_test_value"
var testInstanceID = "test_instance_id"
var testServiceVersion = "2022-05-20"

var ownMetricsTestCases = []ownMetricsTestCase{
{
name: "no resource",
userDefinedResource: nil,
// All labels added to all collector metrics by default are listed below.
// These labels are hard coded here in order to avoid inadvertent changes:
// at this point changing labels should be treated as a breaking changing
// and requires a good justification. The reason is that changes to metric
// names or labels can break alerting, dashboards, etc that are used to
// monitor the Collector in production deployments.
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelAnyValue},
"service_version": {label: version.Version, state: labelSpecificValue},
},
},
{
name: "resource with custom attr",
userDefinedResource: map[string]*string{
"custom_resource_attr": &testResourceAttrValue,
},
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelAnyValue},
"service_version": {label: version.Version, state: labelSpecificValue},
"custom_resource_attr": {label: "resource_attr_test_value", state: labelSpecificValue},
},
},
{
name: "override service.instance.id",
userDefinedResource: map[string]*string{
"service.instance.id": &testInstanceID,
},
expectedLabels: map[string]labelValue{
"service_instance_id": {label: "test_instance_id", state: labelSpecificValue},
"service_version": {label: version.Version, state: labelSpecificValue},
},
},
{
name: "suppress service.instance.id",
userDefinedResource: map[string]*string{
"service.instance.id": nil, // nil value in config is used to suppress attributes.
},
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelNotPresent},
"service_version": {label: version.Version, state: labelSpecificValue},
},
},
{
name: "override service.version",
userDefinedResource: map[string]*string{
"service.version": &testServiceVersion,
},
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelAnyValue},
"service_version": {label: "2022-05-20", state: labelSpecificValue},
},
},
{
name: "suppress service.version",
userDefinedResource: map[string]*string{
"service.version": nil, // nil value in config is used to suppress attributes.
},
expectedLabels: map[string]labelValue{
"service_instance_id": {state: labelAnyValue},
"service_version": {state: labelNotPresent},
},
},
}

func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter, tc ownMetricsTestCase) {
factories, err := testcomponents.NewDefaultFactories()
require.NoError(t, err)
var once sync.Once
Expand All @@ -194,10 +298,19 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter
cfgSet := newDefaultConfigProviderSettings([]string{
filepath.Join("testdata", "otelcol-config.yaml"),
})

// Prepare config properties to be merged with the main config.
extraCfgAsProps := map[string]*string{
// Set the metrics address to expose own metrics on.
"service::telemetry::metrics::address": &metricsAddr,
}
// Also include resource attributes under the service.telemetry.resource key.
for k, v := range tc.userDefinedResource {
extraCfgAsProps["service::telemetry::resource::"+k] = v
}

cfgSet.MapConverters = append([]config.MapConverter{
overwritepropertiesmapconverter.New(
[]string{"service.telemetry.metrics.address=" + metricsAddr},
)},
mapConverter{extraCfgAsProps}},
cfgSet.MapConverters...,
)
cfgProvider, err := NewConfigProvider(cfgSet)
Expand All @@ -220,16 +333,7 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter
assert.Equal(t, col.telemetry.Logger, col.GetLogger())
assert.True(t, loggingHookCalled)

// All labels added to all collector metrics by default are listed below.
// These labels are hard coded here in order to avoid inadvertent changes:
// at this point changing labels should be treated as a breaking changing
// and requires a good justification. The reason is that changes to metric
// names or labels can break alerting, dashboards, etc that are used to
// monitor the Collector in production deployments.
mandatoryLabels := []string{
"service_instance_id",
}
assertMetrics(t, metricsAddr, mandatoryLabels)
assertMetrics(t, metricsAddr, tc.expectedLabels)

assertZPages(t)
col.signalsChannel <- syscall.SIGTERM
Expand All @@ -239,15 +343,23 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter
}

func TestCollectorStartWithOpenCensusMetrics(t *testing.T) {
testCollectorStartHelper(t, newColTelemetry(featuregate.NewRegistry()))
for _, tc := range ownMetricsTestCases {
t.Run(tc.name, func(t *testing.T) {
testCollectorStartHelper(t, newColTelemetry(featuregate.NewRegistry()), tc)
})
}
}

func TestCollectorStartWithOpenTelemetryMetrics(t *testing.T) {
colTel := newColTelemetry(featuregate.NewRegistry())
colTel.registry.Apply(map[string]bool{
useOtelForInternalMetricsfeatureGateID: true,
})
testCollectorStartHelper(t, colTel)
for _, tc := range ownMetricsTestCases {
t.Run(tc.name, func(t *testing.T) {
colTel := newColTelemetry(featuregate.NewRegistry())
colTel.registry.Apply(map[string]bool{
useOtelForInternalMetricsfeatureGateID: true,
})
testCollectorStartHelper(t, colTel, tc)
})
}
}

func TestCollectorShutdownBeforeRun(t *testing.T) {
Expand Down Expand Up @@ -316,7 +428,7 @@ func (tel *mockColTelemetry) shutdown() error {
return errors.New("err1")
}

func assertMetrics(t *testing.T, metricsAddr string, mandatoryLabels []string) {
func assertMetrics(t *testing.T, metricsAddr string, expectedLabels map[string]labelValue) {
client := &http.Client{}
resp, err := client.Get("http://" + metricsAddr + "/metrics")
require.NoError(t, err)
Expand All @@ -341,14 +453,21 @@ func assertMetrics(t *testing.T, metricsAddr string, mandatoryLabels []string) {
metricName[:len(prefix)+1]+"...")

for _, metric := range metricFamily.Metric {
var labelNames []string
labelMap := map[string]string{}
for _, labelPair := range metric.Label {
labelNames = append(labelNames, *labelPair.Name)
labelMap[*labelPair.Name] = *labelPair.Value
}

for _, mandatoryLabel := range mandatoryLabels {
// require is used here so test fails with a single message.
require.Contains(t, labelNames, mandatoryLabel, "mandatory label %q not present", mandatoryLabel)
for k, v := range expectedLabels {
switch v.state {
case labelNotPresent:
_, present := labelMap[k]
assert.False(t, present, "label %q must not be present", k)
case labelSpecificValue:
require.Equal(t, v.label, labelMap[k], "mandatory label %q value mismatch", k)
case labelAnyValue:
assert.NotEmpty(t, labelMap[k], "mandatory label %q not present", k)
}
}
}
}
Expand Down
39 changes: 29 additions & 10 deletions service/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ func (tel *colTelemetry) init(col *Collector) error {
func (tel *colTelemetry) initOnce(col *Collector) error {
logger := col.telemetry.Logger
cfg := col.service.config.Telemetry
resource := cfg.Resource

level := cfg.Metrics.Level
metricsAddr := cfg.Metrics.Address
Expand All @@ -109,8 +110,29 @@ func (tel *colTelemetry) initOnce(col *Collector) error {

logger.Info("Setting up own telemetry...")

instanceUUID, _ := uuid.NewRandom()
instanceID := instanceUUID.String()
// Construct telemetry attributes from resource attributes.
telAttrs := map[string]string{}
for k, v := range resource {
// nil value indicates that the attribute should not be included in the telemetry.
if v != nil {
telAttrs[k] = *v
}
}

if _, ok := resource[semconv.AttributeServiceInstanceID]; !ok {
// AttributeServiceInstanceID is not specified in the config. Auto-generate one.
instanceUUID, _ := uuid.NewRandom()
instanceID := instanceUUID.String()
telAttrs[semconv.AttributeServiceInstanceID] = instanceID
}

if AddCollectorVersionTag {
if _, ok := resource[semconv.AttributeServiceVersion]; !ok {
// AttributeServiceVersion is not specified in the config. Use the actual
// build version.
telAttrs[semconv.AttributeServiceVersion] = version.Version
}
}

var pe http.Handler
if tel.registry.IsEnabled(useOtelForInternalMetricsfeatureGateID) {
Expand All @@ -120,7 +142,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error {
}
pe = otelHandler
} else {
ocHandler, err := tel.initOpenCensus(col, instanceID)
ocHandler, err := tel.initOpenCensus(col, telAttrs)
if err != nil {
return err
}
Expand All @@ -131,8 +153,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error {
"Serving Prometheus metrics",
zap.String(zapKeyTelemetryAddress, metricsAddr),
zap.String(zapKeyTelemetryLevel, level.String()),
zap.String(semconv.AttributeServiceInstanceID, instanceID),
zap.String(semconv.AttributeServiceVersion, version.Version),
zap.Any("Resource", resource),
)

mux := http.NewServeMux()
Expand All @@ -152,7 +173,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error {
return nil
}

func (tel *colTelemetry) initOpenCensus(col *Collector, instanceID string) (http.Handler, error) {
func (tel *colTelemetry) initOpenCensus(col *Collector, telAttrs map[string]string) (http.Handler, error) {
processMetricsViews, err := telemetry2.NewProcessMetricsViews(getBallastSize(col.service.host))
if err != nil {
return nil, err
Expand All @@ -178,10 +199,8 @@ func (tel *colTelemetry) initOpenCensus(col *Collector, instanceID string) (http

opts.ConstLabels = make(map[string]string)

opts.ConstLabels[sanitizePrometheusKey(semconv.AttributeServiceInstanceID)] = instanceID

if AddCollectorVersionTag {
opts.ConstLabels[sanitizePrometheusKey(semconv.AttributeServiceVersion)] = version.Version
for k, v := range telAttrs {
opts.ConstLabels[sanitizePrometheusKey(k)] = v
}

pe, err := prometheus.NewExporter(opts)
Expand Down

0 comments on commit 87abc21

Please sign in to comment.