Skip to content

Commit

Permalink
ref(spans): Write sentry tags to dedicated field (#2555)
Browse files Browse the repository at this point in the history
Up until now, we've materialized span tags into `span.data`. This
creates several problems:

* Potential clashes between keys set by the user and keys set by sentry.
* Pollution of the UI because `span.data` entries show up in the event
details view.
* Most importantly: `span.data` is [subject to PII
scrubbing](#1953), so tags
computed by Relay are sometimes overwritten by PII scrubbing in the next
Relay instance.

Instead of writing into `span.data`, create a new top-level object in
the span called `sentry_tags`. The same naming has been used
[here](https://github.com/getsentry/sentry-kafka-schemas/blob/79fb0900e3a9a4da6f0db15eab1b5d27f42ffeb7/schemas/snuba-spans.v1.schema.json#L81-L83).

For now, double-write span tags into both `span.data` and
`span.sentry_tags` until all users of these tags have switched to
`sentry_tags`.
  • Loading branch information
jjbayer authored Oct 4, 2023
1 parent 700d02e commit 415e5cb
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 78 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- Fix hot-loop burning CPU when upstream service is unavailable. ([#2518](https://github.com/getsentry/relay/pull/2518))
- Extract new low-cardinality transaction duration metric for statistical detectors. ([#2513](https://github.com/getsentry/relay/pull/2513))
- Introduce reservoir sampling rule. ([#2550](https://github.com/getsentry/relay/pull/2550))
- Write span tags to `span.sentry_tags`. ([#2555](https://github.com/getsentry/relay/pull/2555))

## 23.9.1

Expand Down
40 changes: 20 additions & 20 deletions relay-dynamic-config/src/defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub fn add_span_metrics(project_config: &mut ProjectConfig) {
condition: span_op_conditions.clone(),
tags: vec![TagSpec {
key: "transaction".into(),
field: Some("span.data.transaction".into()),
field: Some("span.sentry_tags.transaction".into()),
value: None,
condition: None,
}],
Expand All @@ -72,24 +72,24 @@ pub fn add_span_metrics(project_config: &mut ProjectConfig) {
TagMapping {
metrics: vec![LazyGlob::new("d:spans/exclusive_time*@millisecond".into())],
tags: [
"environment",
"http.status_code",
"span.action",
"span.category",
"span.description",
"span.domain",
"span.group",
"span.module",
"span.op",
"span.status_code",
"span.status",
"span.system",
"transaction.method",
"transaction.op",
("", "environment"),
("", "http.status_code"),
("span.", "action"),
("span.", "category"),
("span.", "description"),
("span.", "domain"),
("span.", "group"),
("span.", "module"),
("span.", "op"),
("span.", "status_code"),
("span.", "status"),
("span.", "system"),
("", "transaction.method"),
("", "transaction.op"),
]
.map(|key| TagSpec {
key: key.into(),
field: Some(format!("span.data.{}", key.replace('.', "\\."))),
.map(|(prefix, key)| TagSpec {
key: format!("{prefix}{key}"),
field: Some(format!("span.sentry_tags.{}", key)),
value: None,
condition: None,
})
Expand All @@ -100,9 +100,9 @@ pub fn add_span_metrics(project_config: &mut ProjectConfig) {
tags: ["release", "device.class"] // TODO: sentry PR for static strings
.map(|key| TagSpec {
key: key.into(),
field: Some(format!("span.data.{}", key.replace('.', "\\."))),
field: Some(format!("span.sentry_tags.{}", key)),
value: None,
condition: Some(RuleCondition::eq("span.data.mobile", true)),
condition: Some(RuleCondition::eq("span.sentry_tags.mobile", "true")),
})
.into(),
},
Expand Down
3 changes: 3 additions & 0 deletions relay-event-normalization/src/normalize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3620,6 +3620,7 @@ mod tests {
tags: ~,
origin: ~,
data: ~,
sentry_tags: ~,
other: {},
},
]
Expand Down Expand Up @@ -3658,6 +3659,7 @@ mod tests {
tags: ~,
origin: ~,
data: ~,
sentry_tags: ~,
other: {},
},
]
Expand Down Expand Up @@ -3696,6 +3698,7 @@ mod tests {
tags: ~,
origin: ~,
data: ~,
sentry_tags: ~,
other: {},
},
]
Expand Down
145 changes: 92 additions & 53 deletions relay-event-normalization/src/normalize/span/tag_extraction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::ops::ControlFlow;
use once_cell::sync::Lazy;
use regex::Regex;
use relay_event_schema::protocol::{Event, Span, TraceContext};
use relay_protocol::{Annotated, Value};
use relay_protocol::Annotated;
use sqlparser::ast::Visit;
use sqlparser::ast::{ObjectName, Visitor};
use url::Url;
Expand Down Expand Up @@ -37,7 +37,7 @@ pub enum SpanTagKey {
TransactionMethod,
TransactionOp,
HttpStatusCode,
// `true` if the transaction was sent by a mobile SDK.
// `"true"` if the transaction was sent by a mobile SDK.
Mobile,
DeviceClass,

Expand All @@ -55,34 +55,63 @@ pub enum SpanTagKey {
}

impl SpanTagKey {
/// Whether or not this tag should be added to metrics extracted from the span.
pub fn is_metric_tag(&self) -> bool {
!matches!(self, SpanTagKey::Release | SpanTagKey::User)
/// The key used to write this tag into `span.data`.
///
/// This key corresponds to the tag key on span metrics.
/// NOTE: This method can be removed once we stop double-writing span tags.
pub fn data_key(&self) -> &str {
match self {
SpanTagKey::Release => "release",
SpanTagKey::User => "user",
SpanTagKey::Environment => "environment",
SpanTagKey::Transaction => "transaction",
SpanTagKey::TransactionMethod => "transaction.method",
SpanTagKey::TransactionOp => "transaction.op",
SpanTagKey::HttpStatusCode => "http.status_code",
SpanTagKey::Mobile => "mobile",
SpanTagKey::DeviceClass => "device.class",

SpanTagKey::Description => "span.description",
SpanTagKey::Group => "span.group",
SpanTagKey::SpanOp => "span.op",
SpanTagKey::Category => "span.category",
SpanTagKey::Module => "span.module",
SpanTagKey::Action => "span.action",
SpanTagKey::Domain => "span.domain",
SpanTagKey::System => "span.system",
SpanTagKey::Status => "span.status",
SpanTagKey::StatusCode => "span.status_code",
}
}
}

relay_common::derive_fromstr_and_display!(SpanTagKey, (), {
SpanTagKey::Release => "release",
SpanTagKey::User => "user",
SpanTagKey::Environment => "environment",
SpanTagKey::Transaction => "transaction",
SpanTagKey::TransactionMethod => "transaction.method",
SpanTagKey::TransactionOp => "transaction.op",
SpanTagKey::HttpStatusCode => "http.status_code",
SpanTagKey::Mobile => "mobile",
SpanTagKey::DeviceClass => "device.class",

SpanTagKey::Description => "span.description",
SpanTagKey::Group => "span.group",
SpanTagKey::SpanOp => "span.op",
SpanTagKey::Category => "span.category",
SpanTagKey::Module => "span.module",
SpanTagKey::Action => "span.action",
SpanTagKey::Domain => "span.domain",
SpanTagKey::System => "span.system",
SpanTagKey::Status => "span.status",
SpanTagKey::StatusCode => "span.status_code",
});
/// The key used to write this tag into `span.sentry_keys`.
///
/// This key corresponds to the tag key in the snuba span dataset.
pub fn sentry_tag_key(&self) -> &str {
match self {
SpanTagKey::Release => "release",
SpanTagKey::User => "user",
SpanTagKey::Environment => "environment",
SpanTagKey::Transaction => "transaction",
SpanTagKey::TransactionMethod => "transaction.method",
SpanTagKey::TransactionOp => "transaction.op",
SpanTagKey::HttpStatusCode => "http.status_code",
SpanTagKey::Mobile => "mobile",
SpanTagKey::DeviceClass => "device.class",

SpanTagKey::Description => "description",
SpanTagKey::Group => "group",
SpanTagKey::SpanOp => "op",
SpanTagKey::Category => "category",
SpanTagKey::Module => "module",
SpanTagKey::Action => "action",
SpanTagKey::Domain => "domain",
SpanTagKey::System => "system",
SpanTagKey::Status => "status",
SpanTagKey::StatusCode => "status_code",
}
}
}

/// Configuration for span tag extraction.
pub(crate) struct Config {
Expand All @@ -107,35 +136,46 @@ pub(crate) fn extract_span_tags(event: &mut Event, config: &Config) {

let tags = extract_tags(span, config);

span.sentry_tags = Annotated::new(
shared_tags
.clone()
.into_iter()
.chain(tags.clone())
.map(|(k, v)| (k.sentry_tag_key().to_owned(), Annotated::new(v)))
.collect(),
);

// Double write to `span.data` for now. This can be removed once all users of these fields
// have switched to `sentry_tags`.
let data = span.data.value_mut().get_or_insert_with(Default::default);
data.extend(
shared_tags
.clone()
.into_iter()
.chain(tags)
.map(|(k, v)| (k.to_string(), Annotated::new(v))),
.map(|(k, v)| (k.data_key().to_owned(), Annotated::new(v.into()))),
);
}
}

/// Extracts tags shared by every span.
pub fn extract_shared_tags(event: &Event) -> BTreeMap<SpanTagKey, Value> {
let mut tags = BTreeMap::<SpanTagKey, Value>::new();
pub fn extract_shared_tags(event: &Event) -> BTreeMap<SpanTagKey, String> {
let mut tags = BTreeMap::new();

if let Some(release) = event.release.as_str() {
tags.insert(SpanTagKey::Release, release.to_owned().into());
tags.insert(SpanTagKey::Release, release.to_owned());
}

if let Some(user) = event.user.value().and_then(get_eventuser_tag) {
tags.insert(SpanTagKey::User, user.into());
tags.insert(SpanTagKey::User, user);
}

if let Some(environment) = event.environment.as_str() {
tags.insert(SpanTagKey::Environment, environment.into());
tags.insert(SpanTagKey::Environment, environment.to_owned());
}

if let Some(transaction_name) = event.transaction.value() {
tags.insert(SpanTagKey::Transaction, transaction_name.clone().into());
tags.insert(SpanTagKey::Transaction, transaction_name.clone());

let transaction_method_from_request = event
.request
Expand All @@ -146,25 +186,25 @@ pub fn extract_shared_tags(event: &Event) -> BTreeMap<SpanTagKey, Value> {
if let Some(transaction_method) = transaction_method_from_request.or_else(|| {
http_method_from_transaction_name(transaction_name).map(|m| m.to_uppercase())
}) {
tags.insert(SpanTagKey::TransactionMethod, transaction_method.into());
tags.insert(SpanTagKey::TransactionMethod, transaction_method);
}
}

if let Some(trace_context) = event.context::<TraceContext>() {
if let Some(op) = extract_transaction_op(trace_context) {
tags.insert(SpanTagKey::TransactionOp, op.to_lowercase().into());
tags.insert(SpanTagKey::TransactionOp, op.to_lowercase().to_owned());
}
}

if let Some(transaction_http_status_code) = extract_http_status_code(event) {
tags.insert(
SpanTagKey::HttpStatusCode,
transaction_http_status_code.into(),
transaction_http_status_code.to_owned(),
);
}

if MOBILE_SDKS.contains(&event.sdk_name()) {
tags.insert(SpanTagKey::Mobile, true.into());
tags.insert(SpanTagKey::Mobile, "true".to_owned());
}

if let Some(device_class) = event.tag_value("device.class") {
Expand All @@ -180,25 +220,25 @@ pub fn extract_shared_tags(event: &Event) -> BTreeMap<SpanTagKey, Value> {
/// [span operations](https://develop.sentry.dev/sdk/performance/span-operations/) and
/// existing [span data](https://develop.sentry.dev/sdk/performance/span-data-conventions/) fields,
/// and rely on Sentry conventions and heuristics.
pub(crate) fn extract_tags(span: &Span, config: &Config) -> BTreeMap<SpanTagKey, Value> {
let mut span_tags: BTreeMap<SpanTagKey, Value> = BTreeMap::new();
pub(crate) fn extract_tags(span: &Span, config: &Config) -> BTreeMap<SpanTagKey, String> {
let mut span_tags: BTreeMap<SpanTagKey, String> = BTreeMap::new();

let system = span
.data
.value()
.and_then(|v| v.get("db.system"))
.and_then(|system| system.as_str());
if let Some(sys) = system {
span_tags.insert(SpanTagKey::System, sys.to_lowercase().into());
span_tags.insert(SpanTagKey::System, sys.to_lowercase());
}

if let Some(unsanitized_span_op) = span.op.value() {
let span_op = unsanitized_span_op.to_owned().to_lowercase();

span_tags.insert(SpanTagKey::SpanOp, span_op.to_owned().into());
span_tags.insert(SpanTagKey::SpanOp, span_op.to_owned());

if let Some(category) = span_op_to_category(&span_op) {
span_tags.insert(SpanTagKey::Category, category.to_owned().into());
span_tags.insert(SpanTagKey::Category, category.to_owned());
}

let span_module = if span_op.starts_with("http") {
Expand All @@ -212,7 +252,7 @@ pub(crate) fn extract_tags(span: &Span, config: &Config) -> BTreeMap<SpanTagKey,
};

if let Some(module) = span_module {
span_tags.insert(SpanTagKey::Module, module.to_owned().into());
span_tags.insert(SpanTagKey::Module, module.to_owned());
}

let scrubbed_description = span
Expand Down Expand Up @@ -259,7 +299,7 @@ pub(crate) fn extract_tags(span: &Span, config: &Config) -> BTreeMap<SpanTagKey,
};

if let Some(act) = action {
span_tags.insert(SpanTagKey::Action, act.into());
span_tags.insert(SpanTagKey::Action, act);
}

let domain = if span_op == "http.client" {
Expand All @@ -286,7 +326,7 @@ pub(crate) fn extract_tags(span: &Span, config: &Config) -> BTreeMap<SpanTagKey,

if !span_op.starts_with("db.redis") {
if let Some(dom) = domain {
span_tags.insert(SpanTagKey::Domain, dom.into());
span_tags.insert(SpanTagKey::Domain, dom);
}
}

Expand All @@ -298,19 +338,19 @@ pub(crate) fn extract_tags(span: &Span, config: &Config) -> BTreeMap<SpanTagKey,

let mut span_group = format!("{:?}", md5::compute(scrubbed_desc));
span_group.truncate(16);
span_tags.insert(SpanTagKey::Group, span_group.into());
span_tags.insert(SpanTagKey::Group, span_group);

let truncated = truncate_string(scrubbed_desc.to_owned(), config.max_tag_value_size);
span_tags.insert(SpanTagKey::Description, truncated.into());
span_tags.insert(SpanTagKey::Description, truncated);
}
}

if let Some(span_status) = span.status.value() {
span_tags.insert(SpanTagKey::Status, span_status.to_string().into());
span_tags.insert(SpanTagKey::Status, span_status.to_string());
}

if let Some(status_code) = http_status_code_from_span(span) {
span_tags.insert(SpanTagKey::StatusCode, status_code.into());
span_tags.insert(SpanTagKey::StatusCode, status_code);
}

span_tags
Expand Down Expand Up @@ -561,9 +601,8 @@ mod tests {
.value()
.and_then(|e| e.spans.value())
.and_then(|spans| spans[0].value())
.and_then(|s| s.data.value())
.and_then(|s| s.sentry_tags.value())
.and_then(|d| d.get("transaction.method"))
.and_then(|v| v.value())
.and_then(|v| v.as_str())
.unwrap()
);
Expand Down
5 changes: 5 additions & 0 deletions relay-event-schema/src/protocol/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ pub struct Span {
#[metastructure(pii = "true")]
pub data: Annotated<Object<Value>>,

/// Tags generated by Relay. These tags are a superset of the tags set on span metrics.
pub sentry_tags: Annotated<Object<String>>,

// TODO remove retain when the api stabilizes
/// Additional arbitrary fields for forwards compatibility.
#[metastructure(additional_properties, retain = "true", pii = "maybe")]
Expand Down Expand Up @@ -125,6 +128,8 @@ impl Getter for Span {
val = map.get(&part)?.value()?;
}
val.into()
} else if let Some(key) = path.strip_prefix("sentry_tags.") {
self.sentry_tags.value()?.get(key)?.as_str()?.into()
} else {
return None;
}
Expand Down
Loading

0 comments on commit 415e5cb

Please sign in to comment.