Remove hierarchical hashes from code base (#6604)
Hierarchical grouping was deprecated back in September.

See project: https://github.com/orgs/getsentry/projects/242

The failing test depends on this PR getting merged first:
getsentry/sentry-kafka-schemas#353

---------

Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
armenzg and getsantry[bot] authored Dec 3, 2024
1 parent ce28a0f commit db53174
Showing 24 changed files with 16 additions and 322 deletions.
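
For orientation before the diffs: `hierarchical_hashes` was an Array(UUID) column stored alongside `primary_hash` on the errors dataset (see the Rust `ErrorRow`, the YAML schemas, and the Python processor below). A minimal sketch of the row shape this commit retires, with field names and values copied from the snapshots and test fixtures further down:

# Illustrative subset of an error row; values from the fixtures below.
row_before = {
    "primary_hash": "d36001ef-28af-2542-fde8-cf2935766141",
    "hierarchical_hashes": [],  # Array(UUID) column removed by this commit
}
row_after = {
    "primary_hash": "d36001ef-28af-2542-fde8-cf2935766141",
}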
2 changes: 1 addition & 1 deletion requirements.txt

@@ -28,7 +28,7 @@ python-dateutil==2.8.2
 python-rapidjson==1.8
 redis==4.5.4
 sentry-arroyo==2.17.6
-sentry-kafka-schemas==0.1.117
+sentry-kafka-schemas==0.1.122
 sentry-redis-tools==0.3.0
 sentry-relay==0.9.2
 sentry-sdk==2.18.0
10 changes: 0 additions & 10 deletions rust_snuba/src/processors/errors.rs

@@ -132,8 +132,6 @@ struct ErrorData {
     #[serde(default, alias = "sentry.interfaces.Exception")]
     exception: Option<Exception>,
     #[serde(default)]
-    hierarchical_hashes: Vec<String>,
-    #[serde(default)]
     location: Option<String>,
     #[serde(default)]
     modules: Option<BTreeMap<String, Option<String>>>,
@@ -350,7 +348,6 @@ struct ErrorRow {
     #[serde(rename = "exception_stacks.value")]
     exception_stacks_value: Vec<Option<String>>,
     group_id: u64,
-    hierarchical_hashes: Vec<Uuid>,
     http_method: Option<String>,
     http_referer: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
@@ -419,12 +416,6 @@ impl ErrorRow {

         // Hashes
         let primary_hash = to_uuid(from.primary_hash);
-        let hierarchical_hashes: Vec<Uuid> = from
-            .data
-            .hierarchical_hashes
-            .into_iter()
-            .map(to_uuid)
-            .collect();

         // SDK Integrations
         let from_sdk = from.data.sdk.unwrap_or_default();
@@ -671,7 +662,6 @@ impl ErrorRow {
             exception_stacks_type: stack_types,
             exception_stacks_value: stack_values,
             group_id: from.group_id,
-            hierarchical_hashes,
             http_method: from_request.method.0,
             http_referer,
             ip_address_v4,
@@ -263,7 +263,6 @@ expression: snapshot_payload
     "Some exception."
   ],
   "group_id": 124,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "ip_address_v4": "84.115.213.8",
@@ -263,7 +263,6 @@ expression: snapshot_payload
     "Some exception."
   ],
   "group_id": 124,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "ip_address_v4": "84.115.213.8",
@@ -264,7 +264,6 @@ expression: snapshot_payload
     "Some exception."
   ],
   "group_id": 124,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "ip_address_v4": "84.115.213.8",
@@ -26,7 +26,6 @@ expression: snapshot_payload
   "exception_stacks.type": [],
   "exception_stacks.value": [],
   "group_id": 124,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "level": null,
@@ -26,7 +26,6 @@ expression: snapshot_payload
   "exception_stacks.type": [],
   "exception_stacks.value": [],
   "group_id": 123123123,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "level": null,
@@ -26,7 +26,6 @@ expression: snapshot_payload
   "exception_stacks.type": [],
   "exception_stacks.value": [],
   "group_id": 124,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "level": null,
@@ -26,7 +26,6 @@ expression: snapshot_payload
   "exception_stacks.type": [],
   "exception_stacks.value": [],
   "group_id": 123123,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "level": null,
@@ -26,7 +26,6 @@ expression: snapshot_payload
   "exception_stacks.type": [],
   "exception_stacks.value": [],
   "group_id": 123,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "level": null,
@@ -26,7 +26,6 @@ expression: snapshot_payload
   "exception_stacks.type": [],
   "exception_stacks.value": [],
   "group_id": 123123123,
-  "hierarchical_hashes": [],
   "http_method": null,
   "http_referer": null,
   "level": null,
@@ -77,11 +77,6 @@ schema:
       { name: deleted, type: UInt, args: { size: 8 } },
       { name: group_id, type: UInt, args: { size: 64 } },
       { name: primary_hash, type: UUID },
-      {
-        name: hierarchical_hashes,
-        type: Array,
-        args: { inner_type: { type: UUID } },
-      },
       { name: received, type: DateTime },
       { name: message, type: String },
       { name: title, type: String },
5 changes: 0 additions & 5 deletions snuba/datasets/configuration/events/entities/events.yaml

@@ -77,11 +77,6 @@ schema:
       { name: deleted, type: UInt, args: { size: 8 } },
       { name: group_id, type: UInt, args: { size: 64 } },
       { name: primary_hash, type: UUID },
-      {
-        name: hierarchical_hashes,
-        type: Array,
-        args: { inner_type: { type: UUID } },
-      },
       { name: received, type: DateTime },
       { name: message, type: String },
       { name: title, type: String },
9 changes: 0 additions & 9 deletions snuba/datasets/configuration/events/storages/errors.yaml

@@ -104,11 +104,6 @@ schema:
       { name: deleted, type: UInt, args: { size: 8 } },
       { name: group_id, type: UInt, args: { size: 64 } },
       { name: primary_hash, type: UUID },
-      {
-        name: hierarchical_hashes,
-        type: Array,
-        args: { inner_type: { type: UUID } },
-      },
       { name: received, type: DateTime },
       { name: message, type: String },
       { name: title, type: String },
@@ -307,10 +302,6 @@ query_processors:
     args:
       columns: !!set
        span_id: null
- - processor: UUIDArrayColumnProcessor
-   args:
-     columns: !!set
-       hierarchical_hashes: null
  - processor: SliceOfMapOptimizer
  - processor: EventsBooleanContextsProcessor
  - processor: TypeConditionOptimizer
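
Dropping the column also retires its query processor. Conceptually, `UUIDArrayColumnProcessor` let queries compare unhyphenated hash strings against a UUID array column; a rough sketch of that normalization idea (an illustration with an assumed helper name, not the processor's actual code):

import uuid

def normalize_uuid_literals(values: list[str]) -> list[str]:
    # Hash strings arrive as 32 hex chars ("04233d08ac90..."); ClickHouse UUID
    # comparisons need the canonical hyphenated form.
    return [str(uuid.UUID(v)) for v in values]

normalize_uuid_literals(["04233d08ac90cf6fc015b1be5932e7e3"])
# -> ['04233d08-ac90-cf6f-c015-b1be5932e7e3']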
9 changes: 0 additions & 9 deletions snuba/datasets/configuration/events/storages/errors_ro.yaml

@@ -104,11 +104,6 @@ schema:
       { name: deleted, type: UInt, args: { size: 8 } },
       { name: group_id, type: UInt, args: { size: 64 } },
       { name: primary_hash, type: UUID },
-      {
-        name: hierarchical_hashes,
-        type: Array,
-        args: { inner_type: { type: UUID } },
-      },
       { name: received, type: DateTime },
       { name: message, type: String },
       { name: title, type: String },
@@ -303,10 +298,6 @@ query_processors:
     args:
       columns: !!set
        span_id: null
- - processor: UUIDArrayColumnProcessor
-   args:
-     columns: !!set
-       hierarchical_hashes: null
  - processor: SliceOfMapOptimizer
  - processor: EventsBooleanContextsProcessor
  - processor: TypeConditionOptimizer
3 changes: 0 additions & 3 deletions snuba/datasets/processors/errors_processor.py

@@ -242,9 +242,6 @@ def extract_custom(
         output["message"] = _unicodify(event["message"])

         output["primary_hash"] = str(uuid.UUID(_hashify(event["primary_hash"])))
-        output["hierarchical_hashes"] = list(
-            str(uuid.UUID(_hashify(x))) for x in data.get("hierarchical_hashes") or ()
-        )

         output["culprit"] = _unicodify(data.get("culprit", ""))
         output["type"] = _unicodify(data.get("type", ""))
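
With the deletion above, `extract_custom` normalizes only the primary hash. A short sketch of that path; the `_hashify` body here is an assumption (pass 32-char hex hashes through, md5 anything else), not necessarily the exact snuba helper:

import re
import uuid
from hashlib import md5

HASH_RE = re.compile(r"^[0-9a-f]{32}$")

def _hashify(h: str) -> str:
    # Assumed behavior: keep well-formed 32-char hex hashes, md5 anything else.
    return h if HASH_RE.match(h) else md5(h.encode("utf-8")).hexdigest()

# Only the primary hash survives this commit:
print(str(uuid.UUID(_hashify("c8b21c571231e989060b9110a2ade7d3"))))
# c8b21c57-1231-e989-060b-9110a2ade7d3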
2 changes: 0 additions & 2 deletions snuba/processor.py

@@ -81,12 +81,10 @@ class ReplacementType(str, Enum):
     START_DELETE_GROUPS = "start_delete_groups"
     START_MERGE = "start_merge"
     START_UNMERGE = "start_unmerge"
-    START_UNMERGE_HIERARCHICAL = "start_unmerge_hierarchical"
     START_DELETE_TAG = "start_delete_tag"
     END_DELETE_GROUPS = "end_delete_groups"
     END_MERGE = "end_merge"
     END_UNMERGE = "end_unmerge"
-    END_UNMERGE_HIERARCHICAL = "end_unmerge_hierarchical"
     END_DELETE_TAG = "end_delete_tag"
     TOMBSTONE_EVENTS = "tombstone_events"
     REPLACE_GROUP = "replace_group"
104 changes: 0 additions & 104 deletions snuba/replacers/errors_replacer.py

@@ -27,7 +27,6 @@
     EndDeleteGroupsMessageBody,
     EndDeleteTagMessageBody,
     EndMergeMessageBody,
-    EndUnmergeHierarchicalMessageBody,
     EndUnmergeMessageBody,
     ExcludeGroupsMessageBody,
     ReplaceGroupMessageBody,
@@ -193,7 +192,6 @@ def process_message(
         ReplacementType.START_DELETE_GROUPS,
         ReplacementType.START_MERGE,
         ReplacementType.START_UNMERGE,
-        ReplacementType.START_UNMERGE_HIERARCHICAL,
         ReplacementType.START_DELETE_TAG,
     ):
         return None
@@ -822,107 +820,6 @@ def _convert_hash(
     return "'%s'" % _hashify(hash)


-@dataclass
-class UnmergeHierarchicalReplacement(Replacement):
-    project_id: int
-    timestamp: datetime
-    primary_hash: str
-    hierarchical_hash: str
-    previous_group_id: int
-    new_group_id: int
-
-    all_columns: Sequence[FlattenedColumn]
-    state_name: ReplacerState
-
-    @classmethod
-    def parse_message(
-        cls,
-        message: ReplacementMessage[EndUnmergeHierarchicalMessageBody],
-        context: ReplacementContext,
-    ) -> Optional[UnmergeHierarchicalReplacement]:
-        try:
-            timestamp = datetime.strptime(
-                message.data["datetime"], settings.PAYLOAD_DATETIME_FORMAT
-            )
-
-            primary_hash = message.data["primary_hash"]
-            assert isinstance(primary_hash, str)
-
-            hierarchical_hash = message.data["hierarchical_hash"]
-            assert isinstance(hierarchical_hash, str)
-
-            uuid.UUID(primary_hash)
-            uuid.UUID(hierarchical_hash)
-        except Exception as exc:
-            # TODO(markus): We're sacrificing consistency over uptime as long as
-            # this is in development. At some point this piece of code should be
-            # stable enough to remove this.
-            logger.error("process_unmerge_hierarchical.failed", exc_info=exc)
-            return None
-
-        return cls(
-            project_id=message.data["project_id"],
-            timestamp=timestamp,
-            primary_hash=primary_hash,
-            hierarchical_hash=hierarchical_hash,
-            all_columns=context.all_columns,
-            state_name=context.state_name,
-            previous_group_id=message.data["previous_group_id"],
-            new_group_id=message.data["new_group_id"],
-        )
-
-    @cached_property
-    def _where_clause(self) -> str:
-        primary_hash = _convert_hash(self.primary_hash, self.state_name)
-        hierarchical_hash = _convert_hash(
-            self.hierarchical_hash, self.state_name, convert_types=True
-        )
-        timestamp = self.timestamp.strftime(DATETIME_FORMAT)
-
-        return f"""\
-            PREWHERE primary_hash = {primary_hash}
-            WHERE group_id = {self.previous_group_id}
-            AND has(hierarchical_hashes, {hierarchical_hash})
-            AND project_id = {self.project_id}
-            AND received <= CAST('{timestamp}' AS DateTime)
-            AND NOT deleted
-        """
-
-    def get_count_query(self, table_name: str) -> Optional[str]:
-        return f"""
-            SELECT count()
-            FROM {table_name} FINAL
-            {self._where_clause}
-        """
-
-    def get_insert_query(self, table_name: str) -> Optional[str]:
-        all_column_names = [c.escaped for c in self.all_columns]
-        all_columns = ", ".join(all_column_names)
-        select_columns = ", ".join(
-            map(
-                lambda i: i if i != "group_id" else str(self.new_group_id),
-                all_column_names,
-            )
-        )
-
-        return f"""\
-            INSERT INTO {table_name} ({all_columns})
-            SELECT {select_columns}
-            FROM {table_name} FINAL
-            {self._where_clause}
-        """
-
-    def get_query_time_flags(self) -> Optional[QueryTimeFlags]:
-        return None
-
-    def get_project_id(self) -> int:
-        return self.project_id
-
-    @classmethod
-    def get_replacement_type(cls) -> ReplacementType:
-        return ReplacementType.END_UNMERGE_HIERARCHICAL
-
-
 @dataclass
 class DeleteTagReplacement(Replacement):
     project_id: int
@@ -1042,7 +939,6 @@ def get_replacement_type(cls) -> ReplacementType:
     DeleteGroupsReplacement,
     MergeReplacement,
     UnmergeGroupsReplacement,
-    UnmergeHierarchicalReplacement,
     DeleteTagReplacement,
     TombstoneEventsReplacement,
     ReplaceGroupReplacement,
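
For readers new to the replacer machinery: the deleted class was one implementation of the `Replacement` interface, whose shape is visible in the removed code above. A skeletal sketch using only method names that appear there (bodies elided; not the full interface definition):

from dataclasses import dataclass
from typing import Optional

@dataclass
class SomeReplacement:
    project_id: int

    def get_count_query(self, table_name: str) -> Optional[str]:
        ...  # SELECT count() over the rows the replacement would rewrite

    def get_insert_query(self, table_name: str) -> Optional[str]:
        ...  # INSERT ... SELECT FROM ... FINAL that re-writes matching rows

    def get_project_id(self) -> int:
        return self.project_id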
9 changes: 0 additions & 9 deletions tests/datasets/test_errors_processor.py

@@ -5,7 +5,6 @@
 from datetime import datetime, timedelta, timezone
 from typing import Any, Mapping, Sequence
 from unittest.mock import ANY
-from uuid import UUID

 import pytest

@@ -206,10 +205,6 @@ def serialize(self) -> tuple[int, str, Mapping[str, Any]]:
                 },
                 "fingerprint": ["{{ default }}"],
                 "hashes": ["c8b21c571231e989060b9110a2ade7d3"],
-                "hierarchical_hashes": [
-                    "04233d08ac90cf6fc015b1be5932e7e3",
-                    "04233d08ac90cf6fc015b1be5932e7e4",
-                ],
                 "key_id": "537125",
                 "level": "error",
                 "location": "snuba/clickhouse/http.py",
@@ -342,10 +337,6 @@ def build_result(self, meta: KafkaMessageMetadata) -> Mapping[str, Any]:
             "deleted": 0,
             "group_id": self.group_id,
             "primary_hash": "d36001ef-28af-2542-fde8-cf2935766141",
-            "hierarchical_hashes": [
-                str(UUID("04233d08ac90cf6fc015b1be5932e7e3")),
-                str(UUID("04233d08ac90cf6fc015b1be5932e7e4")),
-            ],
             "received": int(
                 self.received_timestamp.replace(tzinfo=timezone.utc)
                 .replace(tzinfo=None, microsecond=0)