Skip to content

Commit

Permalink
fix(migrations): Make Snuba event migration more robust (#16364)
Browse files (browse the repository at this point in the history)
  • Loading branch information
BYK authored and Nisanthan Nanthakumar committed Jan 9, 2020
1 parent 0d43636 commit e3a3f11
Showing 1 changed file with 24 additions and 13 deletions.
37 changes: 24 additions & 13 deletions src/sentry/migrations/0024_auto_20191230_2052.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.13 on 2019-12-30 20:52
from __future__ import unicode_literals
from __future__ import unicode_literals, print_function

import os
from datetime import timedelta, datetime
@@ -20,9 +20,9 @@ def backfill_eventstream(apps, schema_editor):
from sentry import eventstore, eventstream
from sentry.utils.query import RangeQuerySetWrapper

Event = apps.get_model('sentry', 'Event')
Group = apps.get_model('sentry', 'Group')
Project = apps.get_model('sentry', 'Project')
Event = apps.get_model("sentry", "Event")
Group = apps.get_model("sentry", "Group")
Project = apps.get_model("sentry", "Project")

# Kill switch to skip this migration
skip_backfill = os.environ.get("SENTRY_SKIP_EVENTS_BACKFILL_FOR_10", False)
@@ -34,20 +34,26 @@ def backfill_eventstream(apps, schema_editor):
def get_events(last_days):
to_date = datetime.now()
from_date = to_date - timedelta(days=last_days)
return Event.objects.filter(datetime__gte=from_date, datetime__lte=to_date, group_id__isnull=False)
return Event.objects.filter(
datetime__gte=from_date, datetime__lte=to_date, group_id__isnull=False
)

def _attach_related(_events):
project_ids = {event.project_id for event in _events}
project_ids = set()
group_ids = set()
for event in _events:
project_ids.add(event.project_id)
group_ids.add(event.group_id)
projects = {p.id: p for p in Project.objects.filter(id__in=project_ids)}
group_ids = {event.group_id for event in _events}
groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}

for event in _events:
event.project = projects[event.project_id]
event.group = groups[event.group_id]
event.project = projects.get(event.project_id)
event.group = groups.get(event.group_id)
eventstore.bind_nodes(_events, "data")

if skip_backfill:
print("Skipping backfill\n")
print("Skipping backfill.\n")
return

events = get_events(retention_days)
@@ -59,8 +65,13 @@ def _attach_related(_events):

print("Events to process: {}\n".format(count))

processed = 0
for event in RangeQuerySetWrapper(events, step=100, callbacks=(_attach_related,)):
primary_hash = event.get_primary_hash()
if event.project is None or event.group is None:
print("Skipped {} as group or project information is invalid.\n".format(event))
continue

eventstream.insert(
group=event.group,
event=event,
@@ -70,8 +81,9 @@ def _attach_related(_events):
primary_hash=primary_hash,
skip_consume=True,
)
processed += 1

print("Done.\n")
print("Event migration done. Processed {} of {} events.\n".format(processed, count))


class Migration(migrations.Migration):
@@ -89,9 +101,8 @@ class Migration(migrations.Migration):
# - Adding columns to highly active tables, even ones that are NULL.
is_dangerous = True


dependencies = [
('sentry', '0023_hide_environment_none_20191126'),
("sentry", "0023_hide_environment_none_20191126"),
]

operations = [
Expand Down

0 comments on commit e3a3f11

Please sign in to comment.