From e3a3f117baed0321a424f31e1fd5cba679612f33 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya
Date: Thu, 9 Jan 2020 23:06:41 +0300
Subject: [PATCH] fix(migrations): Make Snuba event migration more robust (#16364)

Fixes getsentry/onpremise#322
---
Reviewer notes (below the three-dash line, so not part of the commit message):
  * _attach_related now gathers project and group ids in a single pass and
    looks them up with dict.get(), so an event whose project or group row is
    missing gets None attached instead of raising KeyError.
  * The backfill loop skips events with a None project or group, counts the
    events it actually inserts, and prints "processed of count" at the end.
  * print_function is added to the __future__ import; the remaining changes
    are quote-style and line-wrapping only.

 .../migrations/0024_auto_20191230_2052.py     | 37 ++++++++++++-------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/src/sentry/migrations/0024_auto_20191230_2052.py b/src/sentry/migrations/0024_auto_20191230_2052.py
index 204385b5d4d490..b62a3b80af5410 100644
--- a/src/sentry/migrations/0024_auto_20191230_2052.py
+++ b/src/sentry/migrations/0024_auto_20191230_2052.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # Generated by Django 1.9.13 on 2019-12-30 20:52
-from __future__ import unicode_literals
+from __future__ import unicode_literals, print_function
 
 import os
 from datetime import timedelta, datetime
@@ -20,9 +20,9 @@ def backfill_eventstream(apps, schema_editor):
     from sentry import eventstore, eventstream
     from sentry.utils.query import RangeQuerySetWrapper
 
-    Event = apps.get_model('sentry', 'Event')
-    Group = apps.get_model('sentry', 'Group')
-    Project = apps.get_model('sentry', 'Project')
+    Event = apps.get_model("sentry", "Event")
+    Group = apps.get_model("sentry", "Group")
+    Project = apps.get_model("sentry", "Project")
 
     # Kill switch to skip this migration
     skip_backfill = os.environ.get("SENTRY_SKIP_EVENTS_BACKFILL_FOR_10", False)
@@ -34,20 +34,26 @@ def backfill_eventstream(apps, schema_editor):
     def get_events(last_days):
         to_date = datetime.now()
         from_date = to_date - timedelta(days=last_days)
-        return Event.objects.filter(datetime__gte=from_date, datetime__lte=to_date, group_id__isnull=False)
+        return Event.objects.filter(
+            datetime__gte=from_date, datetime__lte=to_date, group_id__isnull=False
+        )
 
     def _attach_related(_events):
-        project_ids = {event.project_id for event in _events}
+        project_ids = set()
+        group_ids = set()
+        for event in _events:
+            project_ids.add(event.project_id)
+            group_ids.add(event.group_id)
         projects = {p.id: p for p in Project.objects.filter(id__in=project_ids)}
-        group_ids = {event.group_id for event in _events}
         groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}
+
         for event in _events:
-            event.project = projects[event.project_id]
-            event.group = groups[event.group_id]
+            event.project = projects.get(event.project_id)
+            event.group = groups.get(event.group_id)
         eventstore.bind_nodes(_events, "data")
 
     if skip_backfill:
-        print("Skipping backfill\n")
+        print("Skipping backfill.\n")
         return
 
     events = get_events(retention_days)
@@ -59,8 +65,13 @@ def _attach_related(_events):
 
     print("Events to process: {}\n".format(count))
 
+    processed = 0
     for event in RangeQuerySetWrapper(events, step=100, callbacks=(_attach_related,)):
         primary_hash = event.get_primary_hash()
+        if event.project is None or event.group is None:
+            print("Skipped {} as group or project information is invalid.\n".format(event))
+            continue
+
         eventstream.insert(
             group=event.group,
             event=event,
@@ -70,8 +81,9 @@ def _attach_related(_events):
             primary_hash=primary_hash,
             skip_consume=True,
         )
+        processed += 1
 
-    print("Done.\n")
+    print("Event migration done. Processed {} of {} events.\n".format(processed, count))
 
 
 class Migration(migrations.Migration):
@@ -89,9 +101,8 @@ class Migration(migrations.Migration):
     # - Adding columns to highly active tables, even ones that are NULL.
     is_dangerous = True
 
-
     dependencies = [
-        ('sentry', '0023_hide_environment_none_20191126'),
+        ("sentry", "0023_hide_environment_none_20191126"),
     ]
 
     operations = [