From 851eabcd9f52d51fc3b986224a4e54b686beebb8 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 9 Jan 2020 22:12:30 +0300 Subject: [PATCH 1/5] fix(migrations): Make Snuba event migration more robust Fixes getsentry/onpremise#322 --- src/sentry/migrations/0024_auto_20191230_2052.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/sentry/migrations/0024_auto_20191230_2052.py b/src/sentry/migrations/0024_auto_20191230_2052.py index 204385b5d4d490..73a6eb36aa7ec6 100644 --- a/src/sentry/migrations/0024_auto_20191230_2052.py +++ b/src/sentry/migrations/0024_auto_20191230_2052.py @@ -37,13 +37,17 @@ def get_events(last_days): return Event.objects.filter(datetime__gte=from_date, datetime__lte=to_date, group_id__isnull=False) def _attach_related(_events): - project_ids = {event.project_id for event in _events} + project_ids = set() + group_ids = set() + for event in _events: + project_ids.add(event.project_id) + group_ids.add(event.group_id) projects = {p.id: p for p in Project.objects.filter(id__in=project_ids)} - group_ids = {event.group_id for event in _events} groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)} + for event in _events: - event.project = projects[event.project_id] - event.group = groups[event.group_id] + event.project = projects.get(event.project_id) + event.group = groups.get(event.group_id) eventstore.bind_nodes(_events, "data") if skip_backfill: @@ -61,6 +65,10 @@ def _attach_related(_events): for event in RangeQuerySetWrapper(events, step=100, callbacks=(_attach_related,)): primary_hash = event.get_primary_hash() + if event.project is None or event.group is None + print "Skipping %s as group or project information is invalid..." + continue + eventstream.insert( group=event.group, event=event, From c23a54b126551d87c2e955a6f2420ce3a9eb2322 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 9 Jan 2020 22:37:40 +0300 Subject: [PATCH 2/5] lol --- .../migrations/0024_auto_20191230_2052.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/sentry/migrations/0024_auto_20191230_2052.py b/src/sentry/migrations/0024_auto_20191230_2052.py index 73a6eb36aa7ec6..886bafbfe8c067 100644 --- a/src/sentry/migrations/0024_auto_20191230_2052.py +++ b/src/sentry/migrations/0024_auto_20191230_2052.py @@ -20,9 +20,9 @@ def backfill_eventstream(apps, schema_editor): from sentry import eventstore, eventstream from sentry.utils.query import RangeQuerySetWrapper - Event = apps.get_model('sentry', 'Event') - Group = apps.get_model('sentry', 'Group') - Project = apps.get_model('sentry', 'Project') + Event = apps.get_model("sentry", "Event") + Group = apps.get_model("sentry", "Group") + Project = apps.get_model("sentry", "Project") # Kill switch to skip this migration skip_backfill = os.environ.get("SENTRY_SKIP_EVENTS_BACKFILL_FOR_10", False) @@ -34,7 +34,9 @@ def backfill_eventstream(apps, schema_editor): def get_events(last_days): to_date = datetime.now() from_date = to_date - timedelta(days=last_days) - return Event.objects.filter(datetime__gte=from_date, datetime__lte=to_date, group_id__isnull=False) + return Event.objects.filter( + datetime__gte=from_date, datetime__lte=to_date, group_id__isnull=False + ) def _attach_related(_events): project_ids = set() @@ -51,21 +53,21 @@ def _attach_related(_events): eventstore.bind_nodes(_events, "data") if skip_backfill: - print("Skipping backfill\n") + print ("Skipping backfill\n") return events = get_events(retention_days) count = events.count() if count == 0: - print("Nothing to do, skipping migration.\n") + print ("Nothing to do, skipping migration.\n") return - print("Events to process: {}\n".format(count)) + print ("Events to process: {}\n".format(count)) for event in RangeQuerySetWrapper(events, step=100, callbacks=(_attach_related,)): primary_hash = event.get_primary_hash() - if event.project is None or event.group is None + if event.project is None or event.group is None: print "Skipping %s as group or project information is invalid..." continue @@ -79,7 +81,7 @@ def _attach_related(_events): skip_consume=True, ) - print("Done.\n") + print ("Done.\n") class Migration(migrations.Migration): @@ -97,9 +99,8 @@ class Migration(migrations.Migration): # - Adding columns to highly active tables, even ones that are NULL. is_dangerous = True - dependencies = [ - ('sentry', '0023_hide_environment_none_20191126'), + ("sentry", "0023_hide_environment_none_20191126"), ] operations = [ From bb21cb80362b57bc1d5554612c6d7896878a7d99 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 9 Jan 2020 22:40:05 +0300 Subject: [PATCH 3/5] print_function --- src/sentry/migrations/0024_auto_20191230_2052.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/sentry/migrations/0024_auto_20191230_2052.py b/src/sentry/migrations/0024_auto_20191230_2052.py index 886bafbfe8c067..473373b0f0c9ca 100644 --- a/src/sentry/migrations/0024_auto_20191230_2052.py +++ b/src/sentry/migrations/0024_auto_20191230_2052.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Generated by Django 1.9.13 on 2019-12-30 20:52 -from __future__ import unicode_literals +from __future__ import unicode_literals, print_function import os from datetime import timedelta, datetime @@ -53,22 +53,22 @@ def _attach_related(_events): eventstore.bind_nodes(_events, "data") if skip_backfill: - print ("Skipping backfill\n") + print("Skipping backfill\n") return events = get_events(retention_days) count = events.count() if count == 0: - print ("Nothing to do, skipping migration.\n") + print("Nothing to do, skipping migration.\n") return - print ("Events to process: {}\n".format(count)) + print("Events to process: {}\n".format(count)) for event in RangeQuerySetWrapper(events, step=100, callbacks=(_attach_related,)): primary_hash = event.get_primary_hash() if event.project is None or event.group is None: - print "Skipping %s as group or project information is invalid..." + print("Skipping %s as group or project information is invalid...") continue eventstream.insert( @@ -81,7 +81,7 @@ def _attach_related(_events): skip_consume=True, ) - print ("Done.\n") + print("Done.\n") class Migration(migrations.Migration): From 53a49235fac852cd40388dc93495a6b9d57356d8 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 9 Jan 2020 22:40:38 +0300 Subject: [PATCH 4/5] pass event to print log --- src/sentry/migrations/0024_auto_20191230_2052.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/migrations/0024_auto_20191230_2052.py b/src/sentry/migrations/0024_auto_20191230_2052.py index 473373b0f0c9ca..bd21ba55501418 100644 --- a/src/sentry/migrations/0024_auto_20191230_2052.py +++ b/src/sentry/migrations/0024_auto_20191230_2052.py @@ -68,7 +68,7 @@ def _attach_related(_events): for event in RangeQuerySetWrapper(events, step=100, callbacks=(_attach_related,)): primary_hash = event.get_primary_hash() if event.project is None or event.group is None: - print("Skipping %s as group or project information is invalid...") + print("Skipping %s as group or project information is invalid..." % event) continue eventstream.insert( From 0f172d71ce2c4146877a227467e7875113c5e8cf Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 9 Jan 2020 22:43:26 +0300 Subject: [PATCH 5/5] better logs --- src/sentry/migrations/0024_auto_20191230_2052.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/sentry/migrations/0024_auto_20191230_2052.py b/src/sentry/migrations/0024_auto_20191230_2052.py index bd21ba55501418..b62a3b80af5410 100644 --- a/src/sentry/migrations/0024_auto_20191230_2052.py +++ b/src/sentry/migrations/0024_auto_20191230_2052.py @@ -53,7 +53,7 @@ def _attach_related(_events): eventstore.bind_nodes(_events, "data") if skip_backfill: - print("Skipping backfill\n") + print("Skipping backfill.\n") return events = get_events(retention_days) @@ -65,10 +65,11 @@ def _attach_related(_events): print("Events to process: {}\n".format(count)) + processed = 0 for event in RangeQuerySetWrapper(events, step=100, callbacks=(_attach_related,)): primary_hash = event.get_primary_hash() if event.project is None or event.group is None: - print("Skipping %s as group or project information is invalid..." % event) + print("Skipped {} as group or project information is invalid.\n".format(event)) continue eventstream.insert( @@ -80,8 +81,9 @@ def _attach_related(_events): primary_hash=primary_hash, skip_consume=True, ) + processed += 1 - print("Done.\n") + print("Event migration done. Processed {} of {} events.\n".format(processed, count)) class Migration(migrations.Migration):