Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(crons): Initial removal of status from Monitor object #47910

Merged
merged 9 commits into from
Apr 27, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/sentry/monitors/consumers/check_in.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,8 @@ def _process_message(wrapper: Dict) -> None:
signal_first_checkin(project, monitor)

if check_in.status == CheckInStatus.ERROR and monitor.status != MonitorStatus.DISABLED:
monitor.mark_failed(start_time)
monitor_environment.mark_failed(start_time)
else:
monitor.mark_ok(check_in, start_time)
monitor_environment.mark_ok(check_in, start_time)

metrics.incr(
Expand Down
4 changes: 2 additions & 2 deletions src/sentry/monitors/endpoints/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def try_checkin_lookup(monitor: Monitor, checkin_id: str):
# which is unfinished (thus still mutable)
if checkin_id == "latest":
checkin = (
MonitorCheckIn.objects.filter(monitor=monitor)
MonitorCheckIn.objects.filter(monitor_environment__monitor=monitor)
.exclude(status__in=CheckInStatus.FINISHED_VALUES)
.order_by("-date_added")
.first()
Expand All @@ -240,6 +240,6 @@ def try_checkin_lookup(monitor: Monitor, checkin_id: str):
raise ParameterValidationError("Invalid check-in UUID")

try:
return MonitorCheckIn.objects.get(monitor=monitor, guid=checkin_id)
return MonitorCheckIn.objects.get(monitor_environment__monitor=monitor, guid=checkin_id)
except MonitorCheckIn.DoesNotExist:
raise ResourceDoesNotExist
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,10 @@ def put(
checkin.update(**params)

if checkin.status == CheckInStatus.ERROR:
monitor_failed = monitor.mark_failed(current_datetime)
monitor_environment.mark_failed(current_datetime)
monitor_failed = monitor_environment.mark_failed(current_datetime)
if not monitor_failed:
return self.respond(serialize(checkin, request.user), status=208)
else:
monitor.mark_ok(checkin, current_datetime)
monitor_environment.mark_ok(checkin, current_datetime)

return self.respond(serialize(checkin, request.user))
Original file line number Diff line number Diff line change
Expand Up @@ -186,14 +186,12 @@ def post(
signal_first_checkin(project, monitor)

if checkin.status == CheckInStatus.ERROR and monitor.status != MonitorStatus.DISABLED:
monitor_failed = monitor.mark_failed(last_checkin=checkin.date_added)
monitor_environment.mark_failed(last_checkin=checkin.date_added)
monitor_failed = monitor_environment.mark_failed(last_checkin=checkin.date_added)
if not monitor_failed:
if isinstance(request.auth, ProjectKey):
return self.respond(status=200)
return self.respond(serialize(checkin, request.user), status=200)
else:
monitor.mark_ok(checkin, checkin.date_added)
monitor_environment.mark_ok(checkin, checkin.date_added)

if isinstance(request.auth, ProjectKey):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ def get(self, request: Request, organization, project, monitor) -> Response:
if start is None or end is None:
raise ParseError(detail="Invalid date range")

# TODO(rjo100): switch this to query monitor environment when hack is removed
queryset = MonitorCheckIn.objects.filter(
monitor_id=monitor.id, date_added__gte=start, date_added__lte=end
monitor__id=monitor.id, date_added__gte=start, date_added__lte=end
)

environments = get_environments(request, organization)
Expand Down
3 changes: 3 additions & 0 deletions src/sentry/monitors/endpoints/organization_monitor_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def put(self, request: Request, organization, project, monitor) -> Response:
if "slug" in result:
params["slug"] = result["slug"]
if "status" in result:
# TODO(rjo100): resets status if monitor is failed, needs fixing with environments
if result["status"] == MonitorStatus.ACTIVE:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're going to do a migration here to move all monitors that are in non ObjectStatus type status to ACTIVE or DISABLED right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally yes. we reference PENDING DELETION a lot too

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's OK, pending deletion is part of ObjectStatus right

if monitor.status not in (MonitorStatus.OK, MonitorStatus.ERROR):
params["status"] = MonitorStatus.ACTIVE
Expand All @@ -111,6 +112,8 @@ def put(self, request: Request, organization, project, monitor) -> Response:

if params:
monitor.update(**params)
if params.get("status"):
MonitorEnvironment.objects.filter(monitor=monitor).update(status=params["status"])
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will fix a sort bug by marking the MonitorEnvironment as DISABLED

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure we want this? I would think we would just want to keep the sorting logic so that monitors that are disabled get sorted to the bottom and just ignore the disabled in that case.

I would also probably not do this in this cleanup PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the issue is that we only read off the MonitorEnvironment status during sort right now, and that never gets marked as DISABLED. this is the only place we have that hook. some of this follow up work was going to simplify the sort logic but really it's more pending the "one row per monitor w/multiple environments" UI

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#48039
fix is here. will remove this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good stuff 👍

self.create_audit_entry(
request=request,
organization=organization,
Expand Down
39 changes: 1 addition & 38 deletions src/sentry/monitors/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,43 +218,6 @@ def get_next_scheduled_checkin(self, last_checkin):
)
return next_checkin + timedelta(minutes=int(self.config.get("checkin_margin") or 0))

def mark_failed(self, last_checkin=None, reason=MonitorFailure.UNKNOWN):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice

if last_checkin is None:
next_checkin_base = timezone.now()
last_checkin = self.last_checkin or timezone.now()
else:
next_checkin_base = last_checkin

new_status = MonitorStatus.ERROR
if reason == MonitorFailure.MISSED_CHECKIN:
new_status = MonitorStatus.MISSED_CHECKIN

affected = (
type(self)
.objects.filter(
Q(last_checkin__lte=last_checkin) | Q(last_checkin__isnull=True), id=self.id
)
.update(
next_checkin=self.get_next_scheduled_checkin(next_checkin_base),
status=new_status,
last_checkin=last_checkin,
)
)
if not affected:
return False

return True

def mark_ok(self, checkin: MonitorCheckIn, ts: datetime):
params = {
"last_checkin": ts,
"next_checkin": self.get_next_scheduled_checkin(ts),
}
if checkin.status == CheckInStatus.OK and self.status != MonitorStatus.DISABLED:
params["status"] = MonitorStatus.OK

Monitor.objects.filter(id=self.id).exclude(last_checkin__gt=ts).update(**params)


@region_silo_only_model
class MonitorCheckIn(Model):
Expand Down Expand Up @@ -417,7 +380,7 @@ def mark_ok(self, checkin: MonitorCheckIn, ts: datetime):
"last_checkin": ts,
"next_checkin": self.monitor.get_next_scheduled_checkin(ts),
}
if checkin.status == CheckInStatus.OK and self.status != MonitorStatus.DISABLED:
if checkin.status == CheckInStatus.OK and self.monitor.status != MonitorStatus.DISABLED:
params["status"] = MonitorStatus.OK

MonitorEnvironment.objects.filter(id=self.id).exclude(last_checkin__gt=ts).update(**params)
4 changes: 0 additions & 4 deletions src/sentry/monitors/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,6 @@ def serialize(self, obj, attrs, user):
"name": obj.name,
"slug": obj.slug,
"config": config,
"lastCheckIn": obj.last_checkin,
"nextCheckIn": obj.next_checkin,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very nice

"dateCreated": obj.date_added,
"project": attrs["project"],
"environments": attrs["environments"],
Expand All @@ -102,8 +100,6 @@ class MonitorSerializerResponse(TypedDict):
type: str
config: Any
dateCreated: datetime
lastCheckIn: datetime
nextCheckIn: datetime
project: ProjectSerializerResponse
environments: MonitorEnvironmentSerializerResponse

Expand Down
2 changes: 0 additions & 2 deletions src/sentry/monitors/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def check_monitors(current_datetime=None):
monitor_environment=monitor_environment,
status=CheckInStatus.MISSED,
)
monitor_environment.monitor.mark_failed(reason=MonitorFailure.MISSED_CHECKIN)
monitor_environment.mark_failed(reason=MonitorFailure.MISSED_CHECKIN)

qs = (
Expand Down Expand Up @@ -106,5 +105,4 @@ def check_monitors(current_datetime=None):
status__in=[CheckInStatus.OK, CheckInStatus.ERROR],
).exists()
if not has_newer_result:
monitor_environment.monitor.mark_failed(reason=MonitorFailure.DURATION)
monitor_environment.mark_failed(reason=MonitorFailure.DURATION)
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@
from django.urls import reverse
from django.utils import timezone

from sentry.models import File
from sentry.monitors.models import CheckInStatus, Monitor, MonitorCheckIn, MonitorType
from sentry.models import Environment, File
from sentry.monitors.models import (
CheckInStatus,
Monitor,
MonitorCheckIn,
MonitorEnvironment,
MonitorType,
)
from sentry.testutils import MonitorIngestTestCase
from sentry.testutils.silo import region_silo_test

Expand All @@ -22,16 +28,31 @@ def _create_monitor(self):
return Monitor.objects.create(
organization_id=self.organization.id,
project_id=self.project.id,
next_checkin=timezone.now() - timedelta(minutes=1),
type=MonitorType.CRON_JOB,
config={"schedule": "* * * * *"},
date_added=timezone.now() - timedelta(minutes=1),
)

def _create_monitor_environment(self, monitor, name="production", **kwargs):
environment = Environment.get_or_create(project=self.project, name=name)

monitorenvironment_defaults = {
"status": monitor.status,
"next_checkin": timezone.now() - timedelta(minutes=1),
**kwargs,
}

return MonitorEnvironment.objects.create(
monitor=monitor, environment=environment, **monitorenvironment_defaults
)

def test_upload(self):
monitor = self._create_monitor()
monitor_environment = self._create_monitor_environment(monitor)

checkin = MonitorCheckIn.objects.create(
monitor=monitor,
monitor_environment=monitor_environment,
project_id=self.project.id,
date_added=monitor.date_added,
status=CheckInStatus.IN_PROGRESS,
Expand Down Expand Up @@ -60,8 +81,11 @@ def test_upload(self):

def test_upload_no_file(self):
monitor = self._create_monitor()
monitor_environment = self._create_monitor_environment(monitor)

checkin = MonitorCheckIn.objects.create(
monitor=monitor,
monitor_environment=monitor_environment,
project_id=self.project.id,
date_added=monitor.date_added,
status=CheckInStatus.IN_PROGRESS,
Expand All @@ -83,8 +107,11 @@ def test_upload_no_file(self):
)
def test_upload_file_too_big(self):
monitor = self._create_monitor()
monitor_environment = self._create_monitor_environment(monitor)

checkin = MonitorCheckIn.objects.create(
monitor=monitor,
monitor_environment=monitor_environment,
project_id=self.project.id,
date_added=monitor.date_added,
status=CheckInStatus.IN_PROGRESS,
Expand All @@ -107,8 +134,11 @@ def test_upload_file_too_big(self):

def test_duplicate_upload(self):
monitor = self._create_monitor()
monitor_environment = self._create_monitor_environment(monitor)

checkin = MonitorCheckIn.objects.create(
monitor=monitor,
monitor_environment=monitor_environment,
project_id=self.project.id,
date_added=monitor.date_added,
status=CheckInStatus.IN_PROGRESS,
Expand Down Expand Up @@ -151,8 +181,11 @@ def test_duplicate_upload(self):

def test_invalid_file_upload(self):
monitor = self._create_monitor()
monitor_environment = self._create_monitor_environment(monitor)

checkin = MonitorCheckIn.objects.create(
monitor=monitor,
monitor_environment=monitor_environment,
project_id=self.project.id,
date_added=monitor.date_added,
status=CheckInStatus.IN_PROGRESS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,20 +104,19 @@ def test_passing(self):
checkin.monitor_environment.environment.name == monitor_environment.environment.name
)

monitor = Monitor.objects.get(id=monitor.id)
assert monitor.next_checkin > checkin.date_added
assert monitor.status == MonitorStatus.OK
assert monitor.last_checkin > checkin.date_added

monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)
assert monitor_environment.next_checkin > checkin.date_added
assert monitor_environment.status == MonitorStatus.OK
assert monitor_environment.last_checkin > checkin.date_added

def test_passing_with_slug(self):
monitor = self._create_monitor()
monitor_environment = self._create_monitor_environment(monitor)
checkin = MonitorCheckIn.objects.create(
monitor=monitor, project_id=self.project.id, date_added=monitor.date_added
monitor=monitor,
monitor_environment=monitor_environment,
project_id=self.project.id,
date_added=monitor.date_added,
)

path = reverse(
Expand Down Expand Up @@ -147,11 +146,6 @@ def test_failing(self):
checkin = MonitorCheckIn.objects.get(id=checkin.id)
assert checkin.status == CheckInStatus.ERROR

monitor = Monitor.objects.get(id=monitor.id)
assert monitor.next_checkin > checkin.date_added
assert monitor.status == MonitorStatus.ERROR
assert monitor.last_checkin > checkin.date_added

monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)
assert monitor_environment.next_checkin > checkin.date_added
assert monitor_environment.status == MonitorStatus.ERROR
Expand Down Expand Up @@ -196,11 +190,6 @@ def test_latest_returns_last_unfinished(self):
checkin3 = MonitorCheckIn.objects.get(id=checkin3.id)
assert checkin3.status == CheckInStatus.OK

monitor = Monitor.objects.get(id=monitor.id)
assert monitor.next_checkin > checkin2.date_added
assert monitor.status == MonitorStatus.OK
assert monitor.last_checkin > checkin2.date_added

monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)
assert monitor_environment.next_checkin > checkin2.date_added
assert monitor_environment.status == MonitorStatus.OK
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,6 @@ def test_passing(self, mock_record):
checkin = MonitorCheckIn.objects.get(guid=resp.data["id"])
assert checkin.status == CheckInStatus.OK

monitor = Monitor.objects.get(id=monitor.id)
assert monitor.status == MonitorStatus.OK
assert monitor.last_checkin == checkin.date_added
assert monitor.next_checkin == monitor.get_next_scheduled_checkin(checkin.date_added)

monitor_environment = MonitorEnvironment.objects.get(id=checkin.monitor_environment.id)
assert monitor_environment.status == MonitorStatus.OK
assert monitor_environment.last_checkin == checkin.date_added
Expand Down Expand Up @@ -109,11 +104,6 @@ def test_failing(self):
checkin = MonitorCheckIn.objects.get(guid=resp.data["id"])
assert checkin.status == CheckInStatus.ERROR

monitor = Monitor.objects.get(id=monitor.id)
assert monitor.status == MonitorStatus.ERROR
assert monitor.last_checkin == checkin.date_added
assert monitor.next_checkin == monitor.get_next_scheduled_checkin(checkin.date_added)

monitor_environment = MonitorEnvironment.objects.get(id=checkin.monitor_environment.id)
assert monitor_environment.status == MonitorStatus.ERROR
assert monitor_environment.last_checkin == checkin.date_added
Expand All @@ -132,11 +122,6 @@ def test_disabled(self):
checkin = MonitorCheckIn.objects.get(guid=resp.data["id"])
assert checkin.status == CheckInStatus.ERROR

monitor = Monitor.objects.get(id=monitor.id)
assert monitor.status == MonitorStatus.DISABLED
assert monitor.last_checkin == checkin.date_added
assert monitor.next_checkin == monitor.get_next_scheduled_checkin(checkin.date_added)

monitor_environment = MonitorEnvironment.objects.get(id=checkin.monitor_environment.id)
assert monitor_environment.status == MonitorStatus.DISABLED
assert monitor_environment.last_checkin == checkin.date_added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ def test_download(self):
file.putfile(ContentFile(b"some data!"))

monitor = self._create_monitor()
monitor_environment = self._create_monitor_environment(monitor)
checkin = MonitorCheckIn.objects.create(
monitor=monitor,
monitor_environment=monitor_environment,
project_id=self.project.id,
date_added=monitor.date_added,
status=CheckInStatus.IN_PROGRESS,
Expand All @@ -32,8 +34,10 @@ def test_download(self):

def test_download_no_file(self):
monitor = self._create_monitor()
monitor_environment = self._create_monitor_environment(monitor)
checkin = MonitorCheckIn.objects.create(
monitor=monitor,
monitor_environment=monitor_environment,
project_id=self.project.id,
date_added=monitor.date_added,
status=CheckInStatus.IN_PROGRESS,
Expand Down
Loading