Skip to content

Commit

Permalink
NAS-130591 / 24.10-RC.1 / Other TrueNAS controller is inaccessible
Browse files Browse the repository at this point in the history
…proactive support alert (by themylogin) (#14375)

* `Other TrueNAS controller is inaccessible` proactive support alert

(cherry picked from commit e796aed)

* Proactive support: also notify gone alerts when requested

(cherry picked from commit f188d7a)

* Add incident ID

(cherry picked from commit 62b36c1)

* Fix

(cherry picked from commit 4cb1e74)

---------

Co-authored-by: themylogin <[email protected]>
  • Loading branch information
bugclerk and themylogin authored Sep 3, 2024
1 parent 5f3d286 commit 09e3883
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 3 deletions.
4 changes: 4 additions & 0 deletions src/middlewared/middlewared/alert/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ class AlertClass(metaclass=AlertClassMeta):
:cvar proactive_support: Set this to `true` if, upon creation of the alert, a support ticket should be open for the
systems that have a corresponding support license.
:cvar proactive_support_notify_gone: Set this to `true` if, upon removal of the alert, a support ticket should be
open for the systems that have a corresponding support license.
"""

classes = []
Expand All @@ -71,6 +74,7 @@ class AlertClass(metaclass=AlertClassMeta):
exclude_from_list = False
products = ("CORE", "ENTERPRISE", "SCALE", "SCALE_ENTERPRISE")
proactive_support = False
proactive_support_notify_gone = False

def __init__(self, middleware):
self.middleware = middleware
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) - iXsystems Inc.
#
# Licensed under the terms of the TrueNAS Enterprise License Agreement
# See the file LICENSE.IX for complete terms and conditions

import time

from middlewared.alert.base import AlertClass, AlertCategory, AlertLevel, Alert, AlertSource, UnavailableException
from middlewared.utils.crypto import generate_token


class FailoverRemoteSystemInaccessibleAlertClass(AlertClass):
    """Critical HA alert: the other TrueNAS controller cannot be reached.

    The single ``%s`` placeholder in ``text`` corresponds to the incident ID
    that the alert source supplies through the alert's ``args``.
    """

    # Restricted to the SCALE_ENTERPRISE product.
    products = ('SCALE_ENTERPRISE',)

    category = AlertCategory.HA
    level = AlertLevel.CRITICAL

    title = 'Other Controller is Inaccessible'
    text = 'Other TrueNAS controller is inaccessible. Contact support. Incident ID: %s.'

    # Proactive support is enabled for this alert class, and the "gone"
    # notification is requested as well (i.e. also when the alert is removed).
    proactive_support = True
    proactive_support_notify_gone = True


class FailoverRemoteSystemInaccessibleAlertSource(AlertSource):
    """Alert source that pings the remote controller and reports long outages.

    State kept on the instance:

    * ``last_available`` -- ``time.monotonic()`` reading taken at construction
      and refreshed after every successful ping.
    * ``incident_id`` -- token identifying the current outage; ``None`` while
      no alert has been produced for the current outage.
    """

    products = ('SCALE_ENTERPRISE',)
    # NOTE(review): these two flags are interpreted by the alert framework,
    # not by this class; presumably they limit the source to the active node
    # of an HA pair -- confirm against AlertSource.
    failover_related = True
    run_on_backup_node = False

    def __init__(self, middleware):
        super().__init__(middleware)
        self.incident_id = None
        self.last_available = time.monotonic()

    async def check(self):
        """Ping the remote controller and alert once it has been down too long.

        :return: an empty list when the ping succeeds; otherwise a one-element
            list with a ``FailoverRemoteSystemInaccessibleAlertClass`` alert
            once more than four hours have elapsed since ``last_available``.
        :raises UnavailableException: when the ping fails but the four-hour
            threshold has not been exceeded yet.
        """
        try:
            await self.middleware.call('failover.call_remote', 'core.ping', [], {'timeout': 2})
        except Exception:
            # Any failure of the remote call is treated as "inaccessible".
            seconds_unreachable = time.monotonic() - self.last_available
            if seconds_unreachable <= 4 * 3600:
                # Not down for long enough to raise the alert.
                raise UnavailableException()

            # Reuse the same incident ID for the whole outage so that repeated
            # checks yield an alert with identical arguments.
            if self.incident_id is None:
                self.incident_id = generate_token(16, url_safe=True)

            return [Alert(FailoverRemoteSystemInaccessibleAlertClass, args=[self.incident_id])]
        else:
            # Remote side answered: restart the outage timer and forget the
            # incident so the next outage gets a fresh ID.
            self.last_available = time.monotonic()
            self.incident_id = None
            return []
23 changes: 20 additions & 3 deletions src/middlewared/middlewared/plugins/alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,15 @@ async def send_alerts(self, job):
await self.middleware.call("mail.send", alert.mail)

if await self.middleware.call("system.is_enterprise"):
gone_proactive_support_alerts = [
alert
for alert in gone_alerts
if (
alert.klass.proactive_support and
(await as_.get_alert_class(alert)).get("proactive_support", True) and
alert.klass.proactive_support_notify_gone
)
]
new_proactive_support_alerts = [
alert
for alert in new_alerts
Expand All @@ -591,11 +600,19 @@ async def send_alerts(self, job):
(await as_.get_alert_class(alert)).get("proactive_support", True)
)
]
if new_proactive_support_alerts:
if gone_proactive_support_alerts or new_proactive_support_alerts:
if await self.middleware.call("support.is_available_and_enabled"):
support = await self.middleware.call("support.config")
msg = [f"* {html2text.html2text(alert.formatted)}"
for alert in new_proactive_support_alerts]

msg = []
if gone_proactive_support_alerts:
msg.append("The following alerts were cleared:")
msg += [f"* {html2text.html2text(alert.formatted)}"
for alert in gone_proactive_support_alerts]
if new_proactive_support_alerts:
msg.append("The following new alerts appeared:")
msg += [f"* {html2text.html2text(alert.formatted)}"
for alert in new_proactive_support_alerts]

serial = (await self.middleware.call("system.dmidecode_info"))["system-serial-number"]

Expand Down

0 comments on commit 09e3883

Please sign in to comment.