-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
96d4431
commit 607d4e6
Showing
6 changed files
with
301 additions
and
7 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
import prettyMilliseconds from 'pretty-ms' | ||
|
||
/** | ||
* @typedef {import('@web3-storage/data-segment').PieceLink} PieceLink | ||
* @typedef {import('@web3-storage/filecoin-api/aggregator/api').AggregateStore} AggregatorAggregateStore | ||
* @typedef {import('@web3-storage/filecoin-api/dealer/api').AggregateStore} DealerAggregateStore | ||
* @typedef {import('@web3-storage/filecoin-api/dealer/api').AggregateRecord} AggregateRecord | ||
* | ||
* @typedef {object} MonitorContext | ||
* @property {AggregatorAggregateStore} context.aggregatorAggregateStore | ||
* @property {DealerAggregateStore} context.dealerAggregateStore | ||
* @property {number} context.oldestPieceCriticalThresholdMs | ||
* @property {number} context.oldestPieceWarnThresholdMs | ||
* @property {number} context.aggregateMonitorThresholdMs | ||
* @property {string} context.monitoringNotificationsEndpoint | ||
* | ||
* @typedef {Object} Alert | ||
Check failure on line 17 in packages/core/src/dealer/deal-monitor-alert-tick.js GitHub Actions / Test
|
||
* @property {PieceLink} aggregate | ||
* @property {number} duration | ||
* @property {string} severity | ||
*/ | ||
|
||
/** | ||
* On CRON tick, get aggregates without deals, and verify if there | ||
* | ||
* @param {MonitorContext} context | ||
*/ | ||
export async function dealMonitorAlertTick (context) { | ||
// Get offered deals pending approval/rejection | ||
const offeredAggregates = await context.dealerAggregateStore.query({ | ||
status: 'offered', | ||
}) | ||
if (offeredAggregates.error) { | ||
return { | ||
error: offeredAggregates.error, | ||
} | ||
} | ||
|
||
// Get offered aggregates to monitor | ||
const offeredAggregatesToMonitor = [] | ||
const currentTime = Date.now() | ||
for (const offeredAggregate of offeredAggregates.ok) { | ||
const offerTime = (new Date(offeredAggregate.insertedAt)).getTime() | ||
// Monitor if offer time + monitor threshold is bigger than current time | ||
if (offerTime + context.aggregateMonitorThresholdMs > currentTime) { | ||
offeredAggregatesToMonitor.push(offeredAggregate) | ||
} | ||
} | ||
|
||
// Get aggregates duration | ||
const monitoredAggregatesResponse = await Promise.all( | ||
offeredAggregatesToMonitor.map(aggregate => monitorAggregate(aggregate, context)) | ||
) | ||
// Fail if any failed to get information | ||
const monitoredAggregatesErrorResponse = monitoredAggregatesResponse.find(r => r?.error) | ||
if (monitoredAggregatesErrorResponse) { | ||
return { | ||
error: monitoredAggregatesErrorResponse.error | ||
} | ||
} | ||
|
||
const alerts = /** @typedef {Alert[]} */ ([]) | ||
|
||
// Verify if monitored aggregates should create notifications | ||
for (const res of monitoredAggregatesResponse) { | ||
// @ts-ignore if not ok, should have failed before | ||
const duration = /** @type {number} */ (res.ok?.duration) | ||
// @ts-ignore if not ok, should have failed before | ||
const aggregate = /** @type {import('@web3-storage/data-segment').PieceLink} */ (res.ok?.aggregate) | ||
|
||
if (duration > context.oldestPieceCriticalThresholdMs) { | ||
alerts.push({ | ||
aggregate, | ||
duration, | ||
severity: 'critical' | ||
}) | ||
} else if (duration > context.oldestPieceWarnThresholdMs) { | ||
alerts.push({ | ||
aggregate, | ||
duration, | ||
severity: 'warn' | ||
}) | ||
} | ||
} | ||
|
||
if (!alerts.length) { | ||
return { | ||
ok: {} | ||
} | ||
} | ||
|
||
// Send alerts | ||
const alertPayload = getAlertPayload(alerts) | ||
const alertResponse = await fetch( | ||
context.monitoringNotificationsEndpoint, | ||
{ | ||
method: 'POST', | ||
headers: { | ||
'Content-Type': 'application/json' | ||
}, | ||
body: JSON.stringify(alertPayload) | ||
} | ||
) | ||
if (!alertResponse.ok) { | ||
return { | ||
error: new Error(`failed to send alert to ${context.monitoringNotificationsEndpoint} with ${alerts.length}`) | ||
} | ||
} | ||
|
||
return { | ||
ok: {} | ||
} | ||
} | ||
|
||
/** | ||
* @param {AggregateRecord} aggregateRecord | ||
* @param {MonitorContext} context | ||
*/ | ||
async function monitorAggregate (aggregateRecord, context) { | ||
const getAggregateInfo = await context.aggregatorAggregateStore.get({ | ||
aggregate: aggregateRecord.aggregate | ||
}) | ||
if (getAggregateInfo.error) { | ||
return { | ||
error: getAggregateInfo.error | ||
} | ||
} | ||
|
||
// Get aggregate current duration | ||
const currentTime = Date.now() | ||
// @ts-expect-error needs updated dep | ||
const offerTime = (new Date(getAggregateInfo.ok.oldestPieceInsertedAt)).getTime() | ||
|
||
return { | ||
ok: { | ||
aggregate: aggregateRecord.aggregate, | ||
duration: currentTime - offerTime | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Construct alert based on payload from Grafana Alerting. | ||
* | ||
* @see https://grafana.com/docs/oncall/latest/integrations/grafana-alerting/ | ||
* @see https://prometheus.io/docs/alerting/latest/notifications/#data | ||
* | ||
* @param {Alert[]} alerts | ||
*/ | ||
function getAlertPayload (alerts) { | ||
return { | ||
alerts: alerts.map(a => ({ | ||
labels: { | ||
aggregate: a.aggregate.toString(), | ||
duration: prettyMilliseconds(a.duration), | ||
severity: a.severity, | ||
}, | ||
status: 'firing', | ||
fingerprint: a.aggregate.toString() | ||
})), | ||
status: 'firing', | ||
version: '4', | ||
groupKey: '{}:{alertname=\FilecoinDealDelay\}', | ||
Check failure on line 163 in packages/core/src/dealer/deal-monitor-alert-tick.js GitHub Actions / Test
|
||
receiver: 'combo', | ||
groupLabels: { | ||
alertname: 'FilecoinDealDelay' | ||
}, | ||
commonLabels: { | ||
job: 'deal-monitor-alert', | ||
group: 'production', | ||
alertname: 'FilecoinDealDelay' | ||
} | ||
} | ||
} |
79 changes: 79 additions & 0 deletions
79
packages/functions/src/monitor/handle-deal-monitor-alert-cron-tick.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import * as Sentry from '@sentry/serverless' | ||
import { Table } from 'sst/node/table' | ||
|
||
// store clients | ||
import { createClient as createAggregatorAggregateStoreClient } from '@w3filecoin/core/src/store/aggregator-aggregate-store.js' | ||
import { createClient as createDealerAggregateStoreClient } from '@w3filecoin/core/src/store/dealer-aggregate-store.js' | ||
|
||
import { dealMonitorAlertTick } from '@w3filecoin/core/src/dealer/deal-monitor-alert-tick.js' | ||
|
||
import { mustGetEnv } from '../utils.js' | ||
|
||
Sentry.AWSLambda.init({ | ||
environment: process.env.SST_STAGE, | ||
dsn: process.env.SENTRY_DSN, | ||
tracesSampleRate: 1.0, | ||
}) | ||
|
||
async function handleEvent () { | ||
const { | ||
aggregatorAggregateStoreTableName, | ||
aggregatorAggregateStoreTableRegion, | ||
dealerAggregateStoreTableName, | ||
dealerAggregateStoreTableRegion, | ||
oldestPieceCriticalThresholdMs, | ||
oldestPieceWarnThresholdMs, | ||
aggregateMonitorThresholdMs, | ||
monitoringNotificationsEndpoint | ||
} = getEnv() | ||
|
||
// stores | ||
const aggregatorAggregateStore = createAggregatorAggregateStoreClient( | ||
{ region: aggregatorAggregateStoreTableRegion }, | ||
{ tableName: aggregatorAggregateStoreTableName.tableName } | ||
) | ||
const dealerAggregateStore = createDealerAggregateStoreClient({ | ||
region: dealerAggregateStoreTableRegion | ||
}, { | ||
tableName: dealerAggregateStoreTableName.tableName | ||
}) | ||
|
||
const { error } = await dealMonitorAlertTick({ | ||
aggregatorAggregateStore, | ||
dealerAggregateStore, | ||
oldestPieceCriticalThresholdMs, | ||
oldestPieceWarnThresholdMs, | ||
aggregateMonitorThresholdMs, | ||
monitoringNotificationsEndpoint | ||
}) | ||
|
||
if (error) { | ||
console.error(error) | ||
return { | ||
statusCode: 500, | ||
body: error.message | ||
} | ||
} | ||
|
||
return { | ||
statusCode: 200, | ||
} | ||
} | ||
|
||
/** | ||
* Get Env validating it is set. | ||
*/ | ||
function getEnv () { | ||
return { | ||
aggregatorAggregateStoreTableName: Table['aggregator-aggregate-store'], | ||
aggregatorAggregateStoreTableRegion: mustGetEnv('AWS_REGION'), | ||
dealerAggregateStoreTableName: Table['dealer-aggregate-store'], | ||
dealerAggregateStoreTableRegion: mustGetEnv('AWS_REGION'), | ||
oldestPieceCriticalThresholdMs: parseInt(mustGetEnv('OLDEST_PIECE_CRITICAL_THRESHOLD_MS')), | ||
oldestPieceWarnThresholdMs: parseInt(mustGetEnv('OLDEST_PIECE_WARN_THRESHOLD_MS')), | ||
aggregateMonitorThresholdMs: parseInt(mustGetEnv('AGGREGATE_MONITOR_THRESHOLD_MS')), | ||
monitoringNotificationsEndpoint: mustGetEnv('MONITORING_NOTIFICATIONS_ENDPOINT') | ||
} | ||
} | ||
|
||
export const main = Sentry.AWSLambda.wrapHandler(handleEvent) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters