Skip to content

Commit

Permalink
Merge pull request #380 from nivasan1/nikhil/add-peering-alert
Browse files Browse the repository at this point in the history
Nikhil/add peering alert
  • Loading branch information
itsciccio authored Jan 24, 2023
2 parents 40cdb9f + 0bceab1 commit d41b8e1
Show file tree
Hide file tree
Showing 28 changed files with 1,117 additions and 698 deletions.
Binary file added alerter/.coverage
Binary file not shown.
2 changes: 2 additions & 0 deletions alerter/src/alerter/alert_code/node/cosmos_alert_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,5 @@ class CosmosNodeAlertCode(AlertCode):
TendermintRPCDataObtainedAlert = 'cosmos_node_alert_39'
MetricNotFoundErrorAlert = 'cosmos_node_alert_40'
MetricFoundAlert = 'cosmos_node_alert_41'
NodeIsNotPeeredWithSentinelAlert = 'cosmos_node_alert_42'
NodeIsPeeredWithSentinelAlert = 'cosmos_node_alert_43'
4 changes: 4 additions & 0 deletions alerter/src/alerter/alerters/alerter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ def _equal_condition_function(current: Any, previous: Any) -> bool:
def _is_true_condition_function(current: Any) -> bool:
return current is True

@staticmethod
def _is_false_condition_function(current: Any) -> bool:
    """Return True only when `current` is the boolean singleton False.

    This is an identity check, so other falsy values (None, 0, '') do not
    satisfy the condition.
    """
    return current is False

@staticmethod
def _true_fn() -> bool:
return True
Expand Down
22 changes: 20 additions & 2 deletions alerter/src/alerter/alerters/node/cosmos.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,6 @@ def _process_tendermint_rpc_result(self, tendermint_data: Dict,
parent_id, node_id, configs, is_validator)

# Check if some errors have been resolved

self.alerting_factory.classify_error_alert(
InvalidUrlException.code,
cosmos_alerts.TendermintRPCInvalidUrlAlert,
Expand Down Expand Up @@ -451,11 +450,16 @@ def _process_tendermint_rpc_result(self, tendermint_data: Dict,
)

# Check if the alert rules are satisfied for the metrics

is_syncing_configs = (
configs.validator_is_syncing if is_validator
else configs.node_is_syncing
)

is_peered_with_sentinel_configs = (
configs.validator_is_peered_with_sentinel if is_validator
else configs.node_is_peered_with_sentinel
)

classification_fn = (
self.alerting_factory
.classify_solvable_conditional_alert_no_repetition
Expand All @@ -473,6 +477,20 @@ def _process_tendermint_rpc_result(self, tendermint_data: Dict,
[node_name, Severity.INFO.value, last_monitored,
parent_id, node_id]
)
# Only raise sentinel-peering alerts if the alert is enabled and the node is running mev-tendermint
if str_to_bool(is_peered_with_sentinel_configs['enabled']) and meta_data['is_mev_tendermint_node']:
current = data['is_peered_with_sentinel']['current']
if current is not None:
classification_fn(
parent_id, node_id, MetricCode.NodeIsNotPeeredWithSentinel.value,
cosmos_alerts.NodeIsNotPeeredWithSentinelAlert,
self._is_false_condition_function, [current],
[node_name, is_peered_with_sentinel_configs['severity'],
last_monitored, parent_id, node_id], data_for_alerting,
cosmos_alerts.NodeIsPeeredWithSentinelAlert,
[node_name, Severity.INFO.value, last_monitored,
parent_id, node_id]
)

slashed_configs = configs.slashed
if str_to_bool(slashed_configs['enabled']):
Expand Down
17 changes: 17 additions & 0 deletions alerter/src/alerter/alerts/node/cosmos.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,23 @@ def __init__(self, origin_name: str, severity: str, timestamp: float,
timestamp, parent_id, origin_id,
GroupedCosmosNodeAlertsMetricCode.NodeIsSyncing, [origin_id])

class NodeIsPeeredWithSentinelAlert(Alert):
    """Alert stating that a node is peered with the sentinel.

    It is filed under the `NodeIsNotPeeredWithSentinel` grouped metric code.
    """

    def __init__(self, origin_name: str, severity: str, timestamp: float,
                 parent_id: str, origin_id: str) -> None:
        # Name each positional argument before handing it to the base class.
        alert_code = CosmosNodeAlertCode.NodeIsPeeredWithSentinelAlert
        message = "Node {} is peered with sentinel.".format(origin_name)
        metric_code = (
            GroupedCosmosNodeAlertsMetricCode.NodeIsNotPeeredWithSentinel)
        super().__init__(alert_code, message, severity, timestamp,
                         parent_id, origin_id, metric_code, [origin_id])

class NodeIsNotPeeredWithSentinelAlert(Alert):
    """Alert stating that a node is not peered with the sentinel.

    It is filed under the `NodeIsNotPeeredWithSentinel` grouped metric code.
    """

    def __init__(self, origin_name: str, severity: str, timestamp: float,
                 parent_id: str, origin_id: str) -> None:
        # Name each positional argument before handing it to the base class.
        alert_code = CosmosNodeAlertCode.NodeIsNotPeeredWithSentinelAlert
        message = "Node {} is not peered with sentinel.".format(origin_name)
        metric_code = (
            GroupedCosmosNodeAlertsMetricCode.NodeIsNotPeeredWithSentinel)
        super().__init__(alert_code, message, severity, timestamp,
                         parent_id, origin_id, metric_code, [origin_id])

class ValidatorIsNotActiveAlert(Alert):
def __init__(self, origin_name: str, severity: str, timestamp: float,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def create_alerting_state(
AlertsMetricCode.MetricNotFound.value: False,
}
any_severity_sent = {
AlertsMetricCode.NodeIsNotPeeredWithSentinel.value: False,
AlertsMetricCode.NodeIsSyncing.value: False,
AlertsMetricCode.ValidatorIsNotActive.value: False,
AlertsMetricCode.ValidatorIsJailed.value: False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class GroupedCosmosNodeAlertsMetricCode(GroupedAlertsMetricCode):
NodeIsDown = 'cosmos_node_is_down'
ValidatorWasSlashed = 'cosmos_node_slashed'
NodeIsSyncing = 'cosmos_node_syncing'
NodeIsNotPeeredWithSentinel = 'cosmos_node_is_not_peered_with_sentinel'
ValidatorIsNotActive = 'cosmos_node_active'
ValidatorIsJailed = 'cosmos_node_jailed'
BlocksMissedThreshold = 'cosmos_node_blocks_missed'
Expand Down
13 changes: 12 additions & 1 deletion alerter/src/configs/alerts/node/cosmos.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def __init__(
cannot_access_tendermint_rpc_validator: Dict,
cannot_access_tendermint_rpc_node: Dict, missed_blocks: Dict,
slashed: Dict, node_is_syncing: Dict, validator_is_syncing: Dict,
validator_is_jailed: Dict) -> None:
validator_is_jailed: Dict,
node_is_peered_with_sentinel: Dict = None, validator_is_peered_with_sentinel: Dict = None) -> None:
self._parent_id = parent_id
self._cannot_access_validator = cannot_access_validator
self._cannot_access_node = cannot_access_node
Expand All @@ -38,6 +39,8 @@ def __init__(
self._node_is_syncing = node_is_syncing
self._validator_is_syncing = validator_is_syncing
self._validator_is_jailed = validator_is_jailed
self._node_is_peered_with_sentinel = node_is_peered_with_sentinel
self._validator_is_peered_with_sentinel = validator_is_peered_with_sentinel

def __eq__(self, other: Any) -> bool:
return self.__dict__ == other.__dict__
Expand Down Expand Up @@ -110,6 +113,14 @@ def node_is_syncing(self) -> Dict:
def validator_is_syncing(self) -> Dict:
return self._validator_is_syncing

@property
def node_is_peered_with_sentinel(self) -> Dict:
    """Return the sentinel-peering alert config used for non-validator nodes.

    The value is whatever was passed to the constructor, whose default for
    this argument is None.
    """
    return self._node_is_peered_with_sentinel

@property
def validator_is_peered_with_sentinel(self) -> Dict:
    """Return the sentinel-peering alert config used for validator nodes.

    The value is whatever was passed to the constructor, whose default for
    this argument is None.
    """
    return self._validator_is_peered_with_sentinel

@property
def validator_is_jailed(self) -> Dict:
return self._validator_is_jailed
5 changes: 3 additions & 2 deletions alerter/src/configs/factory/alerts/cosmos_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def get_chain_name(self, parent_id: str,

return None


class CosmosNodeAlertsConfigsFactory(CosmosAlertsConfigsFactory):
"""
This class manages the node alerts configs. The configs are indexed by the
Expand Down Expand Up @@ -125,7 +124,9 @@ def add_new_config(self, chain_name: str, sent_configs: Dict) -> None:
slashed=filtered['slashed'],
node_is_syncing=filtered['node_is_syncing'],
validator_is_syncing=filtered['validator_is_syncing'],
validator_is_jailed=filtered['validator_is_jailed']
validator_is_jailed=filtered['validator_is_jailed'],
node_is_peered_with_sentinel=filtered['node_is_peered_with_sentinel'],
validator_is_peered_with_sentinel=filtered['validator_is_peered_with_sentinel'],
)

self._configs[chain_name] = cosmos_node_alerts_config
Expand Down
5 changes: 5 additions & 0 deletions alerter/src/data_store/redis/store_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
_key_cosmos_node_last_monitored_prometheus = 'CosmosNode11'
_key_cosmos_node_last_monitored_cosmos_rest = 'CosmosNode12'
_key_cosmos_node_last_monitored_tendermint_rpc = 'CosmosNode13'
_key_cosmos_node_is_peered = 'CosmosNode14'

# CosmosNetworkX_<cosmos_network_id>
_key_cosmos_network_proposals = 'CosmosNetwork1'
Expand Down Expand Up @@ -407,6 +408,10 @@ def get_cosmos_node_voting_power(cosmos_node_id: str) -> str:
def get_cosmos_node_is_syncing(cosmos_node_id: str) -> str:
return Keys._as_prefix(_key_cosmos_node_is_syncing) + cosmos_node_id

@staticmethod
def get_cosmos_node_is_peered(cosmos_node_id: str) -> str:
    """Return the store key for a Cosmos node's sentinel-peering flag.

    The key is the prefixed form of `_key_cosmos_node_is_peered` followed by
    the given node id.
    """
    # NOTE(review): the exact prefix format comes from Keys._as_prefix,
    # which is not visible here.
    return Keys._as_prefix(_key_cosmos_node_is_peered) + cosmos_node_id

@staticmethod
def get_cosmos_node_bond_status(cosmos_node_id: str) -> str:
return Keys._as_prefix(_key_cosmos_node_bond_status) + cosmos_node_id
Expand Down
6 changes: 4 additions & 2 deletions alerter/src/data_store/stores/node/cosmos.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from src.utils.exceptions import (MessageWasNotDeliveredException,
NodeIsDownException)


class CosmosNodeStore(Store):
def __init__(self, name: str, logger: logging.Logger,
rabbitmq: RabbitMQApi) -> None:
Expand Down Expand Up @@ -261,6 +260,8 @@ def _process_redis_tendermint_rpc_result_store(self, data: Dict) -> None:
node_id): str(metrics['went_down_at']),
Keys.get_cosmos_node_is_syncing(node_id):
str(metrics['is_syncing']),
Keys.get_cosmos_node_is_peered(node_id):
"" if ('is_peered_with_sentinel' not in metrics) else str(metrics['is_peered_with_sentinel']),
Keys.get_cosmos_node_slashed(node_id):
json.dumps(metrics['slashed']),
Keys.get_cosmos_node_missed_blocks(
Expand Down Expand Up @@ -401,6 +402,7 @@ def _process_mongo_tendermint_rpc_result_store(self, data: Dict) -> None:
'went_down_at_tendermint_rpc': str(
metrics['went_down_at']),
'is_syncing': str(metrics['is_syncing']),
'is_peered_with_sentinel': "" if ('is_peered_with_sentinel' not in metrics) else str(metrics['is_peered_with_sentinel']),
'slashed': json.dumps(metrics['slashed']),
'missed_blocks': json.dumps(metrics['missed_blocks']),
'timestamp': meta_data['last_monitored'],
Expand Down Expand Up @@ -534,4 +536,4 @@ def _process_mongo_cosmos_rest_error_store(self, data: Dict) -> None:
},
'$inc': {'n_entries': 1},
}
)
)
10 changes: 10 additions & 0 deletions alerter/src/data_transformers/node/cosmos.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,12 @@ def _update_tendermint_rpc_state(self, tendermint_rpc_data: Dict) -> None:
node.set_slashed(metrics['slashed'])
node.set_missed_blocks(metrics['missed_blocks'])
node.set_is_syncing(metrics['is_syncing'])
# If the node is running mev-tendermint, record its sentinel-peering state
if meta_data['is_mev_tendermint_node']:
node.set_is_peered_with_sentinel(metrics['is_peered_with_sentinel'])
else:
# The node is not reporting as a mev-tendermint node (e.g. its config changed), so clear any previously stored peering state
node.set_is_peered_with_sentinel(None)
node.set_last_monitored_tendermint_rpc(meta_data['last_monitored'])
node.set_tendermint_rpc_as_up()
elif 'error' in tendermint_rpc_data:
Expand Down Expand Up @@ -387,6 +393,7 @@ def _process_transformed_tendermint_rpc_data_for_alerting(
'data': {}
}
}

pd_data = processed_data['result']['data']

# Reformat the data in such a way that both the previous and current
Expand All @@ -402,6 +409,9 @@ def _process_transformed_tendermint_rpc_data_for_alerting(
pd_data['missed_blocks']['previous'] = copy.deepcopy(
node.missed_blocks)
pd_data['is_syncing']['previous'] = node.is_syncing
# If the node is running mev-tendermint, also include the previous sentinel-peering state
if td_meta_data['is_mev_tendermint_node']:
pd_data['is_peered_with_sentinel']['previous'] = node.is_peered_with_sentinel
elif 'error' in transformed_tendermint_rpc_data:
td_meta_data = transformed_tendermint_rpc_data['error']['meta_data']
td_error_code = transformed_tendermint_rpc_data['error']['code']
Expand Down
9 changes: 9 additions & 0 deletions alerter/src/monitorables/nodes/cosmos_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def __init__(self, node_name: str, node_id: str, parent_id: str) -> None:
self._current_height = None
self._voting_power = None
self._is_syncing = None
self._is_peered_with_sentinel = None
self._bond_status = None
self._jailed = None

Expand Down Expand Up @@ -86,6 +87,10 @@ def voting_power(self) -> Optional[int]:
def is_syncing(self) -> Optional[bool]:
return self._is_syncing

@property
def is_peered_with_sentinel(self) -> Optional[bool]:
    """Return the node's stored sentinel-peering state.

    The value is None when no state is held: it is initialised to None and
    reset() sets it back to None.
    """
    return self._is_peered_with_sentinel

@property
def bond_status(self) -> Optional[BondStatus]:
return self._bond_status
Expand Down Expand Up @@ -199,6 +204,9 @@ def set_voting_power(self, new_voting_power: Optional[int]) -> None:
def set_is_syncing(self, new_is_syncing: Optional[bool]) -> None:
    """Store `new_is_syncing` as the node's syncing state (None clears it)."""
    self._is_syncing = new_is_syncing

def set_is_peered_with_sentinel(self, new_is_peered_with_sentinel: Optional[bool]) -> None:
    """Store the node's sentinel-peering state (None clears it)."""
    self._is_peered_with_sentinel = new_is_peered_with_sentinel

def set_bond_status(self, new_bond_status: Optional[BondStatus]) -> None:
    """Store `new_bond_status` as the node's bond status (None clears it)."""
    self._bond_status = new_bond_status

Expand Down Expand Up @@ -284,6 +292,7 @@ def reset(self) -> None:
self.set_current_height(None)
self.set_voting_power(None)
self.set_is_syncing(None)
self.set_is_peered_with_sentinel(None)
self.set_bond_status(None)
self.set_jailed(None)
self.set_slashed({'slashed': False, 'amount_map': {}})
Expand Down
18 changes: 17 additions & 1 deletion alerter/src/monitors/node/cosmos.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,11 +419,20 @@ def _get_tendermint_rpc_direct_data(self) -> Dict:
def retrieval_process() -> Dict:
    """Fetch the node's Tendermint status and extract the monitored fields.

    The returned dict always holds `consensus_hex_address` and `is_syncing`.
    When the status result contains a `mev_info` section,
    `is_peered_with_sentinel` is added from its `is_peered_with_relayer`
    value; otherwise that key is left out.
    """
    response = self.tendermint_rpc_api.execute_with_checks(
        self.tendermint_rpc_api.get_status, [node_url], node_name)
    result = response['result']

    retrieved = {
        'consensus_hex_address': result['validator_info']['address'],
        'is_syncing': result['sync_info']['catching_up'],
    }
    # `mev_info` is not present in every status response; when it is
    # missing the peering key is omitted rather than set to a placeholder.
    if 'mev_info' in result:
        retrieved['is_peered_with_sentinel'] = (
            result['mev_info']['is_peered_with_relayer'])
    return retrieved

return self._execute_cosmos_tendermint_retrieval_with_exceptions(
Expand Down Expand Up @@ -684,7 +693,13 @@ def _get_tendermint_rpc_data(self) -> (Dict, bool, Optional[Exception]):
if direct_data['consensus_hex_address'] not in ['', None]:
self._validator_consensus_address = direct_data[
'consensus_hex_address']
direct_data = {'is_syncing': direct_data['is_syncing']}
# If the node is running mev-tendermint, keep the is_peered_with_sentinel field as well
if 'is_peered_with_sentinel' in direct_data:
direct_data = {'is_syncing': direct_data['is_syncing'],
'is_peered_with_sentinel':
direct_data['is_peered_with_sentinel']}
else:
direct_data = {'is_syncing': direct_data['is_syncing']}

# Select archive node for archive data retrieval. If no archive
# node is accessible, or given by the user, try getting data with
Expand Down Expand Up @@ -882,6 +897,7 @@ def _process_retrieved_tendermint_rpc_data(self, data: Dict) -> Dict:
'node_id': self.node_config.node_id,
'node_parent_id': self.node_config.parent_id,
'time': datetime.now().timestamp(),
'is_mev_tendermint_node': 'is_peered_with_sentinel' in data,
'is_validator': self.node_config.is_validator,
'operator_address': self.node_config.operator_address,
},
Expand Down
Loading

0 comments on commit d41b8e1

Please sign in to comment.