Skip to content

Commit

Permalink
chore(networkmonitor): add metric listing content topics + messages
Browse files Browse the repository at this point in the history
  • Loading branch information
alrevuelta committed Nov 3, 2022
1 parent 08d936a commit ea4da67
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 11 deletions.
19 changes: 13 additions & 6 deletions tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ Monitoring tool to run in an existing `waku` network with the following features
* Keeps discovering new peers using `discv5`
* Tracks advertised capabilities of each node as per stored in the ENR `waku` field
* Attempts to connect to all nodes, tracking which protocols each node supports
* Presents grafana-ready metrics showing the state of the network in terms of locations, ips, number discovered peers, number of peers we could connect to, user-agent that each peer contains, etc.
* Metrics are exposed through prometheus metrics but also with a custom rest api, presenting detailed information about each peer.
* Presents grafana-ready metrics showing the state of the network in terms of locations, ips, number of discovered peers, number of peers we could connect to, user-agent that each peer contains, content topics and the number of received messages in each one.
* Metrics are exposed through prometheus and also through a custom rest api, which presents detailed information about each peer.

### Usage

Expand All @@ -77,14 +77,19 @@ The following options are available:

### Example

Connect to the network through a given bootstrap node, with default parameters. Once it's running, metrics will be live at `localhost:8008/metrics`
Connect to the network through a given bootstrap node, with default parameters. See metrics section for the data that it exposes.

```console
./build/networkmonitor --log-level=INFO --b="enr:-Nm4QOdTOKZJKTUUZ4O_W932CXIET-M9NamewDnL78P5u9DOGnZlK0JFZ4k0inkfe6iY-0JAaJVovZXc575VV3njeiABgmlkgnY0gmlwhAjS3ueKbXVsdGlhZGRyc7g6ADg2MW5vZGUtMDEuYWMtY24taG9uZ2tvbmctYy53YWt1djIucHJvZC5zdGF0dXNpbS5uZXQGH0DeA4lzZWNwMjU2azGhAo0C-VvfgHiXrxZi3umDiooXMGY9FvYj5_d1Q4EeS7eyg3RjcIJ2X4N1ZHCCIyiFd2FrdTIP"
```

### Metrics

### metrics
Metrics are divided into two categories:
* Prometheus metrics, exposed as e.g. gauges.
* Custom metrics, used for unconstrained labels such as peer information or content topics. These metrics are not exposed through prometheus because, being unconstrained, they can end up breaking the backend: a new datapoint is generated for each one, and the volume can reach a point where it is too much to handle.

#### Prometheus Metrics

The following metrics are available. See `http://localhost:8008/metrics`

Expand All @@ -96,6 +101,8 @@ Other relevant metrics reused from `nim-eth`:
* routing_table_nodes: Inherited from nim-eth, number of nodes in the routing table
* discovery_message_requests_outgoing_total: Inherited from nim-eth, number of outgoing discovery requests, useful to know if the node is actively looking for new peers

The following metrics are exposed via a custom rest api. See `http://localhost:8009/allpeersinfo`
#### Custom Metrics

* json list of all peers with extra information such as ip, location, supported protocols and last connection time.
The following endpoints are available:
* `http://localhost:8009/allpeersinfo`: json list of all peers with extra information such as ip, location, supported protocols and last connection time.
* `http://localhost:8009/contenttopics`: json list of content topics and the number of messages received on each one.
24 changes: 22 additions & 2 deletions tools/networkmonitor/networkmonitor.nim
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import
../../waku/v2/node/peer_manager/peer_manager,
../../waku/v2/node/waku_node,
../../waku/v2/utils/wakuenr,
../../waku/v2/protocol/waku_message,
../../waku/v2/utils/peers,
./networkmonitor_metrics,
./networkmonitor_config,
Expand Down Expand Up @@ -88,6 +89,7 @@ proc setConnectedPeersMetrics(discoveredNodes: seq[Node],
continue

# try to connect to the peer
# TODO: check last connection time and if not > x, skip connecting
let timedOut = not await node.connectToNodes(@[peer.get()]).withTimeout(timeout)
if timedOut:
warn "could not connect to peer, timedout", timeout=timeout, peer=peer.get()
Expand Down Expand Up @@ -184,22 +186,40 @@ proc main() {.async.} =

let d = node.wakuDiscv5.protocol
d.open()
#await node.start() not sure if needed
await node.mountRelay()

# list of peers that we have discovered/connected
var allPeersRef = CustomPeersTableRef()

# content topic and the number of messages that were received
var numMessagesPerContentTopic = ContentTopicMessageTableRef()

let serverAddress = initTAddress(conf.metricsRestAddress & ":" & $conf.metricsRestPort)
proc validate(pattern: string, value: string): int =
if pattern.startsWith("{") and pattern.endsWith("}"): 0
else: 1
var router = RestRouter.init(validate)
router.installHandler(allPeersRef)
router.installHandler(allPeersRef, numMessagesPerContentTopic)

# rest server for custom metrics
var sres = RestServerRef.new(router, serverAddress)
let restServer = sres.get()
restServer.start()

# subscribe to the default pubsub topic
let pubSubTopic = cast[PubsubTopic]("/waku/2/default-waku/proto")
proc handler(topic: PubsubTopic, data: seq[byte]) {.async, gcsafe.} =
let message = WakuMessage.init(data).value
let payload = cast[string](message.payload)
trace "got message", contentTopic = message.contentTopic

# add content topic to our table and increase count of messages
if not numMessagesPerContentTopic.hasKey(message.contentTopic):
numMessagesPerContentTopic[message.contentTopic] = 0
numMessagesPerContentTopic[message.contentTopic] += 1
node.subscribe(pubSubTopic, handler)

while true:
# discover new random nodes
let discoveredNodes = await d.queryRandom()
Expand All @@ -224,7 +244,7 @@ proc main() {.async.} =
# we dont run ipMajorityLoop
# we dont run revalidateLoop

await sleepAsync(conf.refreshInterval * 1000 * 60)
await sleepAsync(conf.refreshInterval * 1000)

when isMainModule:
waitFor main()
4 changes: 2 additions & 2 deletions tools/networkmonitor/networkmonitor_config.nim
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ type
abbr: "b" }: seq[string]

refreshInterval* {.
desc: "How often new peers are discovered and connected to (in minutes)",
defaultValue: 10,
desc: "How often new peers are discovered and connected to (in seconds)",
defaultValue: 5,
name: "refresh-interval",
abbr: "r" }: int

Expand Down
9 changes: 8 additions & 1 deletion tools/networkmonitor/networkmonitor_metrics.nim
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,18 @@ type
CustomPeersTable* = Table[string, CustomPeerInfo]
CustomPeersTableRef* = ref CustomPeersTable

ContentTopicMessageTableRef* = ref Table[string, int]

# GET /allpeersinfo
proc installHandler*(router: var RestRouter, allPeers: CustomPeersTableRef) =
proc installHandler*(router: var RestRouter,
allPeers: CustomPeersTableRef,
numMessagesPerContentTopic: ContentTopicMessageTableRef) =

router.api(MethodGet, "/allpeersinfo") do () -> RestApiResponse:
let values = toSeq(allPeers.keys()).mapIt(allPeers[it])
return RestApiResponse.response($(%values), contentType="application/json")
router.api(MethodGet, "/contenttopics") do () -> RestApiResponse:
return RestApiResponse.response($(%numMessagesPerContentTopic), contentType="application/json")

proc startMetricsServer*(serverIp: ValidIpAddress, serverPort: Port) =
info "Starting metrics HTTP server", serverIp, serverPort
Expand Down

0 comments on commit ea4da67

Please sign in to comment.