From d077364a716348df73d541fad411baea031fd1f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wala?= Date: Thu, 10 Aug 2023 12:28:34 +0200 Subject: [PATCH 1/7] Update used telemetry events --- lib/rel/allocation_handler.ex | 41 ++++++++----------------------- lib/rel/listener.ex | 36 +++++----------------------- lib/rel_app.ex | 45 +++++++++++++++++++++++++++++------ 3 files changed, 54 insertions(+), 68 deletions(-) diff --git a/lib/rel/allocation_handler.ex b/lib/rel/allocation_handler.ex index b6a2d78..a64a3a4 100644 --- a/lib/rel/allocation_handler.ex +++ b/lib/rel/allocation_handler.ex @@ -40,7 +40,8 @@ defmodule Rel.AllocationHandler do Logger.metadata(alloc: alloc_id) Logger.info("Starting new allocation handler") - Process.send_after(self(), :measure_bitrate, 1000) + :telemetry.execute([:allocations], %{created: 1}) + Process.send_after(self(), :check_expiration, time_to_expiry * 1000) {:ok, @@ -54,14 +55,7 @@ defmodule Rel.AllocationHandler do permissions: %{}, chann_to_time: %{}, chann_to_addr: %{}, - addr_to_chann: %{}, - - # stats - # bytes sent by the client - out_bytes: 0, - # bytes sent to the client - in_bytes: 0, - last_check: System.monotonic_time(:second) + addr_to_chann: %{} }} end @@ -75,12 +69,15 @@ defmodule Rel.AllocationHandler do @impl true def handle_info({:udp, _socket, ip_addr, port, packet}, state) do + len = byte_size(packet) + + :telemetry.execute([:allocations, :peer], %{inbound: len}) + if Map.has_key?(state.permissions, ip_addr) do {c_ip, c_port, _, _, _} = state.five_tuple case Map.fetch(state.addr_to_chann, {ip_addr, port}) do {:ok, number} -> - len = byte_size(packet) channel_data = <> :ok = :gen_udp.send(state.turn_socket, c_ip, c_port, channel_data) @@ -96,7 +93,6 @@ defmodule Rel.AllocationHandler do :ok = :gen_udp.send(state.turn_socket, c_ip, c_port, response) end - state = %{state | in_bytes: state.in_bytes + byte_size(packet)} {:noreply, state} else Logger.warning( @@ -107,24 +103,6 @@ defmodule Rel.AllocationHandler do end end - @impl true - def handle_info(:measure_bitrate, state) do - now = System.monotonic_time(:second) - - in_bitrate = state.in_bytes / (now - state.last_check) - out_bitrate = state.out_bytes / (now - state.last_check) - - :telemetry.execute([:allocation], %{in_bitrate: in_bitrate, out_bitrate: out_bitrate}, %{ - allocation_id: state.alloc_id - }) - - state = %{state | in_bytes: 0, out_bytes: 0, last_check: now} - - Process.send_after(self(), :measure_bitrate, 1000) - - {:noreply, state} - end - @impl true def handle_info(:check_expiration, state) do if System.os_time(:second) >= state.expiry_timestamp do @@ -169,6 +147,7 @@ defmodule Rel.AllocationHandler do @impl true def terminate(reason, _state) do + :telemetry.execute([:allocations], %{expired: 1}) Logger.info("Allocation handler stopped with reason: #{inspect(reason)}") end @@ -240,7 +219,7 @@ defmodule Rel.AllocationHandler do true <- Map.has_key?(state.permissions, ip_addr) do # TODO: dont fragment attribute :ok = :gen_udp.send(state.socket, ip_addr, port, data) - {:ok, %{state | out_bytes: state.out_bytes + byte_size(data)}} + {:ok, state} else false -> {:ok, %XORPeerAddress{address: addr}} = get_xor_peer_address(msg) @@ -297,7 +276,7 @@ defmodule Rel.AllocationHandler do case Map.fetch(state.chann_to_addr, number) do {:ok, addr} -> :ok = :gen_udp.send(state.socket, addr, data) - {:ok, %{state | out_bytes: state.out_bytes + byte_size(data)}} + {:ok, state} :error -> {:ok, state} diff --git a/lib/rel/listener.ex b/lib/rel/listener.ex index ec5499f..ada56ad 100644 --- a/lib/rel/listener.ex +++ b/lib/rel/listener.ex @@ -48,40 +48,16 @@ defmodule Rel.Listener do spawn(Rel.Monitor, :start, [self(), socket]) - recv_loop(socket, %{ - listener_id: listener_addr, - in_bytes: 0, - last_stats_check: System.monotonic_time(:millisecond), - next_stats_check: System.monotonic_time(:millisecond) + 1000 - }) + recv_loop(socket) end - defp recv_loop(socket, state) do - now = System.monotonic_time(:millisecond) - rem_timeout = state.next_stats_check - now - - {next_timeout, state} = - if rem_timeout <= 0 do - duration = now - state.last_stats_check - in_bitrate = state.in_bytes / (duration / 1000) - - :telemetry.execute([:listener], %{in_bitrate: in_bitrate}, %{ - listener_id: state.listener_id - }) - - next_stats_check = System.monotonic_time(:millisecond) + 1000 - {1000, %{state | in_bytes: 0, last_stats_check: now, next_stats_check: next_stats_check}} - else - {rem_timeout, state} - end - - case :gen_udp.recv(socket, 0, next_timeout) do + defp recv_loop(socket) do + case :gen_udp.recv(socket, 0) do {:ok, {client_addr, client_port, packet}} -> - process(socket, client_addr, client_port, packet) - recv_loop(socket, %{state | in_bytes: state.in_bytes + byte_size(packet)}) + :telemetry.execute([:listener, :peer], %{inbound: byte_size(packet)}) - {:error, :timeout} -> - recv_loop(socket, state) + process(socket, client_addr, client_port, packet) + recv_loop(socket) {:error, reason} -> Logger.error("Couldn't receive from the socket, reason: #{inspect(reason)}") diff --git a/lib/rel_app.ex b/lib/rel_app.ex index 57b4608..fcd3a5b 100644 --- a/lib/rel_app.ex +++ b/lib/rel_app.ex @@ -53,15 +53,46 @@ defmodule Rel.App do import Telemetry.Metrics [ - last_value("listener.in_bitrate", tags: [:listener_id]), - last_value("allocation.in_bitrate", tags: [:allocation_id]), - last_value("allocation.out_bitrate", tags: [:allocation_id]), + sum( + "turn.allocations.created", + event_name: [:allocations], + measurement: :created + ), + sum( + "turn.allocations.expired", + event_name: [:allocations], + measurement: :expired + ), + sum( + "turn.listener.client_inbound_traffic.bytes", + event_name: [:listener, :client], + measurement: :inbound, + unit: :byte + ), + sum( + "turn.allocations.peer_inbound_traffic.bytes", + event_name: [:allocations, :peer], + measurement: :inbound, + unit: :byte + ), # telemetry poller - last_value("vm.memory.total", unit: :byte), - last_value("vm.total_run_queue_lengths.total"), - last_value("vm.total_run_queue_lengths.cpu"), - last_value("vm.total_run_queue_lengths.io") + last_value( + "vm.memory.bytes", + event_name: [:vm, :memory], + measurement: :total, + unit: :byte + ), + last_value( + "vm.run_queue.cpu.length", + event_name: [:vm, :total_run_queue_lengths], + measurement: :cpu + ), + last_value( + "vm.run_queue.io.length", + event_name: [:vm, :total_run_queue_lengths], + measurement: :io + ) ] end end From 5fc34f915fedaa11e4bdf0d67c9ac87aea5ed517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wala?= Date: Thu, 10 Aug 2023 14:05:32 +0200 Subject: [PATCH 2/7] Add node_exporter to docker-compose and prometheus config --- docker-compose.yml | 15 ++++++++++++++- prometheus.yml | 15 +++++---------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f92de52..c97c3b3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: '3.2' +version: '3.8' services: turn: image: ghcr.io/elixir-webrtc/rel:${TAG} @@ -8,6 +8,18 @@ services: environment: DOMAIN: "${DOMAIN}" + node-exporter: + image: prom/node-exporter:v1.6.1 + container_name: node_exporter + restart: on-failure + command: + - --path.rootfs=/host + network_mode: host + pid: host + volumes: + - /:/host:ro,rslave + + prometheus: image: prom/prometheus:v2.46.0 container_name: prometheus @@ -22,6 +34,7 @@ services: - prometheus_data:/prometheus depends_on: - turn + - node-exporter grafana: image: grafana/grafana:10.0.3 diff --git a/prometheus.yml b/prometheus.yml index 2d3563d..bab626c 100644 --- a/prometheus.yml +++ b/prometheus.yml @@ -1,19 +1,14 @@ global: - scrape_interval: 15s # By default, scrape targets every 15 seconds. + scrape_interval: 2s - # Attach these labels to any time series or alerts when communicating with - # external systems (federation, remote storage, Alertmanager). external_labels: monitor: 'codelab-monitor' -# A scrape configuration containing exactly one endpoint to scrape: -# Here it's Prometheus itself. scrape_configs: - # The job name is added as a label `job=` to any timeseries scraped from this config. - job_name: 'rel' - - # Override the global default and scrape targets from this job every 5 seconds. - scrape_interval: 1s - static_configs: - targets: ['127.0.0.1:9568'] + + - job_name: 'node' + static_configs: + - targets: ['127.0.0.1:9100'] From 904a2e2ef270ad6094ab6fd0be868bc84b0cdcfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wala?= Date: Thu, 10 Aug 2023 14:27:20 +0200 Subject: [PATCH 3/7] Add default prometheus metrics values --- docker-compose.yml | 3 +-- lib/rel/listener.ex | 2 +- lib/rel/supervisor.ex | 6 ++++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index c97c3b3..f794288 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ version: '3.8' services: - turn: + rel: image: ghcr.io/elixir-webrtc/rel:${TAG} container_name: turn restart: on-failure @@ -18,7 +18,6 @@ services: pid: host volumes: - /:/host:ro,rslave - prometheus: image: prom/prometheus:v2.46.0 diff --git a/lib/rel/listener.ex b/lib/rel/listener.ex index ada56ad..be78fba 100644 --- a/lib/rel/listener.ex +++ b/lib/rel/listener.ex @@ -54,7 +54,7 @@ defmodule Rel.Listener do defp recv_loop(socket) do case :gen_udp.recv(socket, 0) do {:ok, {client_addr, client_port, packet}} -> - :telemetry.execute([:listener, :peer], %{inbound: byte_size(packet)}) + :telemetry.execute([:listener, :client], %{inbound: byte_size(packet)}) process(socket, client_addr, client_port, packet) recv_loop(socket) diff --git a/lib/rel/supervisor.ex b/lib/rel/supervisor.ex index 71e92b3..944e1b2 100644 --- a/lib/rel/supervisor.ex +++ b/lib/rel/supervisor.ex @@ -12,6 +12,12 @@ defmodule Rel.Supervisor do listen_ip = Application.fetch_env!(:rel, :listen_ip) listen_port = Application.fetch_env!(:rel, :listen_port) + # Default values for prometheus + :telemetry.execute([:listener, :client], %{inbound: 0}) + :telemetry.execute([:allocations, :peer], %{inbound: 0}) + :telemetry.execute([:allocations], %{created: 0}) + :telemetry.execute([:allocations], %{expired: 0}) + children = [ {DynamicSupervisor, strategy: :one_for_one, name: Rel.AllocationSupervisor}, {Registry, keys: :unique, name: Registry.Allocations}, From 38dbc7ba3ad98c2b321edd63b1bc82fdb038ae25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wala?= Date: Thu, 10 Aug 2023 16:48:32 +0200 Subject: [PATCH 4/7] Bugfixes --- .github/workflows/build_deploy.yml | 4 ++-- docker-compose.yml | 4 ++-- lib/rel/allocation_handler.ex | 4 ++-- lib/rel/supervisor.ex | 6 ------ lib/rel_app.ex | 8 ++++---- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build_deploy.yml b/.github/workflows/build_deploy.yml index a698bf4..69d564d 100644 --- a/.github/workflows/build_deploy.yml +++ b/.github/workflows/build_deploy.yml @@ -54,7 +54,7 @@ jobs: env: GF_SECURITY_ADMIN_PASSWORD: ${{ secrets.GF_SECURITY_ADMIN_PASSWORD }} GF_SECURITY_ADMIN_USER: ${{ secrets.GF_SECURITY_ADMIN_USER }} - DOMAIN: ${{ secrets.DOMAIN }} + DOMAIN_NAME: ${{ secrets.DOMAIN }} DIR_NAME: ${{ secrets.DIR_NAME }} TAG: ${{ github.ref_name }} with: @@ -65,7 +65,7 @@ jobs: rm -rf $DIR_NAME; mkdir $DIR_NAME cd $DIR_NAME git clone -b $TAG --depth 1 https://github.com/${{ github.repository }} . - echo "DOMAIN=$DOMAIN + echo "DOMAIN_NAME=$DOMAIN_NAME GF_SECURITY_ADMIN_PASSWORD=$GF_SECURITY_ADMIN_PASSWORD GF_SECURITY_ADMIN_USER=$GF_SECURITY_ADMIN_USER TAG=${TAG#v}" > .env diff --git a/docker-compose.yml b/docker-compose.yml index f794288..0eabf6a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,11 +2,11 @@ version: '3.8' services: rel: image: ghcr.io/elixir-webrtc/rel:${TAG} - container_name: turn + container_name: rel restart: on-failure network_mode: host environment: - DOMAIN: "${DOMAIN}" + DOMAIN_NAME: "${DOMAIN_NAME}" node-exporter: image: prom/node-exporter:v1.6.1 diff --git a/lib/rel/allocation_handler.ex b/lib/rel/allocation_handler.ex index a64a3a4..0d0e297 100644 --- a/lib/rel/allocation_handler.ex +++ b/lib/rel/allocation_handler.ex @@ -40,7 +40,7 @@ defmodule Rel.AllocationHandler do Logger.metadata(alloc: alloc_id) Logger.info("Starting new allocation handler") - :telemetry.execute([:allocations], %{created: 1}) + :telemetry.execute([:allocations], %{created: 1, expired: 0}) Process.send_after(self(), :check_expiration, time_to_expiry * 1000) @@ -147,7 +147,7 @@ defmodule Rel.AllocationHandler do @impl true def terminate(reason, _state) do - :telemetry.execute([:allocations], %{expired: 1}) + :telemetry.execute([:allocations], %{created: 0, expired: 1}) Logger.info("Allocation handler stopped with reason: #{inspect(reason)}") end diff --git a/lib/rel/supervisor.ex b/lib/rel/supervisor.ex index 944e1b2..71e92b3 100644 --- a/lib/rel/supervisor.ex +++ b/lib/rel/supervisor.ex @@ -12,12 +12,6 @@ defmodule Rel.Supervisor do listen_ip = Application.fetch_env!(:rel, :listen_ip) listen_port = Application.fetch_env!(:rel, :listen_port) - # Default values for prometheus - :telemetry.execute([:listener, :client], %{inbound: 0}) - :telemetry.execute([:allocations, :peer], %{inbound: 0}) - :telemetry.execute([:allocations], %{created: 0}) - :telemetry.execute([:allocations], %{expired: 0}) - children = [ {DynamicSupervisor, strategy: :one_for_one, name: Rel.AllocationSupervisor}, {Registry, keys: :unique, name: Registry.Allocations}, diff --git a/lib/rel_app.ex b/lib/rel_app.ex index fcd3a5b..15acffc 100644 --- a/lib/rel_app.ex +++ b/lib/rel_app.ex @@ -54,23 +54,23 @@ defmodule Rel.App do [ sum( - "turn.allocations.created", + "turn.allocations.total.created", event_name: [:allocations], measurement: :created ), sum( - "turn.allocations.expired", + "turn.allocations.total.expired", event_name: [:allocations], measurement: :expired ), sum( - "turn.listener.client_inbound_traffic.bytes", + "turn.listener.client_inbound_traffic.total.bytes", event_name: [:listener, :client], measurement: :inbound, unit: :byte ), sum( - "turn.allocations.peer_inbound_traffic.bytes", + "turn.allocations.peer_inbound_traffic.total.bytes", event_name: [:allocations, :peer], measurement: :inbound, unit: :byte From 41cb36f621a7ca4d4ba6b17c25227469adb623fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wala?= Date: Fri, 11 Aug 2023 10:22:00 +0200 Subject: [PATCH 5/7] Properly use the DOMAIN env variable --- .github/workflows/build_deploy.yml | 4 ++-- README.md | 4 ++-- config/runtime.exs | 2 +- docker-compose.yml | 4 ++-- lib/rel/utils.ex | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_deploy.yml b/.github/workflows/build_deploy.yml index 69d564d..a698bf4 100644 --- a/.github/workflows/build_deploy.yml +++ b/.github/workflows/build_deploy.yml @@ -54,7 +54,7 @@ jobs: env: GF_SECURITY_ADMIN_PASSWORD: ${{ secrets.GF_SECURITY_ADMIN_PASSWORD }} GF_SECURITY_ADMIN_USER: ${{ secrets.GF_SECURITY_ADMIN_USER }} - DOMAIN_NAME: ${{ secrets.DOMAIN }} + DOMAIN: ${{ secrets.DOMAIN }} DIR_NAME: ${{ secrets.DIR_NAME }} TAG: ${{ github.ref_name }} with: @@ -65,7 +65,7 @@ jobs: rm -rf $DIR_NAME; mkdir $DIR_NAME cd $DIR_NAME git clone -b $TAG --depth 1 https://github.com/${{ github.repository }} . - echo "DOMAIN_NAME=$DOMAIN_NAME + echo "DOMAIN=$DOMAIN GF_SECURITY_ADMIN_PASSWORD=$GF_SECURITY_ADMIN_PASSWORD GF_SECURITY_ADMIN_USER=$GF_SECURITY_ADMIN_USER TAG=${TAG#v}" > .env diff --git a/README.md b/README.md index 184a1b4..e0c0265 100644 --- a/README.md +++ b/README.md @@ -88,10 +88,10 @@ RELAY_IP=0.0.0.0 EXTERNAL_RELAY_IP=167.235.241.140 ``` -Remember to use the `DOMAIN_NAME` variable specific to your deployment. It's used in e.g. `REALM` STUN attributes. +Remember to use the `DOMAIN` variable specific to your deployment. It's used in e.g. `REALM` STUN attributes. ```console -DOMAIN_NAME=my-amazing-turn.com +DOMAIN=my-amazing-turn.com ``` ### Auth diff --git a/config/runtime.exs b/config/runtime.exs index 687e780..94195a9 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -124,7 +124,7 @@ config :rel, relay_ip: relay_ip, external_relay_ip: external_relay_ip, listen_port: System.get_env("UDP_LISTEN_PORT", "3478") |> ConfigUtils.parse_port(), - domain_name: System.get_env("DOMAIN_NAME", "example.com") + domain: System.get_env("DOMAIN", "example.com") # Metrics endpoint configuration config :rel, diff --git a/docker-compose.yml b/docker-compose.yml index 0eabf6a..f3acb09 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ services: restart: on-failure network_mode: host environment: - DOMAIN_NAME: "${DOMAIN_NAME}" + DOMAIN: "${DOMAIN}" node-exporter: image: prom/node-exporter:v1.6.1 @@ -32,7 +32,7 @@ services: - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro - prometheus_data:/prometheus depends_on: - - turn + - rel - node-exporter grafana: diff --git a/lib/rel/utils.ex b/lib/rel/utils.ex index ff9e419..e34fb92 100644 --- a/lib/rel/utils.ex +++ b/lib/rel/utils.ex @@ -32,7 +32,7 @@ defmodule Rel.Utils do @spec build_error(atom(), integer(), Method.t()) :: {response :: binary(), log_msg :: String.t()} def build_error(reason, t_id, method) do - domain_name = Application.fetch_env!(:rel, :domain_name) + domain = Application.fetch_env!(:rel, :domain) {log_msg, code, with_attrs?} = translate_error(reason) error_type = %Type{class: :error_response, method: method} @@ -40,7 +40,7 @@ defmodule Rel.Utils do attrs = if with_attrs? do - attrs ++ [%Nonce{value: build_nonce()}, %Realm{value: domain_name}] + attrs ++ [%Nonce{value: build_nonce()}, %Realm{value: domain}] else attrs end From 7a6987c2aabfdad8d4d205c605b1de2f9e4995cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wala?= Date: Fri, 11 Aug 2023 10:30:48 +0200 Subject: [PATCH 6/7] Fix telemetry events names to match Prometheus conventions --- lib/rel_app.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rel_app.ex b/lib/rel_app.ex index 15acffc..78ff9dc 100644 --- a/lib/rel_app.ex +++ b/lib/rel_app.ex @@ -54,12 +54,12 @@ defmodule Rel.App do [ sum( - "turn.allocations.total.created", + "turn.allocations.created.total", event_name: [:allocations], measurement: :created ), sum( - "turn.allocations.total.expired", + "turn.allocations.expired.total", event_name: [:allocations], measurement: :expired ), From bd77d0993ad37c6accbb18d0c315ae897f2c8ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wala?= Date: Fri, 11 Aug 2023 10:40:28 +0200 Subject: [PATCH 7/7] Add new Grafana dashboard --- .../{turn_stats.json => rel_metrics.json} | 345 ++++++++++++++---- 1 file changed, 281 insertions(+), 64 deletions(-) rename grafana/provisioning/dashboards/{turn_stats.json => rel_metrics.json} (53%) diff --git a/grafana/provisioning/dashboards/turn_stats.json b/grafana/provisioning/dashboards/rel_metrics.json similarity index 53% rename from grafana/provisioning/dashboards/turn_stats.json rename to grafana/provisioning/dashboards/rel_metrics.json index f312edb..f37cf61 100644 --- a/grafana/provisioning/dashboards/turn_stats.json +++ b/grafana/provisioning/dashboards/rel_metrics.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 1, + "id": 2, "links": [], "liveNow": false, "panels": [ @@ -46,7 +46,7 @@ "tooltip": false, "viz": false }, - "lineInterpolation": "linear", + "lineInterpolation": "smooth", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -76,23 +76,24 @@ } ] }, - "unit": "binbps" + "unit": "bits" }, "overrides": [] }, "gridPos": { - "h": 16, + "h": 11, "w": 24, "x": 0, "y": 0 }, - "id": 3, + "id": 2, + "interval": "5s", "options": { "legend": { "calcs": [ + "lastNotNull", "max", - "mean", - "lastNotNull" + "mean" ], "displayMode": "table", "placement": "bottom", @@ -110,11 +111,12 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "builder", - "expr": "listener_in_bitrate", + "exemplar": false, + "expr": "rate(turn_listener_client_inbound_traffic_total_bytes[$__interval]) * 8", "instant": false, - "legendFormat": "Listener IN ({{listener_id}})", + "legendFormat": "Clients -> Listener", "range": true, - "refId": "listener_in" + "refId": "client_listener" }, { "datasource": { @@ -122,28 +124,15 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "builder", - "expr": "allocation_in_bitrate", + "expr": "rate(turn_allocations_peer_inbound_traffic_total_bytes[$__interval]) * 8", "hide": false, "instant": false, - "legendFormat": "Allocation IN {{allocation_id}}", + "legendFormat": "Peers -> Allocation_handlers", "range": true, - "refId": "allocation_in" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "expr": "allocation_out_bitrate", - "hide": false, - "instant": false, - "legendFormat": "Allocation OUT {{allocation_id}}", - "range": true, - "refId": "allocation_out" + "refId": "peer_allocation" } ], - "title": "Bitrate", + "title": "Bitrates", "transparent": true, "type": "timeseries" }, @@ -171,7 +160,7 @@ "tooltip": false, "viz": false }, - "lineInterpolation": "linear", + "lineInterpolation": "stepBefore", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -206,18 +195,19 @@ "overrides": [] }, "gridPos": { - "h": 10, + "h": 9, "w": 24, "x": 0, - "y": 16 + "y": 11 }, - "id": 2, + "id": 3, + "interval": "5", "options": { "legend": { "calcs": [ + "lastNotNull", "max", - "mean", - "lastNotNull" + "mean" ], "displayMode": "table", "placement": "bottom", @@ -235,11 +225,11 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "builder", - "expr": "vm_total_run_queue_lengths_total", + "expr": "turn_allocations_created_total", "instant": false, - "legendFormat": "Total", + "legendFormat": "Created Allocations", "range": true, - "refId": "total" + "refId": "created" }, { "datasource": { @@ -247,28 +237,151 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "builder", - "expr": "vm_total_run_queue_lengths_cpu", + "expr": "turn_allocations_expired_total", "hide": false, "instant": false, - "legendFormat": "CPU", + "legendFormat": "Expired Allocations", "range": true, - "refId": "cpu" + "refId": "expired" + } + ], + "title": "Active allocations", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Active allocations", + "binary": { + "left": "Created Allocations", + "operator": "-", + "reducer": "sum", + "right": "Expired Allocations" + }, + "mode": "binary", + "reduce": { + "include": [], + "reducer": "diff" + }, + "replaceFields": true + } + } + ], + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "vm_total_run_queue_lengths_io", - "hide": false, + "editorMode": "code", + "expr": "100 - (rate(node_cpu_seconds_total{job=\"node\",mode=\"idle\"}[$__rate_interval]) * 100)", "instant": false, - "legendFormat": "IO", + "legendFormat": "Core {{cpu}}", "range": true, - "refId": "A" + "refId": "cpu" + } + ], + "title": "CPU usage", + "transformations": [ + { + "id": "calculateField", + "options": { + "mode": "reduceRow", + "reduce": { + "include": [ + "Core 0", + "Core 1", + "Core 2", + "Core 3", + "Core 4", + "Core 5", + "Core 6", + "Core 7" + ], + "reducer": "sum" + } + } } ], - "title": "Total Run Queue Lengths", "transparent": true, "type": "timeseries" }, @@ -296,7 +409,7 @@ "tooltip": false, "viz": false }, - "lineInterpolation": "linear", + "lineInterpolation": "smooth", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -332,19 +445,15 @@ }, "gridPos": { "h": 10, - "w": 24, + "w": 12, "x": 0, - "y": 26 + "y": 29 }, - "id": 1, + "id": 4, "options": { "legend": { - "calcs": [ - "max", - "mean", - "lastNotNull" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", "showLegend": true }, @@ -360,19 +469,127 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "builder", - "expr": "vm_memory_total", + "expr": "vm_memory_bytes", "instant": false, - "legendFormat": "Total", + "legendFormat": "Used memory", + "range": true, + "refId": "memory" + } + ], + "title": "VM Memory", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepBefore", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "expr": "vm_run_queue_cpu_length", + "instant": false, + "legendFormat": "CPU", + "range": true, + "refId": "cpu" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "expr": "vm_run_queue_io_length", + "hide": false, + "instant": false, + "legendFormat": "IO", "range": true, - "refId": "total" + "refId": "io" } ], - "title": "VM Memory Usage", + "title": "Run queue lengths", "transparent": true, "type": "timeseries" } ], - "refresh": "", + "refresh": "5s", "schemaVersion": 38, "style": "dark", "tags": [], @@ -380,13 +597,13 @@ "list": [] }, "time": { - "from": "now-6h", + "from": "now-5m", "to": "now" }, "timepicker": {}, "timezone": "", - "title": "TURN Stats", - "uid": "aa758579-a261-4899-8aac-adaec7c1ce01", - "version": 1, + "title": "Rel metrics", + "uid": "f89914d6-5064-454e-bfb9-128f69816c36", + "version": 6, "weekStart": "" } \ No newline at end of file