Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

R4R: prometheus exporter in substrate #4511

Closed
Closed
Show file tree
Hide file tree
Changes from 55 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
40762b6
Refactor rebase master prometheus_v0.3
nodebreaker0-0 Nov 28, 2019
472aca1
Milestone1: Final Version of v0.3
nodebreaker0-0 Dec 10, 2019
1974f94
no-std or warm compatibility issues, grapana-data -source code refere…
nodebreaker0-0 Jan 2, 2020
62a873d
Cargo.lock paritytech/master rebase
nodebreaker0-0 Jan 2, 2020
97e37e4
prometheus networking.rs del, grafana-data-source networking.rs pub e…
nodebreaker0-0 Jan 3, 2020
f3e444e
chore: reflect various feedback
nodebreaker0-0 Jan 10, 2020
8145df7
Spaces to tabs.
gavofyork Jan 6, 2020
fca71be
Replace grafana and tidy
expenses Jan 10, 2020
855ceed
Add generics
expenses Jan 10, 2020
70c6bcc
Add photo back
expenses Jan 10, 2020
5941041
Re-fix spaces in primitives/consensus/babe/src/inherents.rs
expenses Jan 10, 2020
7d9d341
Refactor rebase master prometheus_v0.3
nodebreaker0-0 Nov 28, 2019
52316ae
Milestone1: Final Version of v0.3
nodebreaker0-0 Dec 10, 2019
82cd8cf
no-std or warm compatibility issues, grapana-data -source code refere…
nodebreaker0-0 Jan 2, 2020
b052829
prometheus networking.rs del, grafana-data-source networking.rs pub e…
nodebreaker0-0 Jan 3, 2020
4e831e9
chore: reflect various feedback
nodebreaker0-0 Jan 10, 2020
0b15bc4
Replace grafana and tidy
expenses Jan 10, 2020
8025361
Add generics
expenses Jan 10, 2020
fd081c9
Add photo back
expenses Jan 10, 2020
f00bb9b
Re-fix spaces in primitives/consensus/babe/src/inherents.rs
expenses Jan 10, 2020
398da97
chore: revert this file back to paritytech/master inherents.rs.
nodebreaker0-0 Jan 14, 2020
84c458f
Add newline at EOF
expenses Jan 14, 2020
f931588
Merge remote-tracking branch 'nodebreaker/prometheus_v0.3' into ashle…
expenses Jan 16, 2020
5796c85
Merge remote-tracking branch 'parity/master' into ashley-prometheus
expenses Jan 16, 2020
2a45de8
Merge remote-tracking branch 'parity/master' into ashley-prometheus
expenses Jan 16, 2020
8bae73b
Merge remote-tracking branch 'parity/master' into ashley-prometheus
expenses Jan 17, 2020
1caa0f1
Merge remote-tracking branch 'parity/master' into ashley-prometheus
expenses Jan 17, 2020
ffb4746
Tidy
expenses Jan 17, 2020
8a6e3c5
Use local registry
expenses Jan 17, 2020
53c95de
fix typo
hskang9 Jan 17, 2020
23cb72e
chore: Apply review feedback
nodebreaker0-0 Jan 20, 2020
3d1634b
Merge remote-tracking branch 'nodebreaker/prometheus_v0.3' into ashle…
expenses Jan 20, 2020
55ac4f8
Merge remote-tracking branch 'parity/master' into ashley-prometheus
expenses Jan 20, 2020
3cfe43c
endpoint -> exporter
expenses Jan 20, 2020
0af2369
fix readme
expenses Jan 20, 2020
44f8c0e
Merge pull request #3 from paritytech/ashley-prometheus
nodebreaker0-0 Jan 21, 2020
d2bba61
Remove lazy_static, use ServiceMetrics struct instead
expenses Jan 21, 2020
5d3d9a7
Merge pull request #4 from paritytech/ashley-prometheus
nodebreaker0-0 Jan 22, 2020
32c04b4
Merge branch 'paritytech/master' into prometheus_v0.3
mxinden Jan 23, 2020
9bde830
Switch to using GaugeVecs
expenses Jan 23, 2020
97faad6
Merge pull request #5 from mxinden/prometheus_v0.3
nodebreaker0-0 Jan 24, 2020
f64a482
chore: without nightly , edit README
nodebreaker0-0 Jan 24, 2020
5d56d93
Merge branch 'prometheus_v0.3' into ashley-prometheus
nodebreaker0-0 Jan 24, 2020
50f2928
block_height -> block_height_number
expenses Jan 27, 2020
8edb710
Merge branch 'ashley-prometheus' of github.com:paritytech/substrate i…
expenses Jan 27, 2020
6a25ea8
Switch to a ready_transactions_number gauge
expenses Jan 29, 2020
18cf2be
Merge pull request #7 from paritytech/ashley-prometheus
nodebreaker0-0 Jan 29, 2020
bb388f5
Update utils/prometheus/src/lib.rs
hskang9 Jan 31, 2020
77ba252
no-prometheus flag add
nodebreaker0-0 Jan 31, 2020
5e327c2
/metrics url Input check
nodebreaker0-0 Jan 31, 2020
ccb3179
remove prometheus in Tracing
nodebreaker0-0 Feb 5, 2020
152176f
remove prometheus in Tracing
nodebreaker0-0 Feb 14, 2020
8de57c2
Merge branch 'master' into prometheus_v0.3
nodebreaker0-0 Feb 14, 2020
e76f200
chore: master code rebase edit
nodebreaker0-0 Feb 14, 2020
55428b7
gitlab-check-web-wasm edit code
nodebreaker0-0 Feb 14, 2020
d1003e6
From:from and cargo.lock update
nodebreaker0-0 Feb 17, 2020
5c2e085
Merge commit 'db1ab7d18fbe7876cdea43bbf30f147ddd263f94' into promethe…
nodebreaker0-0 Feb 17, 2020
b4b4432
with_prometheus_registry add background_tasks
nodebreaker0-0 Feb 17, 2020
ee421a6
Merge commit '419e5fd0026cfd528cd3b327789bb0a3a8215703' into promethe…
nodebreaker0-0 Feb 18, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,8 @@ members = [
"client/telemetry",
"client/transaction-pool",
"client/transaction-pool/graph",
"utils/prometheus",
"utils/wasm-builder-runner",
"utils/grafana-data-source",
"utils/grafana-data-source/test",
"frame/assets",
"frame/aura",
"frame/authority-discovery",
Expand Down
1 change: 1 addition & 0 deletions client/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ sp-core = { version = "2.0.0", path = "../../primitives/core" }
sc-service = { version = "0.8", default-features = false, path = "../service" }
sp-state-machine = { version = "0.8", path = "../../primitives/state-machine" }
sc-telemetry = { version = "2.0.0", path = "../telemetry" }
prometheus-exporter = { path = "../../utils/prometheus" }
sp-keyring = { version = "2.0.0", path = "../../primitives/keyring" }
names = "0.11.0"
structopt = "0.3.8"
Expand Down
17 changes: 9 additions & 8 deletions client/cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -623,13 +623,6 @@ where
config.rpc_ws = Some(parse_address(&format!("{}:{}", ws_interface, 9944), cli.ws_port)?);
}

if config.grafana_port.is_none() || cli.grafana_port.is_some() {
let grafana_interface: &str = if cli.grafana_external { "0.0.0.0" } else { "127.0.0.1" };
config.grafana_port = Some(
parse_address(&format!("{}:{}", grafana_interface, 9955), cli.grafana_port)?
);
}

config.rpc_ws_max_connections = cli.ws_max_connections;
config.rpc_cors = cli.rpc_cors.unwrap_or_else(|| if is_dev {
log::warn!("Running in --dev mode, RPC CORS has been disabled.");
Expand All @@ -651,6 +644,14 @@ where
} else if !cli.telemetry_endpoints.is_empty() {
config.telemetry_endpoints = Some(TelemetryEndpoints::new(cli.telemetry_endpoints));
}
// Override prometheus
if cli.no_prometheus {
config.prometheus_port = None;
} else {
let prometheus_interface: &str = if cli.prometheus_external { "0.0.0.0" } else { "127.0.0.1" };
config.prometheus_port = Some(
parse_address(&format!("{}:{}", prometheus_interface, 9615), cli.prometheus_port)?);
}

config.tracing_targets = cli.import_params.tracing_targets.into();
config.tracing_receiver = cli.import_params.tracing_receiver.into();
Expand Down Expand Up @@ -877,4 +878,4 @@ mod tests {
assert!(config.network.config_path.is_some());
assert!(!config.network.listen_addresses.is_empty());
}
}
}
20 changes: 12 additions & 8 deletions client/cli/src/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,6 @@ arg_enum! {
pub enum TracingReceiver {
Log,
Telemetry,
Grafana,
}
}

Expand All @@ -340,7 +339,6 @@ impl Into<sc_tracing::TracingReceiver> for TracingReceiver {
match self {
TracingReceiver::Log => sc_tracing::TracingReceiver::Log,
TracingReceiver::Telemetry => sc_tracing::TracingReceiver::Telemetry,
TracingReceiver::Grafana => sc_tracing::TracingReceiver::Grafana,
}
}
}
Expand Down Expand Up @@ -480,11 +478,11 @@ pub struct RunCmd {
#[structopt(long = "unsafe-ws-external")]
pub unsafe_ws_external: bool,

/// Listen to all Grafana data source interfaces.
/// Listen to all Prometheus endpoint interfaces.
///
/// Default is local.
#[structopt(long = "grafana-external")]
pub grafana_external: bool,
#[structopt(long = "prometheus-external")]
pub prometheus_external: bool,

/// Specify HTTP RPC server TCP port.
#[structopt(long = "rpc-port", value_name = "PORT")]
Expand All @@ -508,9 +506,15 @@ pub struct RunCmd {
#[structopt(long = "rpc-cors", value_name = "ORIGINS", parse(try_from_str = parse_cors))]
pub rpc_cors: Option<Cors>,

/// Specify Grafana data source server TCP Port.
#[structopt(long = "grafana-port", value_name = "PORT")]
pub grafana_port: Option<u16>,
/// Specify Prometheus endpoint TCP Port.
#[structopt(long = "prometheus-port", value_name = "PORT")]
pub prometheus_port: Option<u16>,

/// Do not expose a Prometheus metric endpoint.
///
/// Prometheus metric endpoint is enabled by default.
#[structopt(long = "no-prometheus")]
pub no_prometheus: bool,

/// The human-readable name for this node.
///
Expand Down
4 changes: 2 additions & 2 deletions client/service/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ sc-rpc-server = { version = "2.0.0", path = "../rpc-servers" }
sc-rpc = { version = "2.0.0", path = "../rpc" }
sc-telemetry = { version = "2.0.0", path = "../telemetry" }
sc-offchain = { version = "2.0.0", path = "../offchain" }
parity-multiaddr = { package = "parity-multiaddr", version = "0.7.1" }
grafana-data-source = { version = "0.8", path = "../../utils/grafana-data-source" }
parity-multiaddr = { package = "parity-multiaddr", version = "0.5.0" }
prometheus-exporter = { path = "../../utils/prometheus" }
sc-tracing = { version = "2.0.0", path = "../tracing" }
tracing = "0.1.10"
parity-util-mem = { version = "0.5.1", default-features = false, features = ["primitive-types"] }
Expand Down
143 changes: 112 additions & 31 deletions client/service/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use sc_network::{config::BoxFinalityProofRequestBuilder, specialization::Network
use parking_lot::{Mutex, RwLock};
use sp_runtime::generic::BlockId;
use sp_runtime::traits::{
Block as BlockT, NumberFor, SaturatedConversion, HasherFor,
Block as BlockT, NumberFor, SaturatedConversion, HasherFor, UniqueSaturatedInto,
};
use sp_api::ProvideRuntimeApi;
use sc_executor::{NativeExecutor, NativeExecutionDispatch};
Expand All @@ -53,7 +53,43 @@ use sysinfo::{get_current_pid, ProcessExt, System, SystemExt};
use sc_telemetry::{telemetry, SUBSTRATE_INFO};
use sp_transaction_pool::MaintainedTransactionPool;
use sp_blockchain;
use grafana_data_source::{self, record_metrics};
use prometheus_exporter::{register, Gauge, U64, F64, Registry, PrometheusError, Opts, GaugeVec};

/// Prometheus metric handles updated by the service's periodic status task.
///
/// All handles are created and registered together by `ServiceMetrics::register`.
struct ServiceMetrics {
/// Height of the chain, labelled by `status`
/// (the service sets the `finalized`, `best` and `sync_target` label values).
block_height_number: GaugeVec<U64>,
/// Number of network gossip peers.
peers_count: Gauge<U64>,
/// Number of transactions in the ready queue.
ready_transactions_number: Gauge<U64>,
/// Node memory usage.
memory_usage_bytes: Gauge<U64>,
/// Node CPU usage.
cpu_usage_percentage: Gauge<F64>,
/// Networking bytes per second, labelled by `direction`
/// (the service sets the `download` and `upload` label values).
network_per_sec_bytes: GaugeVec<U64>,
}

impl ServiceMetrics {
fn register(registry: &Registry) -> Result<Self, PrometheusError> {
Ok(Self {
block_height_number: register(GaugeVec::new(
Opts::new("block_height_number", "Height of the chain"),
&["status"]
)?, registry)?,
peers_count: register(Gauge::new(
"peers_count", "Number of network gossip peers",
)?, registry)?,
ready_transactions_number: register(Gauge::new(
"ready_transactions_number", "Number of transactions in the ready queue",
)?, registry)?,
memory_usage_bytes: register(Gauge::new(
"memory_usage_bytes", "Node memory usage",
)?, registry)?,
cpu_usage_percentage: register(Gauge::new(
"cpu_usage_percentage", "Node CPU usage",
)?, registry)?,
network_per_sec_bytes: register(GaugeVec::new(
Opts::new("network_per_sec_bytes", "Networking bytes per second"),
&["direction"]
)?, registry)?,
})
}
}

/// Aggregator for the components required to build a service.
///
Expand Down Expand Up @@ -90,6 +126,7 @@ pub struct ServiceBuilder<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TF
rpc_extensions: TRpc,
remote_backend: Option<Arc<dyn RemoteBlockchain<TBl>>>,
marker: PhantomData<(TBl, TRtApi)>,
prometheus_registry: Option<Registry>
}

/// Full client type.
Expand Down Expand Up @@ -266,6 +303,7 @@ where TGen: RuntimeGenesis, TCSExt: Extension {
rpc_extensions: Default::default(),
remote_backend: None,
marker: PhantomData,
prometheus_registry: None,
})
}

Expand Down Expand Up @@ -351,6 +389,7 @@ where TGen: RuntimeGenesis, TCSExt: Extension {
rpc_extensions: Default::default(),
remote_backend: Some(remote_blockchain),
marker: PhantomData,
prometheus_registry: None,
})
}
}
Expand Down Expand Up @@ -399,6 +438,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
rpc_extensions: self.rpc_extensions,
remote_backend: self.remote_backend,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}

Expand Down Expand Up @@ -441,6 +481,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
rpc_extensions: self.rpc_extensions,
remote_backend: self.remote_backend,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}

Expand All @@ -467,6 +508,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
rpc_extensions: self.rpc_extensions,
remote_backend: self.remote_backend,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}

Expand Down Expand Up @@ -507,6 +549,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
rpc_extensions: self.rpc_extensions,
remote_backend: self.remote_backend,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}

Expand Down Expand Up @@ -571,6 +614,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
rpc_extensions: self.rpc_extensions,
remote_backend: self.remote_backend,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}

Expand Down Expand Up @@ -626,6 +670,7 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
rpc_extensions: self.rpc_extensions,
remote_backend: self.remote_backend,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}

Expand Down Expand Up @@ -665,8 +710,30 @@ impl<TBl, TRtApi, TGen, TCSExt, TCl, TFchr, TSc, TImpQu, TFprb, TFpp, TNetP, TEx
rpc_extensions,
remote_backend: self.remote_backend,
marker: self.marker,
prometheus_registry: self.prometheus_registry,
})
}

/// Supplies an existing prometheus `Registry` for metrics to be recorded into.
///
/// The registry is stored in `prometheus_registry`; every other builder field is
/// carried over unchanged.
pub fn with_prometheus_registry(self, registry: Registry) -> Self {
	Self {
		prometheus_registry: Some(registry),
		// Struct update syntax moves all remaining fields out of `self` as-is.
		..self
	}
}
}

/// Implemented on `ServiceBuilder`. Allows running block commands, such as import/export/validate
Expand Down Expand Up @@ -778,6 +845,7 @@ ServiceBuilder<
transaction_pool,
rpc_extensions,
remote_backend,
prometheus_registry,
} = self;

sp_session::generate_initial_session_keys(
Expand Down Expand Up @@ -949,6 +1017,30 @@ ServiceBuilder<
));
}

// Prometheus exporter and metrics
let metrics = if let Some(port) = config.prometheus_port {
let registry = match prometheus_registry {
Some(registry) => registry,
None => Registry::new_custom(Some("substrate".into()), None)?
};

let metrics = ServiceMetrics::register(&registry)?;

let future = select(
prometheus_exporter::init_prometheus(port, registry).boxed(),
exit.clone()
).map(drop);

let _ = to_spawn_tx.unbounded_send((
Box::pin(future),
From::from("prometheus-on-block")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure what "prometheus-on-block" relates to. I would expect something like prometheus-server or prometheus-endpoint. Can you explain your thoughts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't fully understand what this was, so I copied it from the Telemetry code, but I think it should be prometheus-endpoint.

));

Some(metrics)
} else {
None
};

// Periodically notify the telemetry.
let transaction_pool_ = transaction_pool.clone();
let client_ = client.clone();
Expand All @@ -965,6 +1057,8 @@ ServiceBuilder<
let finalized_number: u64 = info.chain.finalized_number.saturated_into::<u64>();
let bandwidth_download = net_status.average_download_per_sec;
let bandwidth_upload = net_status.average_upload_per_sec;
let best_seen_block = net_status.best_seen_block
.map(|num: NumberFor<TBl>| num.unique_saturated_into() as u64);

// get cpu usage and memory usage of this process
let (cpu_usage, memory) = if let Some(self_pid) = self_pid {
Expand Down Expand Up @@ -993,25 +1087,22 @@ ServiceBuilder<
"disk_read_per_sec" => info.usage.as_ref().map(|usage| usage.io.bytes_read).unwrap_or(0),
"disk_write_per_sec" => info.usage.as_ref().map(|usage| usage.io.bytes_written).unwrap_or(0),
);
#[cfg(not(target_os = "unknown"))]
let memory_transaction_pool = parity_util_mem::malloc_size(&*transaction_pool_);
#[cfg(target_os = "unknown")]
let memory_transaction_pool = 0;
let _ = record_metrics!(
"peers" => num_peers,
"height" => best_number,
"txcount" => txpool_status.ready,
"cpu" => cpu_usage,
"memory" => memory,
"finalized_height" => finalized_number,
"bandwidth_download" => bandwidth_download,
"bandwidth_upload" => bandwidth_upload,
"used_state_cache_size" => info.usage.as_ref().map(|usage| usage.memory.state_cache).unwrap_or(0),
"used_db_cache_size" => info.usage.as_ref().map(|usage| usage.memory.database_cache).unwrap_or(0),
"disk_read_per_sec" => info.usage.as_ref().map(|usage| usage.io.bytes_read).unwrap_or(0),
"disk_write_per_sec" => info.usage.as_ref().map(|usage| usage.io.bytes_written).unwrap_or(0),
"memory_transaction_pool" => memory_transaction_pool,
);
if let Some(metrics) = metrics.as_ref() {
metrics.memory_usage_bytes.set(memory);
metrics.cpu_usage_percentage.set(f64::from(cpu_usage));
metrics.ready_transactions_number.set(txpool_status.ready as u64);
metrics.peers_count.set(num_peers as u64);

metrics.network_per_sec_bytes.with_label_values(&["download"]).set(net_status.average_download_per_sec);
metrics.network_per_sec_bytes.with_label_values(&["upload"]).set(net_status.average_upload_per_sec);

metrics.block_height_number.with_label_values(&["finalized"]).set(finalized_number);
metrics.block_height_number.with_label_values(&["best"]).set(best_number);

if let Some(best_seen_block) = best_seen_block {
metrics.block_height_number.with_label_values(&["sync_target"]).set(best_seen_block);
}
}

ready(())
});
Expand Down Expand Up @@ -1168,16 +1259,6 @@ ServiceBuilder<
telemetry
});

// Grafana data source
if let Some(port) = config.grafana_port {
let future = select(
grafana_data_source::run_server(port).boxed(),
exit.clone()
).map(drop);

let _ = to_spawn_tx.unbounded_send((Box::pin(future), From::from("grafana-server")));
}

// Instrumentation
if let Some(tracing_targets) = config.tracing_targets.as_ref() {
let subscriber = sc_tracing::ProfilingSubscriber::new(
Expand Down
Loading