Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge queue: embarking unstable (675a231) and [#5177 + #5192 + #4870 + #5029] together #5217

Closed
wants to merge 12 commits into from
10 changes: 2 additions & 8 deletions beacon_node/http_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ use std::path::PathBuf;
use std::pin::Pin;
use std::sync::Arc;
use sysinfo::{System, SystemExt};
use system_health::observe_system_health_bn;
use system_health::{observe_nat, observe_system_health_bn};
use task_spawner::{Priority, TaskSpawner};
use tokio::sync::{
mpsc::{Sender, UnboundedSender},
Expand Down Expand Up @@ -4071,13 +4071,7 @@ pub fn serve<T: BeaconChainTypes>(
.and(warp::path::end())
.then(|task_spawner: TaskSpawner<T::EthSpec>| {
task_spawner.blocking_json_task(Priority::P1, move || {
Ok(api_types::GenericResponse::from(
lighthouse_network::metrics::NAT_OPEN
.as_ref()
.map(|v| v.get())
.unwrap_or(0)
!= 0,
))
Ok(api_types::GenericResponse::from(observe_nat()))
})
});

Expand Down
2 changes: 1 addition & 1 deletion beacon_node/lighthouse_network/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ impl Default for Config {
enr_udp6_port: None,
enr_quic6_port: None,
enr_tcp6_port: None,
target_peers: 50,
target_peers: 100,
gs_config,
discv5_config,
boot_nodes_enr: vec![],
Expand Down
10 changes: 6 additions & 4 deletions beacon_node/lighthouse_network/src/discovery/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ const MAX_DISCOVERY_RETRY: usize = 3;
/// Note: we always allow a single FindPeers query, so we would be
/// running a maximum of `MAX_CONCURRENT_SUBNET_QUERIES + 1`
/// discovery queries at a time.
const MAX_CONCURRENT_SUBNET_QUERIES: usize = 2;
const MAX_CONCURRENT_SUBNET_QUERIES: usize = 4;
/// The max number of subnets to search for in a single subnet discovery query.
const MAX_SUBNETS_IN_QUERY: usize = 3;
/// The number of closest peers to search for when doing a regular peer search.
Expand Down Expand Up @@ -1003,11 +1003,13 @@ impl<TSpec: EthSpec> NetworkBehaviour for Discovery<TSpec> {
}
discv5::Event::SocketUpdated(socket_addr) => {
info!(self.log, "Address updated"; "ip" => %socket_addr.ip(), "udp_port" => %socket_addr.port());
metrics::inc_counter(&metrics::ADDRESS_UPDATE_COUNT);
metrics::check_nat();
// We have SOCKET_UPDATED messages. This occurs when discovery has a majority of
// users reporting an external port and our ENR gets updated.
// Which means we are able to do NAT traversal.
metrics::set_gauge_vec(&metrics::NAT_OPEN, &["discv5"], 1);

// Discv5 will have updated our local ENR. We save the updated version
// to disk.

if (self.update_ports.tcp4 && socket_addr.is_ipv4())
|| (self.update_ports.tcp6 && socket_addr.is_ipv6())
{
Expand Down
68 changes: 11 additions & 57 deletions beacon_node/lighthouse_network/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,17 @@
pub use lighthouse_metrics::*;

lazy_static! {
pub static ref NAT_OPEN: Result<IntCounter> = try_create_int_counter(
pub static ref NAT_OPEN: Result<IntGaugeVec> = try_create_int_gauge_vec(
"nat_open",
"An estimate indicating if the local node is exposed to the internet."
"An estimate indicating if the local node is reachable from external nodes",
&["protocol"]
);
pub static ref ADDRESS_UPDATE_COUNT: Result<IntCounter> = try_create_int_counter(
"libp2p_address_update_total",
"Count of libp2p socked updated events (when our view of our IP address has changed)"
);
pub static ref PEERS_CONNECTED: Result<IntGauge> = try_create_int_gauge(
"libp2p_peers",
"Count of libp2p peers currently connected"
);

pub static ref TCP_PEERS_CONNECTED: Result<IntGauge> = try_create_int_gauge(
"libp2p_tcp_peers",
"Count of libp2p peers currently connected via TCP"
);

pub static ref QUIC_PEERS_CONNECTED: Result<IntGauge> = try_create_int_gauge(
"libp2p_quic_peers",
"Count of libp2p peers currently connected via QUIC"
);

pub static ref PEERS_CONNECTED: Result<IntGaugeVec> =
try_create_int_gauge_vec("libp2p_peers", "Count of libp2p peers currently connected", &["direction", "transport"]);
pub static ref PEER_CONNECT_EVENT_COUNT: Result<IntCounter> = try_create_int_counter(
"libp2p_peer_connect_event_total",
"Count of libp2p peer connect events (not the current number of connected peers)"
Expand All @@ -32,13 +20,10 @@ lazy_static! {
"libp2p_peer_disconnect_event_total",
"Count of libp2p peer disconnect events"
);
pub static ref DISCOVERY_SENT_BYTES: Result<IntGauge> = try_create_int_gauge(
"discovery_sent_bytes",
"The number of bytes sent in discovery"
);
pub static ref DISCOVERY_RECV_BYTES: Result<IntGauge> = try_create_int_gauge(
"discovery_recv_bytes",
"The number of bytes received in discovery"
pub static ref DISCOVERY_BYTES: Result<IntGaugeVec> = try_create_int_gauge_vec(
"discovery_bytes",
"The number of bytes sent and received in discovery",
&["direction"]
);
pub static ref DISCOVERY_QUEUE: Result<IntGauge> = try_create_int_gauge(
"discovery_queue_size",
Expand Down Expand Up @@ -135,17 +120,6 @@ lazy_static! {
&["type"]
);

/*
* Inbound/Outbound peers
*/
/// The number of peers that dialed us.
pub static ref NETWORK_INBOUND_PEERS: Result<IntGauge> =
try_create_int_gauge("network_inbound_peers","The number of peers that are currently connected that have dialed us.");

/// The number of peers that we dialed us.
pub static ref NETWORK_OUTBOUND_PEERS: Result<IntGauge> =
try_create_int_gauge("network_outbound_peers","The number of peers that are currently connected that we dialed.");

/*
* Peer Reporting
*/
Expand All @@ -156,31 +130,11 @@ lazy_static! {
);
}

/// Checks if we consider the NAT open.
///
/// Conditions for an open NAT:
/// 1. We have 1 or more SOCKET_UPDATED messages. This occurs when discovery has a majority of
/// users reporting an external port and our ENR gets updated.
/// 2. We have 0 SOCKET_UPDATED messages (can be true if the port was correct on boot), then we
/// rely on whether we have any inbound messages. If we have no socket update messages, but
/// manage to get at least one inbound peer, we are exposed correctly.
pub fn check_nat() {
// NAT is already deemed open.
if NAT_OPEN.as_ref().map(|v| v.get()).unwrap_or(0) != 0 {
return;
}
if ADDRESS_UPDATE_COUNT.as_ref().map(|v| v.get()).unwrap_or(0) != 0
|| NETWORK_INBOUND_PEERS.as_ref().map(|v| v.get()).unwrap_or(0) != 0_i64
{
inc_counter(&NAT_OPEN);
}
}

pub fn scrape_discovery_metrics() {
let metrics =
discv5::metrics::Metrics::from(discv5::Discv5::<discv5::DefaultProtocolId>::raw_metrics());
set_float_gauge(&DISCOVERY_REQS, metrics.unsolicited_requests_per_second);
set_gauge(&DISCOVERY_SESSIONS, metrics.active_sessions as i64);
set_gauge(&DISCOVERY_SENT_BYTES, metrics.bytes_sent as i64);
set_gauge(&DISCOVERY_RECV_BYTES, metrics.bytes_recv as i64);
set_gauge_vec(&DISCOVERY_BYTES, &["inbound"], metrics.bytes_recv as i64);
set_gauge_vec(&DISCOVERY_BYTES, &["outbound"], metrics.bytes_sent as i64);
}
18 changes: 0 additions & 18 deletions beacon_node/lighthouse_network/src/peer_manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -726,29 +726,14 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
return;
}

let mut connected_peer_count = 0;
let mut inbound_connected_peers = 0;
let mut outbound_connected_peers = 0;
let mut clients_per_peer = HashMap::new();

for (_peer, peer_info) in self.network_globals.peers.read().connected_peers() {
connected_peer_count += 1;
if let PeerConnectionStatus::Connected { n_in, .. } = peer_info.connection_status() {
if *n_in > 0 {
inbound_connected_peers += 1;
} else {
outbound_connected_peers += 1;
}
}
*clients_per_peer
.entry(peer_info.client().kind.to_string())
.or_default() += 1;
}

metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peer_count);
metrics::set_gauge(&metrics::NETWORK_INBOUND_PEERS, inbound_connected_peers);
metrics::set_gauge(&metrics::NETWORK_OUTBOUND_PEERS, outbound_connected_peers);

for client_kind in ClientKind::iter() {
let value = clients_per_peer.get(&client_kind.to_string()).unwrap_or(&0);
metrics::set_gauge_vec(
Expand Down Expand Up @@ -853,11 +838,8 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
// start a ping and status timer for the peer
self.status_peers.insert(*peer_id);

let connected_peers = self.network_globals.connected_peers() as i64;

// increment prometheus metrics
metrics::inc_counter(&metrics::PEER_CONNECT_EVENT_COUNT);
metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peers);

true
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ impl<TSpec: EthSpec> NetworkBehaviour for PeerManager<TSpec> {
self.on_dial_failure(peer_id);
}
FromSwarm::ExternalAddrConfirmed(_) => {
// We have an external address confirmed, means we are able to do NAT traversal.
metrics::set_gauge_vec(&metrics::NAT_OPEN, &["libp2p"], 1);
// TODO: we likely want to check this against our assumed external tcp
// address
}
Expand Down Expand Up @@ -243,25 +245,25 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
self.events.push(PeerManagerEvent::MetaData(peer_id));
}

// Check NAT if metrics are enabled
if self.network_globals.local_enr.read().udp4().is_some() {
metrics::check_nat();
}

// increment prometheus metrics
if self.metrics_enabled {
let remote_addr = endpoint.get_remote_address();
let direction = if endpoint.is_dialer() {
"outbound"
} else {
"inbound"
};
match remote_addr.iter().find(|proto| {
matches!(
proto,
multiaddr::Protocol::QuicV1 | multiaddr::Protocol::Tcp(_)
)
}) {
Some(multiaddr::Protocol::QuicV1) => {
metrics::inc_gauge(&metrics::QUIC_PEERS_CONNECTED);
metrics::inc_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "quic"]);
}
Some(multiaddr::Protocol::Tcp(_)) => {
metrics::inc_gauge(&metrics::TCP_PEERS_CONNECTED);
metrics::inc_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "tcp"]);
}
Some(_) => unreachable!(),
None => {
Expand Down Expand Up @@ -339,17 +341,23 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
let remote_addr = endpoint.get_remote_address();
// Update the prometheus metrics
if self.metrics_enabled {
let direction = if endpoint.is_dialer() {
"outbound"
} else {
"inbound"
};

match remote_addr.iter().find(|proto| {
matches!(
proto,
multiaddr::Protocol::QuicV1 | multiaddr::Protocol::Tcp(_)
)
}) {
Some(multiaddr::Protocol::QuicV1) => {
metrics::dec_gauge(&metrics::QUIC_PEERS_CONNECTED);
metrics::dec_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "quic"]);
}
Some(multiaddr::Protocol::Tcp(_)) => {
metrics::dec_gauge(&metrics::TCP_PEERS_CONNECTED);
metrics::dec_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "tcp"]);
}
// If it's an unknown protocol we already logged when connection was established.
_ => {}
Expand Down
2 changes: 1 addition & 1 deletion beacon_node/lighthouse_network/src/service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ mod gossip_cache;
pub mod gossipsub_scoring_parameters;
pub mod utils;
/// The number of peers we target per subnet for discovery queries.
pub const TARGET_SUBNET_PEERS: usize = 6;
pub const TARGET_SUBNET_PEERS: usize = 3;

const MAX_IDENTIFY_ADDRESSES: usize = 10;

Expand Down
11 changes: 10 additions & 1 deletion beacon_node/network/src/sync/backfill_sync/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,16 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
}
// If we have run out of peers in which to retry this batch, the backfill state
// transitions to a paused state.
self.retry_batch_download(network, id)?;
// We still need to reset the state for all the affected batches, so we should not
// short circuit early
if self.retry_batch_download(network, id).is_err() {
debug!(
self.log,
"Batch could not be retried";
"batch_id" => id,
"error" => "no synced peers"
);
}
} else {
debug!(self.log, "Batch not found while removing peer";
"peer" => %peer_id, "batch" => id)
Expand Down
2 changes: 0 additions & 2 deletions beacon_node/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1122,8 +1122,6 @@ pub fn set_network_config(
config.target_peers = target_peers_str
.parse::<usize>()
.map_err(|_| format!("Invalid number of target peers: {}", target_peers_str))?;
} else {
config.target_peers = 80; // default value
}

if let Some(value) = cli_args.value_of("network-load") {
Expand Down
Loading
Loading