bgpd: allow batch handling of peer shutdown/failure #17505

Open · wants to merge 5 commits into master
3 changes: 2 additions & 1 deletion bgpd/bgp_fsm.c
@@ -1271,7 +1271,8 @@ void bgp_fsm_change_status(struct peer_connection *connection,
* Clearing
* (or Deleted).
*/
- if (!work_queue_is_scheduled(peer->clear_node_queue) &&
+ if (!CHECK_FLAG(peer->flags, PEER_FLAG_CLEARING_BATCH) &&
+     !work_queue_is_scheduled(peer->clear_node_queue) &&
status != Deleted)
BGP_EVENT_ADD(connection, Clearing_Completed);
}
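The new PEER_FLAG_CLEARING_BATCH check keeps the FSM from firing Clearing_Completed for a peer whose routes are still being cleared as part of a batch. That event presumably has to be raised later by the batch machinery, once the batch is done with the peer; a minimal sketch of that hand-off, assuming a helper in the batch-completion path (the helper name is hypothetical, the real code is outside this hunk):

/* Sketch only, not part of this diff: when a clearing batch finishes
 * with a peer, Clearing_Completed still has to reach the FSM so the
 * peer can leave the Clearing state.  Something along these lines is
 * assumed to live in the batch-completion code added elsewhere in
 * this series.
 */
static void clearing_batch_finish_peer(struct peer *peer)
{
	UNSET_FLAG(peer->flags, PEER_FLAG_CLEARING_BATCH);
	BGP_EVENT_ADD(peer->connection, Clearing_Completed);
}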
4 changes: 1 addition & 3 deletions bgpd/bgp_io.c
@@ -100,7 +100,6 @@ void bgp_reads_off(struct peer_connection *connection)

event_cancel_async(fpt->master, &connection->t_read, NULL);
EVENT_OFF(connection->t_process_packet);
- EVENT_OFF(connection->t_process_packet_error);

UNSET_FLAG(connection->thread_flags, PEER_THREAD_READS_ON);
}
@@ -252,8 +251,7 @@ static void bgp_process_reads(struct event *thread)
/* Handle the error in the main pthread, include the
* specific state change from 'bgp_read'.
*/
- event_add_event(bm->master, bgp_packet_process_error, connection,
-                 code, &connection->t_process_packet_error);
+ bgp_enqueue_conn_err(peer->bgp, connection, code);
goto done;
}

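With this change the io pthread no longer schedules a per-connection t_process_packet_error event (and the handler it pointed at, bgp_packet_process_error(), is deleted from bgp_packet.c below). Instead the error is handed to bgp_enqueue_conn_err(), which is added elsewhere in this series; it presumably records the failed connection and error code and wakes a single main-pthread handler, so FSM processing stays out of the io pthread and multiple failing peers can be collected into one batch. A rough sketch under those assumptions (the struct bgp members, list names and handler name are all assumptions):

void bgp_enqueue_conn_err(struct bgp *bgp, struct peer_connection *connection,
			  int errcode)
{
	/* Stash the error against the connection (member name assumed) */
	connection->connection_errcode = errcode;

	/* This runs in the io pthread, so the per-bgp error list is
	 * assumed to be protected by a mutex.
	 */
	frr_with_mutex (&bgp->peer_errs_mtx) {
		bgp_peer_conn_errlist_add_tail(&bgp->peer_conn_errlist,
					       connection);
	}

	/* One event in the main pthread drains the whole list */
	event_add_event(bm->master, bgp_process_conn_error, bgp, 0,
			&bgp->t_conn_err);
}

The graceful-restart/NSF last_reset handling from the deleted bgp_packet_process_error() presumably moves into that main-pthread handler.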
2 changes: 2 additions & 0 deletions bgpd/bgp_memory.h
@@ -136,4 +136,6 @@ DECLARE_MTYPE(BGP_SOFT_VERSION);

DECLARE_MTYPE(BGP_EVPN_OVERLAY);

DECLARE_MTYPE(CLEARING_BATCH);

#endif /* _QUAGGA_BGP_MEMORY_H */
34 changes: 0 additions & 34 deletions bgpd/bgp_packet.c
@@ -4196,37 +4196,3 @@ void bgp_send_delayed_eor(struct bgp *bgp)
for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer))
bgp_write_proceed_actions(peer);
}

- /*
-  * Task callback to handle socket error encountered in the io pthread. We avoid
-  * having the io pthread try to enqueue fsm events or mess with the peer
-  * struct.
-  */
- void bgp_packet_process_error(struct event *thread)
- {
- struct peer_connection *connection;
- struct peer *peer;
- int code;
-
- connection = EVENT_ARG(thread);
- peer = connection->peer;
- code = EVENT_VAL(thread);
-
- if (bgp_debug_neighbor_events(peer))
- zlog_debug("%s [Event] BGP error %d on fd %d", peer->host, code,
- connection->fd);
-
- /* Closed connection or error on the socket */
- if (peer_established(connection)) {
- if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
- || CHECK_FLAG(peer->flags,
- PEER_FLAG_GRACEFUL_RESTART_HELPER))
- && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
- peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
- SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
- } else
- peer->last_reset = PEER_DOWN_CLOSE_SESSION;
- }
-
- bgp_event_update(connection, code);
- }
2 changes: 0 additions & 2 deletions bgpd/bgp_packet.h
@@ -75,8 +75,6 @@ extern void bgp_process_packet(struct event *event);

extern void bgp_send_delayed_eor(struct bgp *bgp);

- /* Task callback to handle socket error encountered in the io pthread */
- void bgp_packet_process_error(struct event *thread);
extern struct bgp_notify
bgp_notify_decapsulate_hard_reset(struct bgp_notify *notify);
extern bool bgp_has_graceful_restart_notification(struct peer *peer);
232 changes: 232 additions & 0 deletions bgpd/bgp_route.c
@@ -78,6 +78,9 @@

#include "bgpd/bgp_route_clippy.c"

/* Memory for batched clearing of peers from the RIB */
DEFINE_MTYPE(BGPD, CLEARING_BATCH, "Clearing batch");

DEFINE_HOOK(bgp_snmp_update_stats,
(struct bgp_dest *rn, struct bgp_path_info *pi, bool added),
(rn, pi, added));
@@ -6186,11 +6189,240 @@ void bgp_clear_route(struct peer *peer, afi_t afi, safi_t safi)
peer_unlock(peer);
}

/*
* Callback scheduled to process prefixes/dests for batch clearing; the
* dests were found via a rib walk.
* The one-peer version of this uses a per-peer workqueue to manage
* rescheduling, but we're just using a fixed limit here.
*/

/* Limit the number of dests we'll process per callback */
#define BGP_CLEARING_BATCH_MAX_DESTS 100

static void bgp_clear_batch_dests_task(struct event *event)
{
struct bgp_clearing_info *cinfo = EVENT_ARG(event);
struct bgp_dest *dest;
struct bgp_path_info *pi;
struct bgp_table *table;
struct bgp *bgp;
afi_t afi;
safi_t safi;
int counter = 0;

bgp = cinfo->bgp;

next_dest:

dest = bgp_clearing_batch_next_dest(cinfo);
if (dest == NULL)
goto done;

table = bgp_dest_table(dest);
afi = table->afi;
safi = table->safi;

/* Have to check every path: it is possible that we have multiple paths
* for a prefix from a peer if that peer is using AddPath.
*/
for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) {
if (!bgp_clearing_batch_check_peer(cinfo, pi->peer))
continue;

/* graceful restart STALE flag set. */
if (((CHECK_FLAG(pi->peer->sflags, PEER_STATUS_NSF_WAIT)
&& pi->peer->nsf[afi][safi])
|| CHECK_FLAG(pi->peer->af_sflags[afi][safi],
PEER_STATUS_ENHANCED_REFRESH))
&& !CHECK_FLAG(pi->flags, BGP_PATH_STALE)
&& !CHECK_FLAG(pi->flags, BGP_PATH_UNUSEABLE))
bgp_path_info_set_flag(dest, pi, BGP_PATH_STALE);
else {
/* If this is an EVPN route, process for
* un-import. */
if (safi == SAFI_EVPN)
bgp_evpn_unimport_route(
bgp, afi, safi,
bgp_dest_get_prefix(dest), pi);
/* Handle withdraw for VRF route-leaking and L3VPN */
if (SAFI_UNICAST == safi
&& (bgp->inst_type == BGP_INSTANCE_TYPE_VRF ||
bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_from_vrf_withdraw(bgp_get_default(),
bgp, pi);
}
if (SAFI_MPLS_VPN == safi &&
bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT) {
vpn_leak_to_vrf_withdraw(pi);
}

bgp_rib_remove(dest, pi, pi->peer, afi, safi);
}
}

/* Unref this dest and table */
bgp_dest_unlock_node(dest);
bgp_table_unlock(bgp_dest_table(dest));

counter++;
if (counter < BGP_CLEARING_BATCH_MAX_DESTS)
goto next_dest;

done:

/* If there are still dests to process, reschedule. */
if (bgp_clearing_batch_dests_present(cinfo)) {
if (bgp_debug_neighbor_events(NULL))
zlog_debug("%s: Batch %p: Rescheduled after processing %d dests",
__func__, cinfo, counter);

event_add_event(bm->master, bgp_clear_batch_dests_task, cinfo,
0, &cinfo->t_sched);
} else {
if (bgp_debug_neighbor_events(NULL))
zlog_debug("%s: Batch %p: Done after processing %d dests",
__func__, cinfo, counter);
bgp_clearing_batch_completed(cinfo);
}

return;
}
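bgp_clear_batch_dests_task() drains a queue of dests that the RIB walk below fills, caps each run at BGP_CLEARING_BATCH_MAX_DESTS, and re-adds itself with event_add_event() until the queue is empty, at which point bgp_clearing_batch_completed() finishes the batch. The queue helpers themselves (bgp_clearing_batch_next_dest(), bgp_clearing_batch_add_dest(), bgp_clearing_batch_dests_present()) are added outside this file; a rough sketch of the shape they could take, assuming struct bgp_clearing_info carries a typesafe FIFO of per-dest nodes (the list and node names are assumptions):

/* Assumed node type holding one queued, locked dest */
struct bgp_clearing_dest {
	struct bgp_dest *dest;
	struct bgp_clearing_destlist_item link;
};

static struct bgp_dest *
bgp_clearing_batch_next_dest(struct bgp_clearing_info *cinfo)
{
	struct bgp_clearing_dest *node;
	struct bgp_dest *dest = NULL;

	node = bgp_clearing_destlist_pop(&cinfo->destlist);
	if (node) {
		dest = node->dest;
		XFREE(MTYPE_CLEARING_BATCH, node);
	}

	return dest;
}

static bool bgp_clearing_batch_dests_present(struct bgp_clearing_info *cinfo)
{
	return bgp_clearing_destlist_count(&cinfo->destlist) > 0;
}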

/*
* Walk a single table for batch peer clearing processing
*/
static void clear_batch_table_helper(struct bgp_clearing_info *cinfo,
struct bgp_table *table)
{
struct bgp_dest *dest;
bool force = (cinfo->bgp->process_queue == NULL);
uint32_t examined = 0, queued = 0;

for (dest = bgp_table_top(table); dest; dest = bgp_route_next(dest)) {
struct bgp_path_info *pi, *next;
struct bgp_adj_in *ain;
struct bgp_adj_in *ain_next;

examined++;

ain = dest->adj_in;
while (ain) {
ain_next = ain->next;

if (bgp_clearing_batch_check_peer(cinfo, ain->peer))
bgp_adj_in_remove(&dest, ain);

ain = ain_next;

assert(dest != NULL);
}

for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = next) {
next = pi->next;
if (!bgp_clearing_batch_check_peer(cinfo, pi->peer))
continue;

queued++;

if (force) {
bgp_path_info_reap(dest, pi);
} else {
/* Unlocked after processing */
bgp_table_lock(bgp_dest_table(dest));
bgp_dest_lock_node(dest);

bgp_clearing_batch_add_dest(cinfo, dest);
break;
}
}
}

if (examined > 0) {
if (bgp_debug_neighbor_events(NULL))
zlog_debug("%s: %s/%s: examined %u, queued %u",
__func__, afi2str(table->afi),
safi2str(table->safi), examined, queued);
}
}
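Two details in clear_batch_table_helper() are worth noting: each dest it queues carries both a dest lock and a table lock ("Unlocked after processing"), and the matching unlocks happen in bgp_clear_batch_dests_task() above once the dest has been handled; and the per-path peer test relies on bgp_clearing_batch_check_peer(), added outside this file, which is presumably a simple membership lookup against the batch's peer set. A sketch under that assumption (the container name is assumed):

static bool bgp_clearing_batch_check_peer(struct bgp_clearing_info *cinfo,
					  const struct peer *peer)
{
	/* Assumed: the batch tracks its member peers in a hash table */
	return hash_lookup(cinfo->peers, (void *)peer) != NULL;
}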

/*
* RIB-walking helper for batch clearing work: walk all tables, identify
* dests that are affected by the peers in the batch, enqueue the dests for
* async processing.
*/
static void clear_batch_rib_helper(struct bgp_clearing_info *cinfo)
{
afi_t afi;
safi_t safi;
struct bgp_dest *dest;
struct bgp_table *table;

FOREACH_AFI_SAFI (afi, safi) {
/* Identify table to be examined */
if (safi != SAFI_MPLS_VPN && safi != SAFI_ENCAP &&
safi != SAFI_EVPN) {
table = cinfo->bgp->rib[afi][safi];
if (!table)
continue;

clear_batch_table_helper(cinfo, table);
} else {
for (dest = bgp_table_top(cinfo->bgp->rib[afi][safi]);
dest; dest = bgp_route_next(dest)) {
table = bgp_dest_get_bgp_table_info(dest);
if (!table)
continue;

/* TODO -- record the tables we've seen
* and don't repeat any?
*/

clear_batch_table_helper(cinfo, table);
}
}
}
}

/*
* Identify prefixes that need to be cleared for a batch of peers in 'cinfo'.
* The actual clearing processing will be done async...
*/
void bgp_clear_route_batch(struct bgp_clearing_info *cinfo)
{
if (bgp_debug_neighbor_events(NULL))
zlog_debug("%s: BGP %s, batch %p", __func__,
cinfo->bgp->name_pretty, cinfo);

/* Walk the rib, checking the peers in the batch */
clear_batch_rib_helper(cinfo);

/* If we found some prefixes, schedule a task to begin work. */
if (bgp_clearing_batch_dests_present(cinfo))
event_add_event(bm->master, bgp_clear_batch_dests_task, cinfo,
0, &cinfo->t_sched);

/* NB -- it's the caller's job to clean up, release refs, etc. if
* we didn't find any dests
*/
}
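Note the contract spelled out at the end of bgp_clear_route_batch(): the async task is only scheduled when the walk actually queued dests. If nothing matched, no task will run and nothing will call bgp_clearing_batch_completed(), so the caller has to wrap up the batch itself. A hypothetical caller, just to illustrate the contract (the real call site is in code added elsewhere in this series):

	/* Kick off the RIB walk for the batch of peers */
	bgp_clear_route_batch(cinfo);

	/* No dests queued: no async task will finish the batch, so the
	 * caller releases refs and cleans up the batch itself.
	 */
	if (!bgp_clearing_batch_dests_present(cinfo))
		bgp_clearing_batch_completed(cinfo);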

void bgp_clear_route_all(struct peer *peer)
{
afi_t afi;
safi_t safi;

/* We may be able to batch multiple peers' clearing work: check
* and see.
*/
if (bgp_clearing_batch_add_peer(peer->bgp, peer)) {
if (bgp_debug_neighbor_events(peer))
zlog_debug("%s: peer %pBP batched", __func__, peer);
return;
}

if (bgp_debug_neighbor_events(peer))
zlog_debug("%s: peer %pBP", __func__, peer);

FOREACH_AFI_SAFI (afi, safi)
bgp_clear_route(peer, afi, safi);

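The bgp_clear_route_all() change is the entry point for batching: bgp_clearing_batch_add_peer() (added elsewhere in this series) is offered the peer first, and a true return means the peer has joined an active clearing batch whose RIB walk covers it, so the per-peer FOREACH_AFI_SAFI walk is skipped; false falls back to the existing per-peer path unchanged. A rough sketch of that decision, assuming the bgp instance tracks a currently-open batch (the member and container names are assumptions):

bool bgp_clearing_batch_add_peer(struct bgp *bgp, struct peer *peer)
{
	/* Assumed: an "open" batch exists while a group of peers is
	 * being brought down together (e.g. at shutdown or on a shared
	 * failure), tracked on the bgp instance.
	 */
	struct bgp_clearing_info *cinfo = bgp->current_clearing;

	if (cinfo == NULL)
		return false;	/* no batch active: clear this peer alone */

	SET_FLAG(peer->flags, PEER_FLAG_CLEARING_BATCH);
	hash_get(cinfo->peers, peer, hash_alloc_intern);

	return true;
}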
6 changes: 3 additions & 3 deletions bgpd/bgp_route.h
@@ -746,6 +746,9 @@ extern void bgp_soft_reconfig_table_task_cancel(const struct bgp *bgp,
extern bool bgp_soft_reconfig_in(struct peer *peer, afi_t afi, safi_t safi);
extern void bgp_clear_route(struct peer *, afi_t, safi_t);
extern void bgp_clear_route_all(struct peer *);
+ /* Clear routes for a batch of peers */
+ void bgp_clear_route_batch(struct bgp_clearing_info *cinfo);

extern void bgp_clear_adj_in(struct peer *, afi_t, safi_t);
extern void bgp_clear_stale_route(struct peer *, afi_t, safi_t);
extern void bgp_set_stale_route(struct peer *peer, afi_t afi, safi_t safi);
@@ -899,9 +902,6 @@ extern bool subgroup_announce_check(struct bgp_dest *dest,
const struct prefix *p, struct attr *attr,
struct attr *post_attr);

- extern void bgp_peer_clear_node_queue_drain_immediate(struct peer *peer);
- extern void bgp_process_queues_drain_immediate(void);

/* for encap/vpn */
extern struct bgp_dest *bgp_safi_node_lookup(struct bgp_table *table,
safi_t safi,