diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index f173bd01f20f..400475a86107 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -79,6 +79,8 @@ static void bgp_evpn_remote_ip_hash_unlink_nexthop(struct hash_bucket *bucket, void *args); static struct in_addr zero_vtep_ip; +static void bgp_evpn_local_l3vni_del_post_processing(struct bgp *bgp_vrf); + /* * Private functions. */ @@ -3882,14 +3884,6 @@ int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf, const struct prefix_evpn *evp = (const struct prefix_evpn *)bgp_dest_get_prefix(pi->net); - /* Consider "valid" remote routes applicable for - * this VRF. - */ - if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) - return 0; - if (is_route_matching_for_vrf(bgp_vrf, pi)) { if (bgp_evpn_route_rmac_self_check(bgp_vrf, evp, pi)) return 0; @@ -3916,26 +3910,67 @@ int bgp_evpn_route_entry_install_if_vrf_match(struct bgp *bgp_vrf, return ret; } +#define BGP_PROC_L3VNI_LIMIT 10 +static int install_uninstall_evpn_remote_route_per_l3vni(struct bgp_path_info *pi, + const struct prefix_evpn *evp) +{ + int ret = 0; + uint8_t vni_iter = 0; + bool is_install = false; + struct bgp *bgp_to_proc = NULL; + struct bgp *bgp_to_proc_next = NULL; + + for (bgp_to_proc = zebra_l3_vni_first(&bm->zebra_l3_vni_head); + bgp_to_proc && vni_iter < BGP_PROC_L3VNI_LIMIT; bgp_to_proc = bgp_to_proc_next) { + bgp_to_proc_next = zebra_l3_vni_next(&bm->zebra_l3_vni_head, bgp_to_proc); + vni_iter++; + is_install = !!CHECK_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + + ret = bgp_evpn_route_entry_install_if_vrf_match(bgp_to_proc, pi, is_install); + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in L3VNI %u during BP", + bgp_to_proc->vrf_id, is_install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type].str, + bgp_to_proc->l3vni); + zebra_l3_vni_del(&bm->zebra_l3_vni_head, bgp_to_proc); + if (!is_install) + bgp_evpn_local_l3vni_del_post_processing(bgp_to_proc); + + return ret; + } + } + + return 0; +} /* * Install or uninstall mac-ip routes are appropriate for this * particular VRF. */ -static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) +int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) { afi_t afi; safi_t safi; struct bgp_dest *rd_dest, *dest; struct bgp_table *table; struct bgp_path_info *pi; - int ret; + int ret = 0; struct bgp *bgp_evpn = NULL; + uint8_t count = 0; afi = AFI_L2VPN; safi = SAFI_EVPN; bgp_evpn = bgp_get_evpn(); - if (!bgp_evpn) + if (!bgp_evpn) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("No BGP EVPN instance found..."); + return -1; + } + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Total %u L3VNI BGP-VRFs pending to be processed for remote route installation", + __func__, (uint32_t)zebra_l3_vni_count(&bm->zebra_l3_vni_head)); /* Walk entire global routing table and evaluate routes which could be * imported into this VRF. Note that we need to loop through all global * routes to determine which route matches the import rt on vrf @@ -3952,30 +3987,108 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) (const struct prefix_evpn *)bgp_dest_get_prefix( dest); - /* if not mac-ip route skip this route */ - if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE - || evp->prefix.route_type - == BGP_EVPN_IP_PREFIX_ROUTE)) - continue; - - /* if not a mac+ip route skip this route */ - if (!(is_evpn_prefix_ipaddr_v4(evp) - || is_evpn_prefix_ipaddr_v6(evp))) + /* Proceed only for MAC_IP/IP-Pfx routes */ + switch (evp->prefix.route_type) { + case BGP_EVPN_IP_PREFIX_ROUTE: + case BGP_EVPN_MAC_IP_ROUTE: + if (!(is_evpn_prefix_ipaddr_v4(evp) || + is_evpn_prefix_ipaddr_v6(evp))) + continue; + break; + default: continue; + } for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { - ret = bgp_evpn_route_entry_install_if_vrf_match( - bgp_vrf, pi, install); - if (ret) { - bgp_dest_unlock_node(rd_dest); - bgp_dest_unlock_node(dest); - return ret; + /* Consider "valid" remote routes applicable for + * this VRF */ + if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) && + pi->type == ZEBRA_ROUTE_BGP && + pi->sub_type == BGP_ROUTE_NORMAL)) + continue; + + if (!bgp_vrf) { + ret = install_uninstall_evpn_remote_route_per_l3vni(pi, evp); + if (ret) { + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + + return ret; + } + } else { + ret = bgp_evpn_route_entry_install_if_vrf_match(bgp_vrf, pi, + install); + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in L3VNI %u", + bgp_vrf->vrf_id, + install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type] + .str, + bgp_vrf->l3vni); + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + + return ret; + } } } } } + if (!bgp_vrf) { + while (count < BGP_PROC_L3VNI_LIMIT) { + struct bgp *bgp_to_proc = zebra_l3_vni_pop(&bm->zebra_l3_vni_head); + + if (!bgp_to_proc) + return 0; + + if (CHECK_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE)) + bgp_evpn_local_l3vni_del_post_processing(bgp_to_proc); + + UNSET_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + UNSET_FLAG(bgp_to_proc->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); + count++; + } + } + + return 0; +} + +#define BGP_PROC_L2VNI_LIMIT 10 +static int install_evpn_remote_route_per_l2vni(struct bgp *bgp, struct bgp_path_info *pi, + const struct prefix_evpn *evp) +{ + int ret = 0; + uint8_t vni_iter = 0; + struct bgpevpn *t_vpn = NULL; + struct bgpevpn *t_vpn_next = NULL; + + for (t_vpn = zebra_l2_vni_first(&bm->zebra_l2_vni_head); + t_vpn && vni_iter < BGP_PROC_L2VNI_LIMIT; t_vpn = t_vpn_next) { + t_vpn_next = zebra_l2_vni_next(&bm->zebra_l2_vni_head, t_vpn); + vni_iter++; + /* + * Skip install/uninstall if the route entry is not needed to + * be imported into the VNI i.e. RTs dont match + */ + if (!is_route_matching_for_vni(bgp, t_vpn, pi)) + continue; + + ret = install_evpn_route_entry(bgp, t_vpn, evp, pi); + + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to install EVPN %s route in VNI %u during BP", + bgp->vrf_id, bgp_evpn_route_type_str[evp->prefix.route_type].str, + t_vpn->vni); + zebra_l2_vni_del(&bm->zebra_l2_vni_head, t_vpn); + + return ret; + } + } + return 0; } @@ -3983,26 +4096,41 @@ static int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install) * Install or uninstall routes of specified type that are appropriate for this * particular VNI. */ -static int install_uninstall_routes_for_vni(struct bgp *bgp, - struct bgpevpn *vpn, bool install) +int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install) { afi_t afi; safi_t safi; struct bgp_dest *rd_dest, *dest; struct bgp_table *table; struct bgp_path_info *pi; - int ret; + int ret = 0; + uint8_t count = 0; + bool walk_fifo = false; afi = AFI_L2VPN; safi = SAFI_EVPN; - /* Walk entire global routing table and evaluate routes which could be + if (!bgp) { + walk_fifo = true; + bgp = bgp_get_evpn(); + if (!bgp) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("No BGP EVPN instance found..."); + + return -1; + } + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Total %u L2VNI VPNs pending to be processed for remote route installation", + __func__, (uint32_t)zebra_l2_vni_count(&bm->zebra_l2_vni_head)); + /* + * Walk entire global routing table and evaluate routes which could be * imported into this VPN. Note that we cannot just look at the routes - * for - * the VNI's RD - remote routes applicable for this VNI could have any - * RD. + * for the VNI's RD - remote routes applicable for this VNI could have + * any RD. + * Note: EVPN routes are a 2-level table. */ - /* EVPN routes are a 2-level table. */ for (rd_dest = bgp_table_top(bgp->rib[afi][safi]); rd_dest; rd_dest = bgp_route_next(rd_dest)) { table = bgp_dest_get_bgp_table_info(rd_dest); @@ -4015,54 +4143,79 @@ static int install_uninstall_routes_for_vni(struct bgp *bgp, (const struct prefix_evpn *)bgp_dest_get_prefix( dest); - if (evp->prefix.route_type != BGP_EVPN_IMET_ROUTE && - evp->prefix.route_type != BGP_EVPN_AD_ROUTE && - evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE) + /* Proceed only for IMET/AD/MAC_IP routes */ + switch (evp->prefix.route_type) { + case BGP_EVPN_IMET_ROUTE: + case BGP_EVPN_AD_ROUTE: + case BGP_EVPN_MAC_IP_ROUTE: + break; + default: continue; + } for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { - /* Consider "valid" remote routes applicable for - * this VNI. */ - if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) - continue; - - if (!is_route_matching_for_vni(bgp, vpn, pi)) + /* + * Skip install/uninstall if + * - Not a valid remote routes + * - Install & evpn route matchesi macvrf SOO + */ + if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) && + pi->type == ZEBRA_ROUTE_BGP && + pi->sub_type == BGP_ROUTE_NORMAL) || + (install && bgp_evpn_route_matches_macvrf_soo(pi, evp))) continue; - if (install) { - if (bgp_evpn_route_matches_macvrf_soo( - pi, evp)) + if (walk_fifo) { + ret = install_evpn_remote_route_per_l2vni(bgp, pi, evp); + if (ret) { + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + return ret; + } + } else { + /* + * Skip install/uninstall if the route + * entry is not needed to be imported + * into the VNI i.e. RTs dont match + */ + if (!is_route_matching_for_vni(bgp, vpn, pi)) continue; - ret = install_evpn_route_entry(bgp, vpn, - evp, pi); - } else - ret = uninstall_evpn_route_entry( - bgp, vpn, evp, pi); - - if (ret) { - flog_err(EC_BGP_EVPN_FAIL, - "%u: Failed to %s EVPN %s route in VNI %u", - bgp->vrf_id, - install ? "install" - : "uninstall", - evp->prefix.route_type == - BGP_EVPN_MAC_IP_ROUTE - ? "MACIP" - : "IMET", - vpn->vni); - - bgp_dest_unlock_node(rd_dest); - bgp_dest_unlock_node(dest); - return ret; + if (install) + ret = install_evpn_route_entry(bgp, vpn, evp, pi); + else + ret = uninstall_evpn_route_entry(bgp, vpn, evp, pi); + + if (ret) { + flog_err(EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in VNI %u", + bgp->vrf_id, + install ? "install" : "uninstall", + bgp_evpn_route_type_str[evp->prefix.route_type] + .str, + vpn->vni); + + bgp_dest_unlock_node(rd_dest); + bgp_dest_unlock_node(dest); + return ret; + } } } } } + if (walk_fifo) { + while (count < BGP_PROC_L2VNI_LIMIT) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + if (!vpn) + return 0; + + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + count++; + } + } + return 0; } @@ -6780,6 +6933,53 @@ static void link_l2vni_hash_to_l3vni(struct hash_bucket *bucket, bgpevpn_link_to_l3vni(vpn); } +static void bgp_evpn_l3vni_remote_route_processing(struct bgp *bgp, bool install) +{ + /* + * Anytime BGP gets a Bulk of L3 VNI ADD/DEL from zebra, + * - Walking the entire global routing table per VNI is very expensive. + * - The next read (say of another VNI ADD/DEL) from the socket does + * not proceed unless this walk is complete. + * This results in huge output buffer FIFO growth spiking up the + * memory in zebra. + * + * To avoid this, idea is to hookup the BGP-VRF off the struct + * bgp_master and maintain a struct bgp FIFO list which is processed + * later on, where we walk a chunk of BGP-VRFs and do the remote route + * install/uninstall. + */ + if (!CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL) && + !CHECK_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE)) + zebra_l3_vni_add_tail(&bm->zebra_l3_vni_head, bgp); + + if (install) { + SET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + UNSET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); + } else { + SET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE); + UNSET_FLAG(bgp->flags, BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL); + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Scheduling L3VNI %s to be processed later for %s VNI %u", + install ? "ADD" : "DEL", bgp->name_pretty, bgp->l3vni); + /* + * If there are no BGP-VRFs's in the bm L3VNI FIFO list i.e. an update + * for an already processed L3VNI comes in, schedule the remote route + * install immediately. + * + * In all other cases, it is ok to schedule the remote route un/install + * after a small sleep. This is to give benefit of doubt in case more + * L3VNI events come. + */ + if (zebra_l3_vni_count(&bm->zebra_l3_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, + 20, &bm->t_bgp_zebra_l3_vni); + else + event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, 0, + &bm->t_bgp_zebra_l3_vni); +} + int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, struct ethaddr *svi_rmac, struct ethaddr *vrr_rmac, @@ -6925,52 +7125,36 @@ int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, /* advertise type-5 routes if needed */ update_advertise_vrf_routes(bgp_vrf); - /* install all remote routes belonging to this l3vni into correspondng - * vrf */ - install_routes_for_vrf(bgp_vrf); + bgp_evpn_l3vni_remote_route_processing(bgp_vrf, true); return 0; } -int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) +static void bgp_evpn_local_l3vni_del_post_processing(struct bgp *bgp_vrf) { - struct bgp *bgp_vrf = NULL; /* bgp vrf instance */ struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */ struct listnode *node = NULL; struct listnode *next = NULL; struct bgpevpn *vpn = NULL; - bgp_vrf = bgp_lookup_by_vrf_id(vrf_id); - if (!bgp_vrf) { - flog_err( - EC_BGP_NO_DFLT, - "Cannot process L3VNI %u Del - Could not find BGP instance", - l3vni); - return -1; - } - bgp_evpn = bgp_get_evpn(); if (!bgp_evpn) { - flog_err( - EC_BGP_NO_DFLT, - "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", - l3vni); - return -1; + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", + bgp_vrf->l3vni); + return; } if (CHECK_FLAG(bgp_evpn->flags, BGP_FLAG_DELETE_IN_PROGRESS)) { flog_err(EC_BGP_NO_DFLT, - "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", - l3vni); - return -1; + "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", + bgp_vrf->l3vni); + return; } - /* Remove remote routes from BGT VRF even if BGP_VRF_AUTO is configured, - * bgp_delete would not remove/decrement bgp_path_info of the ip_prefix - * routes. This will uninstalling the routes from zebra and decremnt the - * bgp info count. - */ - uninstall_routes_for_vrf(bgp_vrf); + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("In %s for L3VNI %u after remote route installation", __func__, + bgp_vrf->l3vni); /* delete/withdraw all type-5 routes */ delete_withdraw_vrf_routes(bgp_vrf); @@ -7020,10 +7204,91 @@ int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) /* Delete the instance if it was autocreated */ if (CHECK_FLAG(bgp_vrf->vrf_flags, BGP_VRF_AUTO)) bgp_delete(bgp_vrf); +} + +int bgp_evpn_local_l3vni_del(vni_t l3vni, vrf_id_t vrf_id) +{ + struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */ + struct bgp *bgp_vrf = NULL; /* bgp vrf instance */ + + bgp_vrf = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp_vrf) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find BGP instance", l3vni); + return -1; + } + + bgp_evpn = bgp_get_evpn(); + if (!bgp_evpn) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u Del - Could not find EVPN BGP instance", l3vni); + return -1; + } + + if (CHECK_FLAG(bgp_evpn->flags, BGP_FLAG_DELETE_IN_PROGRESS)) { + flog_err(EC_BGP_NO_DFLT, + "Cannot process L3VNI %u ADD - EVPN BGP instance is shutting down", l3vni); + return -1; + } + + if (!bgp_vrf->l3vni) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug( + "Returning from %s since VNI %u is already deleted", + __func__, l3vni); + + return -1; + } + + /* + * Move all the l3vni_delete operation post the remote route + * installation processing i.e. add the L3VNI DELETE item on the + * BGP-VRFs FIFO and move on. + */ + bgp_evpn_l3vni_remote_route_processing(bgp_vrf, false); return 0; } +static void bgp_evpn_l2vni_remote_route_processing(struct bgpevpn *vpn) +{ + /* + * Anytime BGP gets a Bulk of L2 VNIs ADD/UPD from zebra, + * - Walking the entire global routing table per VNI is very expensive. + * - The next read (say of another VNI ADD/UPD) from the socket does + * not proceed unless this walk is complete. + * This results in huge output buffer FIFO growth spiking up the + * memory in zebra. + * + * To avoid this, idea is to hookup the VPN off the struct bgp_master + * and maintain a VPN FIFO list which is processed later on, where we + * walk a chunk of VPNs and do the remote route install. + */ + if (!CHECK_FLAG(vpn->flags, VNI_FLAG_ADD)) { + zebra_l2_vni_add_tail(&bm->zebra_l2_vni_head, vpn); + SET_FLAG(vpn->flags, VNI_FLAG_ADD); + } + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Scheduling L2VNI ADD to be processed later for VNI %u", vpn->vni); + + /* + * If there are no VNI's in the bgp VPN FIFO list i.e. an update + * for an already processed VNI comes in, schedule the remote + * route install immediately. + * + * In all other cases, it is ok to schedule the remote route install + * after a small sleep. This is to give benefit of doubt in case more + * L2VNI ADD events come. + */ + if (zebra_l2_vni_count(&bm->zebra_l2_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, + 20, &bm->t_bgp_zebra_l2_vni); + else + event_add_event(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, 0, + &bm->t_bgp_zebra_l2_vni); +} + /* * When bgp instance goes down also clean up what might have been left over * from evpn. @@ -7047,6 +7312,10 @@ int bgp_evpn_local_vni_del(struct bgp *bgp, vni_t vni) if (!vpn) return 0; + /* Remove the VPN from the bgp VPN FIFO (if exists) */ + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + zebra_l2_vni_del(&bm->zebra_l2_vni_head, vpn); + /* Remove all local EVPN routes and schedule for processing (to * withdraw from peers). */ @@ -7203,12 +7472,6 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, } } - /* If we have learnt and retained remote routes (VTEPs, MACs) for this - * VNI, - * install them. - */ - install_routes_for_vni(bgp, vpn); - /* If we are advertising gateway mac-ip It needs to be conveyed again to zebra */ bgp_zebra_advertise_gw_macip(bgp, vpn->advertise_gw_macip, vpn->vni); @@ -7216,6 +7479,8 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, /* advertise svi mac-ip knob to zebra */ bgp_zebra_advertise_svi_macip(bgp, vpn->advertise_svi_macip, vpn->vni); + bgp_evpn_l2vni_remote_route_processing(vpn); + return 0; } @@ -7245,8 +7510,17 @@ void bgp_evpn_flood_control_change(struct bgp *bgp) */ void bgp_evpn_cleanup_on_disable(struct bgp *bgp) { - hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *, - void *))cleanup_vni_on_disable, + struct bgpevpn *vpn = NULL; + uint32_t vni_count = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + + /* Cleanup VNI FIFO list from this bgp instance */ + while (vni_count) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + vni_count--; + } + + hash_iterate(bgp->vnihash, (void (*)(struct hash_bucket *, void *))cleanup_vni_on_disable, bgp); } diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index 1a333a5a09a3..8bbc5d3c37f2 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -200,4 +200,6 @@ bool bgp_evpn_skip_vrf_import_of_local_es(struct bgp *bgp_vrf, const struct pref int uninstall_evpn_route_entry_in_vrf(struct bgp *bgp_vrf, const struct prefix_evpn *evp, struct bgp_path_info *parent_pi); extern void bgp_zebra_evpn_pop_items_from_announce_fifo(struct bgpevpn *vpn); +extern int install_uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn, bool install); +extern int install_uninstall_routes_for_vrf(struct bgp *bgp_vrf, bool install); #endif /* _QUAGGA_BGP_EVPN_H */ diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index b05df3d82ae7..cc92807d42b4 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -32,6 +32,15 @@ #define BGP_EVPN_TYPE4_V4_PSIZE 23 #define BGP_EVPN_TYPE4_V6_PSIZE 34 +static const struct message bgp_evpn_route_type_str[] = { + {BGP_EVPN_UNKN_ROUTE, "UNKNOWN"}, + {BGP_EVPN_AD_ROUTE, "AD"}, + {BGP_EVPN_MAC_IP_ROUTE, "MACIP"}, + {BGP_EVPN_IMET_ROUTE, "IMET"}, + {BGP_EVPN_ES_ROUTE, "ES"}, + {BGP_EVPN_IP_PREFIX_ROUTE, "IP-PREFIX"}, + {0}}; + RB_HEAD(bgp_es_evi_rb_head, bgp_evpn_es_evi); RB_PROTOTYPE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, bgp_es_evi_rb_cmp); @@ -53,8 +62,9 @@ struct bgpevpn { #define VNI_FLAG_RD_CFGD 0x4 /* RD is user configured. */ #define VNI_FLAG_IMPRT_CFGD 0x8 /* Import RT is user configured */ #define VNI_FLAG_EXPRT_CFGD 0x10 /* Export RT is user configured */ -#define VNI_FLAG_USE_TWO_LABELS 0x20 /* Attach both L2-VNI and L3-VNI if - needed for this VPN */ +/* Attach both L2-VNI and L3-VNI if needed for this VPN */ +#define VNI_FLAG_USE_TWO_LABELS 0x20 +#define VNI_FLAG_ADD 0x40 /* L2VNI Add */ struct bgp *bgp_vrf; /* back pointer to the vrf instance */ @@ -108,11 +118,15 @@ struct bgpevpn { /* List of local ESs */ struct list *local_es_evi_list; + struct zebra_l2_vni_item zl2vni; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgpevpn); +DECLARE_LIST(zebra_l2_vni, struct bgpevpn, zl2vni); + /* Mapping of Import RT to VNIs. * The Import RTs of all VNIs are maintained in a hash table with each * RT linking to all VNIs that will import routes matching this RT. diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 535d2fc5f434..9ca20c949a30 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -207,6 +207,8 @@ static __attribute__((__noreturn__)) void bgp_exit(int status) bgp_nhg_finish(); zebra_announce_fini(&bm->zebra_announce_head); + zebra_l2_vni_fini(&bm->zebra_l2_vni_head); + zebra_l3_vni_fini(&bm->zebra_l3_vni_head); /* reverse bgp_dump_init */ bgp_dump_finish(); diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 688dfacaa0b6..783dec7802db 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -3025,6 +3025,48 @@ static void bgp_zebra_connected(struct zclient *zclient) BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(bgp, bgp->peer); } +void bgp_zebra_process_remote_routes_for_l2vni(struct event *e) +{ + /* + * If we have learnt and retained remote routes (VTEPs, MACs) + * for this VNI, install them. + */ + install_uninstall_routes_for_vni(NULL, NULL, true); + + /* + * If there are VNIs still pending to be processed, schedule them + * after a small sleep so that CPU can be used for other purposes. + */ + if (zebra_l2_vni_count(&bm->zebra_l2_vni_head)) + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l2vni, NULL, + 20, &bm->t_bgp_zebra_l2_vni); +} + +void bgp_zebra_process_remote_routes_for_l3vrf(struct event *e) +{ + /* + * Install/Uninstall all remote routes belonging to l3vni + * + * NOTE: + * - At this point it does not matter whether we call + * install_routes_for_vrf/uninstall_routes_for_vrf. + * - Since we pass struct bgp as NULL, + * * we iterate the bm FIFO list + * * the second variable (true) is ignored as well and + * calculated based on the BGP-VRFs flags for ADD/DELETE. + */ + install_uninstall_routes_for_vrf(NULL, true); + + /* + * If there are L3VNIs still pending to be processed, schedule them + * after a small sleep so that CPU can be used for other purposes. + */ + if (zebra_l3_vni_count(&bm->zebra_l3_vni_head)) { + event_add_timer_msec(bm->master, bgp_zebra_process_remote_routes_for_l3vrf, NULL, + 20, &bm->t_bgp_zebra_l3_vni); + } +} + static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS) { esi_t esi; diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index 8deecba747b3..7e9d57cb8521 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -135,4 +135,6 @@ extern void bgp_zebra_release_label_range(uint32_t start, uint32_t end); extern enum zclient_send_status bgp_zebra_withdraw_actual(struct bgp_dest *dest, struct bgp_path_info *info, struct bgp *bgp); +extern void bgp_zebra_process_remote_routes_for_l2vni(struct event *e); +extern void bgp_zebra_process_remote_routes_for_l3vrf(struct event *e); #endif /* _QUAGGA_BGP_ZEBRA_H */ diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index f92ae969f8c4..f2149c3256cc 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -3966,11 +3966,16 @@ int bgp_delete(struct bgp *bgp) afi_t afi; safi_t safi; int i; + uint32_t vni_count; + struct bgpevpn *vpn = NULL; struct bgp_dest *dest = NULL; struct bgp_dest *dest_next = NULL; struct bgp_table *dest_table = NULL; struct graceful_restart_info *gr_info; - uint32_t cnt_before, cnt_after; + uint32_t b_ann_cnt = 0, b_l2_cnt = 0, b_l3_cnt = 0; + uint32_t a_ann_cnt = 0, a_l2_cnt = 0, a_l3_cnt = 0; + struct bgp *bgp_to_proc = NULL; + struct bgp *bgp_to_proc_next = NULL; assert(bgp); @@ -3978,7 +3983,7 @@ int bgp_delete(struct bgp *bgp) * Iterate the pending dest list and remove all the dest pertaininig to * the bgp under delete. */ - cnt_before = zebra_announce_count(&bm->zebra_announce_head); + b_ann_cnt = zebra_announce_count(&bm->zebra_announce_head); for (dest = zebra_announce_first(&bm->zebra_announce_head); dest; dest = dest_next) { dest_next = zebra_announce_next(&bm->zebra_announce_head, dest); @@ -3990,10 +3995,36 @@ int bgp_delete(struct bgp *bgp) } } - cnt_after = zebra_announce_count(&bm->zebra_announce_head); - if (BGP_DEBUG(zebra, ZEBRA)) - zlog_debug("Zebra Announce Fifo cleanup count before %u and after %u during BGP %s deletion", - cnt_before, cnt_after, bgp->name_pretty); + /* + * Pop all VPNs yet to be processed for remote routes install if the + * bgp-evpn instance is getting deleted + */ + if (bgp == bgp_get_evpn()) { + b_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + vni_count = b_l2_cnt; + while (vni_count) { + vpn = zebra_l2_vni_pop(&bm->zebra_l2_vni_head); + UNSET_FLAG(vpn->flags, VNI_FLAG_ADD); + vni_count--; + } + } + + b_l3_cnt = zebra_l3_vni_count(&bm->zebra_l3_vni_head); + for (bgp_to_proc = zebra_l3_vni_first(&bm->zebra_l3_vni_head); bgp_to_proc; + bgp_to_proc = bgp_to_proc_next) { + bgp_to_proc_next = zebra_l3_vni_next(&bm->zebra_l3_vni_head, bgp_to_proc); + if (bgp_to_proc == bgp) + zebra_l3_vni_del(&bm->zebra_l3_vni_head, bgp_to_proc); + } + + if (BGP_DEBUG(zebra, ZEBRA)) { + a_ann_cnt = zebra_announce_count(&bm->zebra_announce_head); + a_l2_cnt = zebra_l2_vni_count(&bm->zebra_l2_vni_head); + a_l3_cnt = zebra_l3_vni_count(&bm->zebra_l3_vni_head); + zlog_debug("BGP %s deletion FIFO cnt Zebra_Ann before %u after %u, L2_VNI before %u after, %u L3_VNI before %u after %u", + bgp->name_pretty, b_ann_cnt, a_ann_cnt, b_l2_cnt, a_l2_cnt, b_l3_cnt, + a_l3_cnt); + } bgp_soft_reconfig_table_task_cancel(bgp, NULL, NULL); @@ -8492,6 +8523,8 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, bm = &bgp_master; zebra_announce_init(&bm->zebra_announce_head); + zebra_l2_vni_init(&bm->zebra_l2_vni_head); + zebra_l3_vni_init(&bm->zebra_l3_vni_head); bm->bgp = list_new(); bm->listen_sockets = list_new(); bm->port = BGP_PORT_DEFAULT; @@ -8515,6 +8548,8 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, bm->stalepath_time = BGP_DEFAULT_STALEPATH_TIME; bm->select_defer_time = BGP_DEFAULT_SELECT_DEFERRAL_TIME; bm->rib_stale_time = BGP_DEFAULT_RIB_STALE_TIME; + bm->t_bgp_zebra_l2_vni = NULL; + bm->t_bgp_zebra_l3_vni = NULL; bgp_mac_init(); /* init the rd id space. @@ -8762,6 +8797,8 @@ void bgp_terminate(void) EVENT_OFF(bm->t_bgp_sync_label_manager); EVENT_OFF(bm->t_bgp_start_label_manager); EVENT_OFF(bm->t_bgp_zebra_route); + EVENT_OFF(bm->t_bgp_zebra_l2_vni); + EVENT_OFF(bm->t_bgp_zebra_l3_vni); bgp_mac_finish(); } diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index df55d879e71d..bdfc3316febb 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -19,6 +19,8 @@ #include "asn.h" PREDECL_LIST(zebra_announce); +PREDECL_LIST(zebra_l2_vni); +PREDECL_LIST(zebra_l3_vni); /* For union sockunion. */ #include "queue.h" @@ -204,6 +206,14 @@ struct bgp_master { /* To preserve ordering of installations into zebra across all Vrfs */ struct zebra_announce_head zebra_announce_head; + struct event *t_bgp_zebra_l2_vni; + /* To preserve ordering of processing of L2 VNIs in BGP */ + struct zebra_l2_vni_head zebra_l2_vni_head; + + struct event *t_bgp_zebra_l3_vni; + /* To preserve ordering of processing of BGP-VRFs for L3 VNIs */ + struct zebra_l3_vni_head zebra_l3_vni_head; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgp_master); @@ -554,6 +564,8 @@ struct bgp { #define BGP_FLAG_INSTANCE_HIDDEN (1ULL << 39) /* Prohibit BGP from enabling IPv6 RA on interfaces */ #define BGP_FLAG_IPV6_NO_AUTO_RA (1ULL << 40) +#define BGP_FLAG_L3VNI_SCHEDULE_FOR_INSTALL (1ULL << 41) +#define BGP_FLAG_L3VNI_SCHEDULE_FOR_DELETE (1ULL << 42) /* BGP default address-families. * New peers inherit enabled afi/safis from bgp instance. @@ -868,10 +880,14 @@ struct bgp { uint64_t node_already_on_queue; uint64_t node_deferred_on_queue; + struct zebra_l3_vni_item zl3vni; + QOBJ_FIELDS; }; DECLARE_QOBJ_TYPE(bgp); +DECLARE_LIST(zebra_l3_vni, struct bgp, zl3vni); + struct bgp_interface { #define BGP_INTERFACE_MPLS_BGP_FORWARDING (1 << 0) /* L3VPN multi domain switching */ diff --git a/lib/prefix.h b/lib/prefix.h index 2d679d06224f..2f1a437b80b4 100644 --- a/lib/prefix.h +++ b/lib/prefix.h @@ -26,6 +26,7 @@ extern "C" { /* EVPN route types. */ typedef enum { + BGP_EVPN_UNKN_ROUTE = 0, BGP_EVPN_AD_ROUTE = 1, /* Ethernet Auto-Discovery (A-D) route */ BGP_EVPN_MAC_IP_ROUTE, /* MAC/IP Advertisement route */ BGP_EVPN_IMET_ROUTE, /* Inclusive Multicast Ethernet Tag route */ diff --git a/lib/zclient.h b/lib/zclient.h index 6da9558aa560..1ddf020683c4 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -47,6 +47,11 @@ typedef uint16_t zebra_size_t; #define ZEBRA_MAX_PACKET_SIZ 16384U #define ZEBRA_SMALL_PACKET_SIZE 200U +/* Only for L2/L3 VNI Add/Del */ +#define ZEBRA_VNI_MAX_PACKET_SIZE 80U +#define ZEBRA_VNI_SMALL_PACKET_SIZE 50U +#define ZEBRA_VNI_MIN_PACKET_SIZE 25U + /* Zebra header size. */ #define ZEBRA_HEADER_SIZE 10 diff --git a/zebra/zebra_evpn.c b/zebra/zebra_evpn.c index 25c616c4c956..edff084bacf0 100644 --- a/zebra/zebra_evpn.c +++ b/zebra/zebra_evpn.c @@ -1105,7 +1105,7 @@ int zebra_evpn_send_add_to_client(struct zebra_evpn *zevpn) svi_index = zevpn->svi_if ? zevpn->svi_if->ifindex : 0; - s = stream_new(ZEBRA_SMALL_PACKET_SIZE); + s = stream_new(ZEBRA_VNI_SMALL_PACKET_SIZE); zclient_create_header(s, ZEBRA_VNI_ADD, zebra_vrf_get_evpn_id()); stream_putl(s, zevpn->vni); @@ -1157,7 +1157,7 @@ int zebra_evpn_send_del_to_client(struct zebra_evpn *zevpn) zebra_evpn_update_all_es(zevpn); } - s = stream_new(ZEBRA_SMALL_PACKET_SIZE); + s = stream_new(ZEBRA_VNI_MIN_PACKET_SIZE); stream_reset(s); zclient_create_header(s, ZEBRA_VNI_DEL, zebra_vrf_get_evpn_id()); diff --git a/zebra/zebra_vxlan.c b/zebra/zebra_vxlan.c index ad112a4ab1f4..d4d0a1df1d24 100644 --- a/zebra/zebra_vxlan.c +++ b/zebra/zebra_vxlan.c @@ -2203,7 +2203,7 @@ static int zl3vni_send_add_to_client(struct zebra_l3vni *zl3vni) is_anycast_mac = false; } - s = stream_new(ZEBRA_MAX_PACKET_SIZ); + s = stream_new(ZEBRA_VNI_MAX_PACKET_SIZE); /* The message is used for both vni add and/or update like * vrr mac is added for l3vni SVI. @@ -2246,7 +2246,7 @@ static int zl3vni_send_del_to_client(struct zebra_l3vni *zl3vni) if (!client) return 0; - s = stream_new(ZEBRA_MAX_PACKET_SIZ); + s = stream_new(ZEBRA_VNI_MIN_PACKET_SIZE); zclient_create_header(s, ZEBRA_L3VNI_DEL, zl3vni_vrf_id(zl3vni)); stream_putl(s, zl3vni->vni);