Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[fgnhgorch] Enable packet flow when no FG ECMP neighbors are resolved (
Browse files Browse the repository at this point in the history
…#1900)

*Added ability to set the FG route's next-hop as the RIF, the packets will trap to the kernel and be neighbor resolved via the kernel. Further, added the ability to set the next-hop to a fine grained ECMP next-hop after the 1st neighbor resolution occurs.
anish-n authored Sep 15, 2021
1 parent 8cf355d commit d01524d
Showing 4 changed files with 226 additions and 66 deletions.
243 changes: 185 additions & 58 deletions orchagent/fgnhgorch.cpp
Original file line number Diff line number Diff line change
@@ -317,6 +317,7 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout
bool FgNhgOrch::removeFineGrainedNextHopGroup(FGNextHopGroupEntry *syncd_fg_route_entry)
{
SWSS_LOG_ENTER();

sai_status_t status;

for (auto nhgm : syncd_fg_route_entry->nhopgroup_members)
@@ -337,6 +338,34 @@ bool FgNhgOrch::removeFineGrainedNextHopGroup(FGNextHopGroupEntry *syncd_fg_rout

if (!gRouteOrch->removeFineGrainedNextHopGroup(syncd_fg_route_entry->next_hop_group_id))
{
SWSS_LOG_ERROR("Failed to remove nhgid %" PRIx64 " return failure",
syncd_fg_route_entry->next_hop_group_id);
return false;
}

return true;
}


bool FgNhgOrch::modifyRoutesNextHopId(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, sai_object_id_t next_hop_id)
{
SWSS_LOG_ENTER();

sai_route_entry_t route_entry;
sai_attribute_t route_attr;

route_entry.vr_id = vrf_id;
route_entry.switch_id = gSwitchId;
copy(route_entry.destination, ipPrefix);

route_attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID;
route_attr.value.oid = next_hop_id;

sai_status_t status = sai_route_api->set_route_entry_attribute(&route_entry, &route_attr);
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to set route %s with packet action forward, %d",
ipPrefix.to_string().c_str(), status);
return false;
}

@@ -385,22 +414,56 @@ bool FgNhgOrch::validNextHopInNextHopGroup(const NextHopKey& nexthop)
return true;
}

for (auto active_nh : syncd_fg_route_entry->active_nexthops)
if (fgNhgEntry->hash_bucket_indices.size() == 0 && syncd_fg_route_entry->points_to_rif)
{
bank_member_changes[fgNhgEntry->next_hops[active_nh.ip_address].bank].
active_nhs.push_back(active_nh);
/* Only happens the 1st time when hash_bucket_indices are not inited
*/
for (auto it : fgNhgEntry->next_hops)
{
while (bank_member_changes.size() <= it.second.bank)
{
bank_member_changes.push_back(BankMemberChanges());
}
}
}

bank_member_changes[fgNhgEntry->next_hops[nexthop.ip_address].bank].
nhs_to_add.push_back(nexthop);
nhopgroup_members_set[nexthop] = m_neighOrch->getNextHopId(nexthop);

if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry,
bank_member_changes, nhopgroup_members_set, route_table.first))
if (syncd_fg_route_entry->points_to_rif)
{
SWSS_LOG_ERROR("Failed to set fine grained next hop %s",
nexthop.to_string().c_str());
return false;
// RIF route is now neigh resolved: create Fine Grained ECMP
if (!createFineGrainedNextHopGroup(*syncd_fg_route_entry, fgNhgEntry, syncd_fg_route_entry->nhg_key))
{
return false;
}

if (!setNewNhgMembers(*syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, route_table.first))
{
return false;
}

if (!modifyRoutesNextHopId(route_tables.first, route_table.first, syncd_fg_route_entry->next_hop_group_id))
{
return false;
}
}
else
{
for (auto active_nh : syncd_fg_route_entry->active_nexthops)
{
bank_member_changes[fgNhgEntry->next_hops[active_nh.ip_address].bank].
active_nhs.push_back(active_nh);
}

if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry,
bank_member_changes, nhopgroup_members_set, route_table.first))
{
SWSS_LOG_ERROR("Failed to set fine grained next hop %s",
nexthop.to_string().c_str());
return false;
}
}

m_neighOrch->increaseNextHopRefCount(nexthop);
@@ -760,14 +823,46 @@ bool FgNhgOrch::setInactiveBankToNextAvailableActiveBank(FGNextHopGroupEntry *sy

if (new_bank_idx == bank_member_changes.size())
{
SWSS_LOG_NOTICE("All banks of FG next-hops are down for prefix %s",
ipPrefix.to_string().c_str());
/* Case where there are no active banks */
/* Note: There is no way to set a NULL OID to the now inactive next-hops
* so we leave the next-hops as is in SAI, and future route/neighbor changes
* will take care of setting the next-hops to the correctly active nhs
SWSS_LOG_NOTICE("All banks of FG next-hops are down for prefix %s",
ipPrefix.to_string().c_str());

/* This may occur when there are no neigh entries available any more
* set route pointing to rif to allow for neigh resolution in kernel.
* If route already points to rif then we are done.
*/
syncd_fg_route_entry->syncd_fgnhg_map[bank].clear();
if (!syncd_fg_route_entry->points_to_rif)
{
std::string interface_alias = syncd_fg_route_entry->nhg_key.getNextHops().begin()->alias;
sai_object_id_t rif_next_hop_id = m_intfsOrch->getRouterIntfsId(interface_alias);
if (rif_next_hop_id == SAI_NULL_OBJECT_ID)
{
SWSS_LOG_INFO("Failed to get rif next hop for %s", interface_alias.c_str());
return false;
}
if (!modifyRoutesNextHopId(gVirtualRouterId, ipPrefix, rif_next_hop_id))
{
SWSS_LOG_ERROR("Failed to modify route nexthopid to rif");
return false;
}

if (!removeFineGrainedNextHopGroup(syncd_fg_route_entry))
{
SWSS_LOG_ERROR("Failed to delete Fine Grained next hop group");
return false;
}

syncd_fg_route_entry->points_to_rif = true;
syncd_fg_route_entry->next_hop_group_id = rif_next_hop_id;

// remove state_db entry
m_stateWarmRestartRouteTable.del(ipPrefix.to_string());
// Clear data structures
syncd_fg_route_entry->syncd_fgnhg_map.clear();
syncd_fg_route_entry->active_nexthops.clear();
syncd_fg_route_entry->inactive_to_active_map.clear();
syncd_fg_route_entry->nhopgroup_members.clear();
}
}

return true;
@@ -1017,6 +1112,7 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh
{
m_recoveryMap.erase(nexthopsMap);
}
syncd_fg_route_entry.points_to_rif = false;

return true;
}
@@ -1094,13 +1190,13 @@ bool FgNhgOrch::syncdContainsFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPre


bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops,
sai_object_id_t &next_hop_id, bool &prevNhgWasFineGrained)
sai_object_id_t &next_hop_id, bool &isNextHopIdChanged)
{
SWSS_LOG_ENTER();

/* default prevNhgWasFineGrained to true so that sai route is unaffected
/* default isNextHopIdChanged to false so that sai route is unaffected
* when we return early with success */
prevNhgWasFineGrained = true;
isNextHopIdChanged = false;
FgNhgEntry *fgNhgEntry = 0;
set<NextHopKey> next_hop_set = nextHops.getNextHops();
auto prefix_entry = m_fgNhgPrefixes.find(ipPrefix);
@@ -1123,7 +1219,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const
break;
}
}

if (m_syncdFGRouteTables.find(vrf_id) != m_syncdFGRouteTables.end() &&
m_syncdFGRouteTables.at(vrf_id).find(ipPrefix) != m_syncdFGRouteTables.at(vrf_id).end() &&
m_syncdFGRouteTables.at(vrf_id).at(ipPrefix).nhg_key == nextHops)
@@ -1150,7 +1246,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const
*/
for (auto it : fgNhgEntry->next_hops)
{
while(bank_member_changes.size() <= it.second.bank)
while (bank_member_changes.size() <= it.second.bank)
{
bank_member_changes.push_back(BankMemberChanges());
}
@@ -1202,6 +1298,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const
{
bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank].
nhs_to_add.push_back(nhk);
next_hop_to_add = true;
}
}

@@ -1211,54 +1308,81 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const

if (syncd_fg_route_entry_it != m_syncdFGRouteTables.at(vrf_id).end())
{
/* Route exists and nh was associated in the past */
FGNextHopGroupEntry *syncd_fg_route_entry = &(syncd_fg_route_entry_it->second);
/* Route exists, update FG ECMP group in SAI */
for (auto nhk : syncd_fg_route_entry->active_nexthops)

if (syncd_fg_route_entry->points_to_rif)
{
if (nhopgroup_members_set.find(nhk) == nhopgroup_members_set.end())
if (next_hop_to_add)
{
bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank].
nhs_to_del.push_back(nhk);
isNextHopIdChanged = true;
if (!createFineGrainedNextHopGroup(*syncd_fg_route_entry, fgNhgEntry, nextHops))
{
return false;
}

if (!setNewNhgMembers(*syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, ipPrefix))
{
return false;
}
}
else
}
else
{
/* Update FG ECMP group in SAI */
for (auto nhk : syncd_fg_route_entry->active_nexthops)
{
bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank].
active_nhs.push_back(nhk);
if (nhopgroup_members_set.find(nhk) == nhopgroup_members_set.end())
{
bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank].
nhs_to_del.push_back(nhk);
}
else
{
bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank].
active_nhs.push_back(nhk);
}
}
}

if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes,
nhopgroup_members_set, ipPrefix))
{
return false;
if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes,
nhopgroup_members_set, ipPrefix))
{
return false;
}
}
}
else
{
/* New route + nhg addition */
prevNhgWasFineGrained = false;
if (next_hop_to_add == false)
{
SWSS_LOG_INFO("There were no valid next-hops to add %s:%s", ipPrefix.to_string().c_str(),
nextHops.to_string().c_str());
/* Let the route retry logic(upon false rc) take care of this case */
return false;
}

isNextHopIdChanged = true;
FGNextHopGroupEntry syncd_fg_route_entry;
if (!createFineGrainedNextHopGroup(syncd_fg_route_entry, fgNhgEntry, nextHops))
if (next_hop_to_add)
{
return false;
}
if (!createFineGrainedNextHopGroup(syncd_fg_route_entry, fgNhgEntry, nextHops))
{
return false;
}

if (!setNewNhgMembers(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, ipPrefix))
if (!setNewNhgMembers(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, ipPrefix))
{
return false;
}
}
else
{
return false;
sai_object_id_t rif_next_hop_id = m_intfsOrch->getRouterIntfsId(next_hop_set.begin()->alias);
if (rif_next_hop_id == SAI_NULL_OBJECT_ID)
{
SWSS_LOG_INFO("Failed to get rif next hop %s for %s",
nextHops.to_string().c_str(), ipPrefix.to_string().c_str());
return false;
}

syncd_fg_route_entry.next_hop_group_id = rif_next_hop_id;
syncd_fg_route_entry.points_to_rif = true;
}

m_syncdFGRouteTables[vrf_id][ipPrefix] = syncd_fg_route_entry;

SWSS_LOG_NOTICE("Created route %s:%s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str());
}
m_syncdFGRouteTables[vrf_id][ipPrefix].nhg_key = nextHops;

@@ -1310,19 +1434,22 @@ bool FgNhgOrch::removeFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix)
}

FGNextHopGroupEntry *syncd_fg_route_entry = &(it_route->second);
if (!removeFineGrainedNextHopGroup(syncd_fg_route_entry))
if (!syncd_fg_route_entry->points_to_rif)
{
SWSS_LOG_ERROR("Failed to clean-up fine grained ECMP SAI group");
return false;
}
if (!removeFineGrainedNextHopGroup(syncd_fg_route_entry))
{
SWSS_LOG_ERROR("Failed to clean-up fine grained ECMP SAI group");
return false;
}

for (auto nh : syncd_fg_route_entry->active_nexthops)
{
m_neighOrch->decreaseNextHopRefCount(nh);
}
for (auto nh : syncd_fg_route_entry->active_nexthops)
{
m_neighOrch->decreaseNextHopRefCount(nh);
}

// remove state_db entry
m_stateWarmRestartRouteTable.del(ipPrefix.to_string());
// remove state_db entry
m_stateWarmRestartRouteTable.del(ipPrefix.to_string());
}

it_route_table->second.erase(it_route);
if (it_route_table->second.size() == 0)
5 changes: 3 additions & 2 deletions orchagent/fgnhgorch.h
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@ struct FGNextHopGroupEntry
BankFGNextHopGroupMap syncd_fgnhg_map; // Map of (bank) -> (nexthops) -> (index in nhopgroup_members)
NextHopGroupKey nhg_key; // Full next hop group key
InactiveBankMapsToBank inactive_to_active_map; // Maps an inactive bank to an active one in terms of hash bkts
bool points_to_rif; // Flag to identify that route is currently pointing to a rif
};

struct FGNextHopInfo
@@ -104,7 +105,7 @@ class FgNhgOrch : public Orch, public Observer
bool syncdContainsFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix);
bool validNextHopInNextHopGroup(const NextHopKey&);
bool invalidNextHopInNextHopGroup(const NextHopKey&);
bool setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops, sai_object_id_t &next_hop_id, bool &prevNhgWasFineGrained);
bool setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops, sai_object_id_t &next_hop_id, bool &isNextHopIdChanged);
bool removeFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix);

// warm reboot support
@@ -150,10 +151,10 @@ class FgNhgOrch : public Orch, public Observer
void setStateDbRouteEntry(const IpPrefix&, uint32_t index, NextHopKey nextHop);
bool writeHashBucketChange(FGNextHopGroupEntry *syncd_fg_route_entry, uint32_t index, sai_object_id_t nh_oid,
const IpPrefix &ipPrefix, NextHopKey nextHop);
bool modifyRoutesNextHopId(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, sai_object_id_t next_hop_id);
bool createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry,
const NextHopGroupKey &nextHops);
bool removeFineGrainedNextHopGroup(FGNextHopGroupEntry *syncd_fg_route_entry);

vector<FieldValueTuple> generateRouteTableFromNhgKey(NextHopGroupKey nhg);
void cleanupIpInLinkToIpMap(const string &link, const IpAddress &ip, FgNhgEntry &fgNhg_entry);

8 changes: 4 additions & 4 deletions orchagent/routeorch.cpp
Original file line number Diff line number Diff line change
@@ -1411,7 +1411,7 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops)
bool overlay_nh = false;
bool status = false;
bool curNhgIsFineGrained = false;
bool prevNhgWasFineGrained = false;
bool isFineGrainedNextHopIdChanged = false;
bool blackhole = false;

if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end())
@@ -1434,9 +1434,9 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops)
/* We get 3 return values from setFgNhg:
* 1. success/failure: on addition/modification of nexthop group/members
* 2. next_hop_id: passed as a param to fn, used for sai route creation
* 3. prevNhgWasFineGrained: passed as a param to fn, used to determine transitions
* 3. isFineGrainedNextHopIdChanged: passed as a param to fn, used to determine transitions
* between regular and FG ECMP, this is an optimization to prevent multiple lookups */
if (!m_fgNhgOrch->setFgNhg(vrf_id, ipPrefix, nextHops, next_hop_id, prevNhgWasFineGrained))
if (!m_fgNhgOrch->setFgNhg(vrf_id, ipPrefix, nextHops, next_hop_id, isFineGrainedNextHopIdChanged))
{
return false;
}
@@ -1627,7 +1627,7 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops)
gRouteBulker.set_entry_attribute(&object_statuses.back(), &route_entry, &route_attr);
}

if (curNhgIsFineGrained && prevNhgWasFineGrained)
if (curNhgIsFineGrained && !isFineGrainedNextHopIdChanged)
{
/* Don't change route entry if the route is previously fine grained and new nhg is also fine grained.
* We already modifed sai nhg objs as part of setFgNhg to account for nhg change. */
36 changes: 34 additions & 2 deletions tests/test_fgnhg.py
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@
ASIC_NHG = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP"
ASIC_NHG_MEMB = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER"
ASIC_NH_TB = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP"
ASIC_RIF = "ASIC_STATE:SAI_OBJECT_TYPE_ROUTER_INTERFACE"

def create_entry(db, table, key, pairs):
db.create_entry(table, key, pairs)
@@ -79,6 +80,33 @@ def _access_function():
failure_message="Fine Grained ECMP route not found")
return result

def validate_asic_nhg_router_interface(asic_db, ipprefix):
def _access_function():
false_ret = (False, '')
keys = asic_db.get_keys(ASIC_ROUTE_TB)
key = ''
route_exists = False
for k in keys:
rt_key = json.loads(k)
if rt_key['dest'] == ipprefix:
route_exists = True
key = k
if not route_exists:
return false_ret

fvs = asic_db.get_entry(ASIC_ROUTE_TB, key)
if not fvs:
return false_ret

rifid = fvs.get("SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID")
fvs = asic_db.get_entry(ASIC_RIF, rifid)
if not fvs:
return false_ret

return (True, rifid)
_, result = wait_for_result(_access_function, failure_message="Route pointing to RIF not found")
return result

def validate_asic_nhg_regular_ecmp(asic_db, ipprefix):
def _access_function():
false_ret = (False, '')
@@ -338,8 +366,9 @@ def fine_grained_ecmp_base_test(dvs, match_mode):
found_route = True
break

# Since we didn't populate ARP yet, the route shouldn't be programmed
assert (found_route == False)
# Since we didn't populate ARP yet, route should point to RIF for kernel arp resolution to occur
assert (found_route == True)
validate_asic_nhg_router_interface(asic_db, fg_nhg_prefix)

asic_nh_count = len(asic_db.get_keys(ASIC_NH_TB))
dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01")
@@ -484,6 +513,9 @@ def fine_grained_ecmp_base_test(dvs, match_mode):
# bring all links up one by one
startup_link(dvs, app_db, 3)
startup_link(dvs, app_db, 4)
asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size)
nhgid = validate_asic_nhg_fine_grained_ecmp(asic_db, fg_nhg_prefix, bucket_size)
nh_oid_map = get_nh_oid_map(asic_db)
nh_memb_exp_count = {"10.0.0.7":30,"10.0.0.9":30}
validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)

0 comments on commit d01524d

Please sign in to comment.