From 320eb7332a7b7134754b59563c7196e7c07e57f7 Mon Sep 17 00:00:00 2001 From: Stepan Blyschak Date: Mon, 13 Jan 2020 11:12:37 +0000 Subject: [PATCH] [portsorch] fix worng orchagent behaviour when LAG member gets disabled by teamd Originally portsorch was designed to remove LAG member from LAG when member gets disabled by teamd. This could lead to potential issues including flood to that port and loops, since after removal member becomes a switch port. Now, portsorch will make use of SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE and SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE to control collection/distribution through that LAG member port. With this new flow, teammgrd and teamsyncd are candidates to be refactored, especially teamsyncd warm reboot logic, since now we don't need to compare old, new lags and lag members. Teamsyncd's job is simply to signal "status" field update without waiting for reconciliation timer to expire. e.g. one port channel went down on peer: ``` admin@arc-switch1025:~$ show interfaces portchannel Flags: A - active, I - inactive, Up - up, Dw - Down, N/A - not available, S - selected, D - deselected, * - not synced No. Team Dev Protocol Ports ----- --------------- ----------- -------------- 0001 PortChannel0001 LACP(A)(Up) Ethernet112(S) 0002 PortChannel0002 LACP(A)(Up) Ethernet116(S) 0003 PortChannel0003 LACP(A)(Up) Ethernet120(S) 0004 PortChannel0004 LACP(A)(Dw) Ethernet124(D) admin@arc-switch1025:~$ docker exec -it syncd sx_api_lag_dump.py LAG Members Table =============================================================================================================== | SWID| LAG Logical Port| LAG Oper State| PVID| Member Port ID| Port Label| Collector| Distributor| Oper State| =============================================================================================================== | 0 0x10000000 UP 1| 0x11900| 29| Enable| Enable| UP| =============================================================================================================== | 0 0x10000100 UP 1| 0x11b00| 30| Enable| Enable| UP| =============================================================================================================== | 0 0x10000200 UP 1| 0x11d00| 31| Enable| Enable| UP| =============================================================================================================== | 0 0x10000300 DOWN 1| 0x11f00| 32| Disable| Disable| UP| =============================================================================================================== ``` Signed-off-by: Stepan Blyschak --- orchagent/portsorch.cpp | 94 ++++++++++++++++++++++++++++++++------- orchagent/portsorch.h | 2 + tests/test_portchannel.py | 40 ++++++++++++++--- 3 files changed, 113 insertions(+), 23 deletions(-) diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index f5d616323c0..d8088c123df 100644 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -2494,39 +2494,51 @@ void PortsOrch::doLagMemberTask(Consumer &consumer) status = fvValue(i); } - /* Sync an enabled member */ - if (status == "enabled") + if (lag.m_members.find(port_alias) == lag.m_members.end()) { - /* Duplicate entry */ - if (lag.m_members.find(port_alias) != lag.m_members.end()) + /* Assert the port doesn't belong to any LAG already */ + assert(!port.m_lag_id && !port.m_lag_member_id); + + if (!addLagMember(lag, port)) { - it = consumer.m_toSync.erase(it); + it++; continue; } + } - /* Assert the port doesn't belong to any LAG */ - assert(!port.m_lag_id && !port.m_lag_member_id); - - if (addLagMember(lag, port)) + /* Sync an enabled member */ + if (status == "enabled") + { + /* enable collection first, distribution-only mode + * is not supported on Mellanox platform + */ + if (setCollectionOnLagMember(port, true) && + setDistributionOnLagMember(port, true)) + { it = consumer.m_toSync.erase(it); + } else + { it++; + continue; + } } /* Sync an disabled member */ else /* status == "disabled" */ { - /* "status" is "disabled" at start when m_lag_id and - * m_lag_member_id are absent */ - if (!port.m_lag_id || !port.m_lag_member_id) + /* disable distribution first, distribution-only mode + * is not supported on Mellanox platform + */ + if (setDistributionOnLagMember(port, false) && + setCollectionOnLagMember(port, false)) { it = consumer.m_toSync.erase(it); - continue; } - - if (removeLagMember(lag, port)) - it = consumer.m_toSync.erase(it); else + { it++; + continue; + } } } /* Remove a LAG member */ @@ -2544,9 +2556,13 @@ void PortsOrch::doLagMemberTask(Consumer &consumer) } if (removeLagMember(lag, port)) + { it = consumer.m_toSync.erase(it); + } else + { it++; + } } else { @@ -3312,6 +3328,52 @@ bool PortsOrch::removeLagMember(Port &lag, Port &port) return true; } +bool PortsOrch::setCollectionOnLagMember(Port &lagMember, bool enableCollection) +{ + /* Port must be LAG member */ + assert(port.m_lag_member_id); + + sai_status_t status = SAI_STATUS_FAILURE; + sai_attribute_t attr {}; + + attr.id = SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE; + attr.value.booldata = !enableCollection; + + status = sai_lag_api->set_lag_member_attribute(lagMember.m_lag_member_id, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to %s collection on LAG member %s", + enableCollection ? "enable" : "disable", + lagMember.m_alias.c_str()); + return false; + } + + return true; +} + +bool PortsOrch::setDistributionOnLagMember(Port &lagMember, bool enableDistribution) +{ + /* Port must be LAG member */ + assert(port.m_lag_member_id); + + sai_status_t status = SAI_STATUS_FAILURE; + sai_attribute_t attr {}; + + attr.id = SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE; + attr.value.booldata = !enableDistribution; + + status = sai_lag_api->set_lag_member_attribute(lagMember.m_lag_member_id, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to %s distribution on LAG member %s", + enableDistribution ? "enable" : "disable", + lagMember.m_alias.c_str()); + return false; + } + + return true; +} + void PortsOrch::generateQueueMap() { if (m_isQueueMapGenerated) diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index 6f314d6bffe..0a9a68beebd 100644 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -170,6 +170,8 @@ class PortsOrch : public Orch, public Subject bool removeLag(Port lag); bool addLagMember(Port &lag, Port &port); bool removeLagMember(Port &lag, Port &port); + bool setCollectionOnLagMember(Port &lagMember, bool enableCollection); + bool setDistributionOnLagMember(Port &lagMember, bool enableDistribution); void getLagMember(Port &lag, vector &portv); bool addPort(const set &lane_set, uint32_t speed, int an=0, string fec=""); diff --git a/tests/test_portchannel.py b/tests/test_portchannel.py index 0bc6db1e2ce..76808c07e79 100644 --- a/tests/test_portchannel.py +++ b/tests/test_portchannel.py @@ -33,13 +33,39 @@ def test_Portchannel(self, dvs, testlog): assert len(lagms) == 1 (status, fvs) = lagmtbl.get(lagms[0]) - for fv in fvs: - if fv[0] == "SAI_LAG_MEMBER_ATTR_LAG_ID": - assert fv[1] == lags[0] - elif fv[0] == "SAI_LAG_MEMBER_ATTR_PORT_ID": - assert dvs.asicdb.portoidmap[fv[1]] == "Ethernet0" - else: - assert False + fvs = dict(fvs) + assert status + assert "SAI_LAG_MEMBER_ATTR_LAG_ID" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_LAG_ID") == lags[0] + assert "SAI_LAG_MEMBER_ATTR_PORT_ID" in fvs + assert dvs.asicdb.portoidmap[fvs.pop("SAI_LAG_MEMBER_ATTR_PORT_ID")] == "Ethernet0" + assert "SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE") == "false" + assert "SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE") == "false" + assert not fvs + + ps = swsscommon.ProducerStateTable(db, "LAG_MEMBER_TABLE") + fvs = swsscommon.FieldValuePairs([("status", "disabled")]) + + ps.set("PortChannel0001:Ethernet0", fvs) + + lagmtbl = swsscommon.Table(asicdb, "ASIC_STATE:SAI_OBJECT_TYPE_LAG_MEMBER") + lagms = lagmtbl.getKeys() + assert len(lagms) == 1 + + (status, fvs) = lagmtbl.get(lagms[0]) + fvs = dict(fvs) + assert status + assert "SAI_LAG_MEMBER_ATTR_LAG_ID" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_LAG_ID") == lags[0] + assert "SAI_LAG_MEMBER_ATTR_PORT_ID" in fvs + assert dvs.asicdb.portoidmap[fvs.pop("SAI_LAG_MEMBER_ATTR_PORT_ID")] == "Ethernet0" + assert "SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_INGRESS_DISABLE") == "true" + assert "SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE" in fvs + assert fvs.pop("SAI_LAG_MEMBER_ATTR_EGRESS_DISABLE") == "true" + assert not fvs # remove port channel member ps = swsscommon.ProducerStateTable(db, "LAG_MEMBER_TABLE")