From 39e78320ac30708ee619a1ddb8cdd3e9d929e8dd Mon Sep 17 00:00:00 2001 From: Laszlo Kiraly Date: Thu, 4 May 2023 10:24:48 +0200 Subject: [PATCH] Fix for Policy Base Routing failure; Possible table ID collision. Related issue: networkservicemesh/deployments-k8s/issues/9119 Added a mechanism to protect table ID selection from parallel runs. Signed-off-by: Laszlo Kiraly --- .../ipcontext/iprule/common.go | 73 +++++++++++++------ .../ipcontext/iprule/gen.go | 6 ++ .../iprule/netns_rtable_conn_map.gen.go | 73 +++++++++++++++++++ .../ipcontext/iprule/nsrtnextid.go | 31 ++++++++ .../ipcontext/iprule/server.go | 10 ++- .../ipcontext/iprule/table_map.gen.go | 2 - 6 files changed, 167 insertions(+), 28 deletions(-) create mode 100644 pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/netns_rtable_conn_map.gen.go create mode 100644 pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/nsrtnextid.go diff --git a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/common.go b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/common.go index 13b97c5c..78a24c39 100644 --- a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/common.go +++ b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/common.go @@ -2,6 +2,8 @@ // // Copyright (c) 2023 Cisco and/or its affiliates. // +// Copyright (c) 2023 Nordix Foundation. +// // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +42,7 @@ import ( link "github.com/networkservicemesh/sdk-kernel/pkg/kernel" ) -func create(ctx context.Context, conn *networkservice.Connection, tableIDs *Map) error { +func create(ctx context.Context, conn *networkservice.Connection, tableIDs *Map, nsRTableNextIDToConnID *NetnsRTableNextIDToConnMap) error { if mechanism := kernel.ToMechanism(conn.GetMechanism()); mechanism != nil && mechanism.GetVLAN() == 0 { // Construct the netlink handle for the target namespace for this kernel interface netlinkHandle, err := link.GetNetlinkHandle(mechanism.GetNetNSURL()) @@ -54,52 +56,73 @@ func create(ctx context.Context, conn *networkservice.Connection, tableIDs *Map) if err != nil { return errors.Wrapf(err, "failed to find link %s", ifName) } - - ps, ok := tableIDs.Load(conn.GetId()) + connID := conn.GetId() + ps, ok := tableIDs.Load(connID) if !ok { if len(conn.Context.IpContext.Policies) == 0 { return nil } ps = make(map[int]*networkservice.PolicyRoute) - tableIDs.Store(conn.GetId(), ps) + tableIDs.Store(connID, ps) } // Get policies to add and to remove toAdd, toRemove := getPolicyDifferences(ps, conn.Context.IpContext.Policies) // Remove no longer existing policies for tableID, policy := range toRemove { - if err := delRule(ctx, netlinkHandle, policy, tableID); err != nil { - return err + if errRule := delRule(ctx, netlinkHandle, policy, tableID); errRule != nil { + return errRule } delete(ps, tableID) - tableIDs.Store(conn.GetId(), ps) + tableIDs.Store(connID, ps) } // Add new policies + netNSInode := mechanism.GetNetNSInode() for _, policy := range toAdd { - tableID, err := getFreeTableID(ctx, netlinkHandle) - if err != nil { - return err - } - // If policy doesn't contain any route - add default - if len(policy.Routes) == 0 { - policy.Routes = append(policy.Routes, defaultRoute()) - } - - for _, route := range policy.Routes { - if err := routeAdd(ctx, netlinkHandle, l, route, tableID); err != nil { + var tableID int + var nsrtid *NetnsRTableNextID + // get a free table ID until we succeed + for { + tableID, err = getFreeTableID(ctx, netlinkHandle) + if err != nil { return err } + nsrtid = NewNetnsRTableNextID(netNSInode, tableID) + storedConnID, _ := nsRTableNextIDToConnID.LoadOrStore(*nsrtid, connID) + if connID == storedConnID { + // No other connection adding policy using this free routing table ID + break + } } - if err := ruleAdd(ctx, netlinkHandle, policy, tableID); err != nil { + if err := addPolicy(ctx, netlinkHandle, policy, l, ps, tableIDs, tableID, connID, *nsrtid, nsRTableNextIDToConnID); err != nil { return err } - ps[tableID] = policy - tableIDs.Store(conn.GetId(), ps) } } return nil } +func addPolicy(ctx context.Context, netlinkHandle *netlink.Handle, policy *networkservice.PolicyRoute, l netlink.Link, ps policies, tableIDs *Map, tableID int, connID string, nsrtid NetnsRTableNextID, nsRTableNextIDToConnID *NetnsRTableNextIDToConnMap) error { + // If policy doesn't contain any route - add default + if len(policy.Routes) == 0 { + policy.Routes = append(policy.Routes, defaultRoute()) + } + + for _, route := range policy.Routes { + if err := routeAdd(ctx, netlinkHandle, l, route, tableID); err != nil { + return err + } + } + if err := ruleAdd(ctx, netlinkHandle, policy, tableID); err != nil { + return err + } + ps[tableID] = policy + tableIDs.Store(connID, ps) + // release the lock + nsRTableNextIDToConnID.Delete(nsrtid) + return nil +} + func getPolicyDifferences(current map[int]*networkservice.PolicyRoute, newPolicies []*networkservice.PolicyRoute) (toAdd []*networkservice.PolicyRoute, toRemove map[int]*networkservice.PolicyRoute) { type table struct { tableID int @@ -242,7 +265,7 @@ func routeAdd(ctx context.Context, handle *netlink.Handle, l netlink.Link, route return nil } -func del(ctx context.Context, conn *networkservice.Connection, tableIDs *Map) error { +func del(ctx context.Context, conn *networkservice.Connection, tableIDs *Map, nsRTableNextIDToConnID *NetnsRTableNextIDToConnMap) error { if mechanism := kernel.ToMechanism(conn.GetMechanism()); mechanism != nil && mechanism.GetVLAN() == 0 { netlinkHandle, err := link.GetNetlinkHandle(mechanism.GetNetNSURL()) if err != nil { @@ -251,6 +274,11 @@ func del(ctx context.Context, conn *networkservice.Connection, tableIDs *Map) er defer netlinkHandle.Close() ps, ok := tableIDs.LoadAndDelete(conn.GetId()) if ok { + netNSInode := mechanism.GetNetNSInode() + for tableID := range ps { + nsrtid := NewNetnsRTableNextID(netNSInode, tableID) + nsRTableNextIDToConnID.Delete(*nsrtid) + } for tableID, policy := range ps { if err := delRule(ctx, netlinkHandle, policy, tableID); err != nil { return err @@ -338,7 +366,6 @@ func getFreeTableID(ctx context.Context, handle *netlink.Handle) (int, error) { break } } - log.FromContext(ctx). WithField("tableID", tableID). WithField("netlink", "getFreeTableID").Debug("completed") diff --git a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/gen.go b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/gen.go index 578cd3f2..092ee288 100644 --- a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/gen.go +++ b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/gen.go @@ -1,5 +1,7 @@ // Copyright (c) 2022 Doc.ai and/or its affiliates. // +// Copyright (c) 2023 Nordix Foundation. +// // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,8 +25,12 @@ import ( ) //go:generate go-syncmap -output table_map.gen.go -type Map +//go:generate go-syncmap -output netns_rtable_conn_map.gen.go -type NetnsRTableNextIDToConnMap type policies map[int]*networkservice.PolicyRoute // Map - sync.Map with key == string (connID) and value == policies type Map sync.Map + +// NetnsRTableNextIDToConnMap - sync.Map with key NetnsRTableNextID value of string (connID) +type NetnsRTableNextIDToConnMap sync.Map diff --git a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/netns_rtable_conn_map.gen.go b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/netns_rtable_conn_map.gen.go new file mode 100644 index 00000000..643554c5 --- /dev/null +++ b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/netns_rtable_conn_map.gen.go @@ -0,0 +1,73 @@ +// Code generated by "-output netns_rtable_conn_map.gen.go -type NetnsRTableNextIDToConnMap -output netns_rtable_conn_map.gen.go -type NetnsRTableNextIDToConnMap"; DO NOT EDIT. +package iprule + +import ( + "sync" // Used by sync.Map. +) + +// Generate code that will fail if the constants change value. +func _() { + // An "cannot convert NetnsRTableNextIDToConnMap literal (type NetnsRTableNextIDToConnMap) to type sync.Map" compiler error signifies that the base type have changed. + // Re-run the go-syncmap command to generate them again. + _ = (sync.Map)(NetnsRTableNextIDToConnMap{}) +} + +var _nil_NetnsRTableNextIDToConnMap_string_value = func() (val string) { return }() + +// Load returns the value stored in the map for a key, or nil if no +// value is present. +// The ok result indicates whether value was found in the map. +func (m *NetnsRTableNextIDToConnMap) Load(key NetnsRTableNextID) (string, bool) { + value, ok := (*sync.Map)(m).Load(key) + if value == nil { + return _nil_NetnsRTableNextIDToConnMap_string_value, ok + } + return value.(string), ok +} + +// Store sets the value for a key. +func (m *NetnsRTableNextIDToConnMap) Store(key NetnsRTableNextID, value string) { + (*sync.Map)(m).Store(key, value) +} + +// LoadOrStore returns the existing value for the key if present. +// Otherwise, it stores and returns the given value. +// The loaded result is true if the value was loaded, false if stored. +func (m *NetnsRTableNextIDToConnMap) LoadOrStore(key NetnsRTableNextID, value string) (string, bool) { + actual, loaded := (*sync.Map)(m).LoadOrStore(key, value) + if actual == nil { + return _nil_NetnsRTableNextIDToConnMap_string_value, loaded + } + return actual.(string), loaded +} + +// LoadAndDelete deletes the value for a key, returning the previous value if any. +// The loaded result reports whether the key was present. +func (m *NetnsRTableNextIDToConnMap) LoadAndDelete(key NetnsRTableNextID) (value string, loaded bool) { + actual, loaded := (*sync.Map)(m).LoadAndDelete(key) + if actual == nil { + return _nil_NetnsRTableNextIDToConnMap_string_value, loaded + } + return actual.(string), loaded +} + +// Delete deletes the value for a key. +func (m *NetnsRTableNextIDToConnMap) Delete(key NetnsRTableNextID) { + (*sync.Map)(m).Delete(key) +} + +// Range calls f sequentially for each key and value present in the map. +// If f returns false, range stops the iteration. +// +// Range does not necessarily correspond to any consistent snapshot of the Map's +// contents: no key will be visited more than once, but if the value for any key +// is stored or deleted concurrently, Range may reflect any mapping for that key +// from any point during the Range call. +// +// Range may be O(N) with the number of elements in the map even if f returns +// false after a constant number of calls. +func (m *NetnsRTableNextIDToConnMap) Range(f func(key NetnsRTableNextID, value string) bool) { + (*sync.Map)(m).Range(func(key, value interface{}) bool { + return f(key.(NetnsRTableNextID), value.(string)) + }) +} diff --git a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/nsrtnextid.go b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/nsrtnextid.go new file mode 100644 index 00000000..352fcc0e --- /dev/null +++ b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/nsrtnextid.go @@ -0,0 +1,31 @@ +// Copyright (c) 2023 Nordix and/or its affiliates. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iprule + +// NetnsRTableNextID stores Network namespace and Next Routing Table ID +type NetnsRTableNextID struct { + ns string + nrtid int +} + +// NewNetnsRTableNextID returns *NetnsRTableNextID entry +func NewNetnsRTableNextID(ns string, nrtid int) *NetnsRTableNextID { + return &NetnsRTableNextID{ + ns: ns, + nrtid: nrtid, + } +} diff --git a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/server.go b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/server.go index f63e2cc3..b972da46 100644 --- a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/server.go +++ b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/server.go @@ -2,7 +2,7 @@ // // Copyright (c) 2022 Doc.ai and/or its affiliates. // -// Copyright (c) 2021-2022 Nordix Foundation. +// Copyright (c) 2023 Nordix Foundation. // // SPDX-License-Identifier: Apache-2.0 // @@ -35,6 +35,10 @@ import ( type ipruleServer struct { tables Map + // Protecting route and rule setting with this sync.Map + // The next table ID is calculated based on a dump + // other connection from same client can add new table in parallel + nsRTableNextIDToConnID NetnsRTableNextIDToConnMap } // NewServer creates a new server chain element setting ip rules @@ -55,7 +59,7 @@ func (i *ipruleServer) Request(ctx context.Context, request *networkservice.Netw return nil, err } - if err := create(ctx, conn, &i.tables); err != nil { + if err := create(ctx, conn, &i.tables, &i.nsRTableNextIDToConnID); err != nil { closeCtx, cancelClose := postponeCtxFunc() defer cancelClose() @@ -70,6 +74,6 @@ func (i *ipruleServer) Request(ctx context.Context, request *networkservice.Netw } func (i *ipruleServer) Close(ctx context.Context, conn *networkservice.Connection) (*empty.Empty, error) { - _ = del(ctx, conn, &i.tables) + _ = del(ctx, conn, &i.tables, &i.nsRTableNextIDToConnID) return next.Server(ctx).Close(ctx, conn) } diff --git a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/table_map.gen.go b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/table_map.gen.go index 15d44cce..630daf4e 100644 --- a/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/table_map.gen.go +++ b/pkg/kernel/networkservice/connectioncontextkernel/ipcontext/iprule/table_map.gen.go @@ -1,6 +1,4 @@ // Code generated by "-output table_map.gen.go -type Map -output table_map.gen.go -type Map"; DO NOT EDIT. -// Install -output table_map.gen.go -type Map by "go get -u github.com/searKing/golang/tools/-output table_map.gen.go -type Map" - package iprule import (