Skip to content

Commit

Permalink
Fix race condition in ovn-kubernetes CNI
Browse files Browse the repository at this point in the history
In OVN-Kubernetes out of order event lead to stale gatewayroutes and
nongatewayroutes. This lead to incorrect flow in datatpath. Now gatewayroutes
and nongatewayroutes use cluster ID instead of endpoint name. This ensures that
there is only one resource and it gets updated when remote endpoint is created
and updated and preventing stale endpoints. The gatewayroutes only care about
remoteCIDRs does not matter if the remote gateway switches.

Signed-off-by: Aswin Suryanarayanan <[email protected]>
  • Loading branch information
aswinsuryan committed Jan 5, 2024
1 parent 6a588b1 commit cd6e3db
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 11 deletions.
18 changes: 13 additions & 5 deletions pkg/routeagent_driver/handlers/ovn/gateway_route_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,20 +68,28 @@ func (h *GatewayRouteHandler) GetNetworkPlugins() []string {

func (h *GatewayRouteHandler) RemoteEndpointCreated(endpoint *submarinerv1.Endpoint) error {
if h.State().IsOnGateway() {
_, err := h.smClient.SubmarinerV1().GatewayRoutes(endpoint.Namespace).Create(context.TODO(),
h.newGatewayRoute(endpoint), metav1.CreateOptions{})
if err != nil && !apierrors.IsAlreadyExists(err) {
logger.V(log.TRACE).Infof("Remote endpoint %q created event received on gateway node", endpoint.Name)

gwr := h.newGatewayRoute(endpoint)

result, err := util.CreateOrUpdate(context.TODO(), GatewayResourceInterface(h.smClient, endpoint.Namespace),
gwr, util.Replace(gwr))
if err != nil {
return errors.Wrapf(err, "error processing the remote endpoint creation for %q", endpoint.Name)
}

logger.V(log.TRACE).Infof("GatewayRoute %s: %#v", result, gwr)
}

return nil
}

func (h *GatewayRouteHandler) RemoteEndpointRemoved(endpoint *submarinerv1.Endpoint) error {
if h.State().IsOnGateway() {
logger.V(log.TRACE).Infof("Endpoint %q removed event received on gateway node ", endpoint.Name)

if err := h.smClient.SubmarinerV1().GatewayRoutes(endpoint.Namespace).Delete(context.TODO(),
endpoint.Name, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
endpoint.Spec.ClusterID, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
return errors.Wrapf(err, "error deleting gatewayRoute %q", endpoint.Name)
}
}
Expand Down Expand Up @@ -109,7 +117,7 @@ func (h *GatewayRouteHandler) TransitionToGateway() error {
func (h *GatewayRouteHandler) newGatewayRoute(endpoint *submarinerv1.Endpoint) *submarinerv1.GatewayRoute {
return &submarinerv1.GatewayRoute{
ObjectMeta: metav1.ObjectMeta{
Name: endpoint.Name,
Name: endpoint.Spec.ClusterID,
},
RoutePolicySpec: submarinerv1.RoutePolicySpec{
RemoteCIDRs: endpoint.Spec.Subnets,
Expand Down
20 changes: 14 additions & 6 deletions pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,18 @@ func (h *NonGatewayRouteHandler) RemoteEndpointCreated(endpoint *submarinerv1.En
if !h.State().IsOnGateway() || h.transitSwitchIP == "" {
return nil
}
logger.V(log.TRACE).Infof("Remote endpoint %q created event received on gateway node", endpoint.Name)

_, err := h.smClient.SubmarinerV1().
NonGatewayRoutes(endpoint.Namespace).Create(context.TODO(),
h.newNonGatewayRoute(endpoint), metav1.CreateOptions{})
if err != nil && !apierrors.IsAlreadyExists(err) {
ngwr := h.newNonGatewayRoute(endpoint)

result, err := util.CreateOrUpdate(context.TODO(), NonGatewayResourceInterface(h.smClient, endpoint.Namespace),
ngwr, util.Replace(ngwr))
if err != nil {
return errors.Wrapf(err, "error processing the remote endpoint create event for %q", endpoint.Name)
}

logger.V(log.TRACE).Infof("NonGatewayRoute %s: %#v", result, ngwr)

return nil
}

Expand All @@ -100,9 +104,13 @@ func (h *NonGatewayRouteHandler) RemoteEndpointRemoved(endpoint *submarinerv1.En
return nil
}

logger.V(log.TRACE).Infof("Endpoint %q removed event received on gateway node ", endpoint.Name)

if err := h.smClient.SubmarinerV1().NonGatewayRoutes(endpoint.Namespace).Delete(context.TODO(),
endpoint.Name, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
endpoint.Spec.ClusterID, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
return errors.Wrapf(err, "error deleting nonGatewayRoute %q", endpoint.Name)
} else {
logger.Infof("Ignoring Endpoint removed")
}

return nil
Expand Down Expand Up @@ -132,7 +140,7 @@ func (h *NonGatewayRouteHandler) TransitionToGateway() error {
func (h *NonGatewayRouteHandler) newNonGatewayRoute(endpoint *submarinerv1.Endpoint) *submarinerv1.NonGatewayRoute {
return &submarinerv1.NonGatewayRoute{
ObjectMeta: metav1.ObjectMeta{
Name: endpoint.Name,
Name: endpoint.Spec.ClusterID,
Namespace: endpoint.Namespace,
},
RoutePolicySpec: submarinerv1.RoutePolicySpec{
Expand Down

0 comments on commit cd6e3db

Please sign in to comment.