Skip to content

Commit

Permalink
ndp: handle nexthop updates in control plane
Browse files Browse the repository at this point in the history
Do not modify nexthop objects in the data plane. It requires acquiring
locks and also modifying the routing table. It can lead to races between
multiple worker threads.

Instead, when receiving a valid packet in ndp_{ns,na}_input, make a copy
of it and send the copy to the control plane thread and do whatever is
required there:

* Possibly create a new nexthop for prefix routes and create a new /128
  route that points to it.
* Update the remote nexthop Ethernet address with the information
  provided in the NDP packet.
* If packets are present in the nexthop hold queue, flush it and send
  them to ip6_output.

The original mbuf is kept and sent along the graph in the same way as
before (e.g. if it is a neighbour solicitation, send a reply).

Change all nexthop references to const in the datapath to ensure they
cannot be modified.

Signed-off-by: Robin Jarry <[email protected]>
  • Loading branch information
rjarry committed Dec 16, 2024
1 parent 45b28c8 commit 86ac575
Show file tree
Hide file tree
Showing 6 changed files with 361 additions and 319 deletions.
422 changes: 214 additions & 208 deletions docs/graph.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions modules/ip6/control/gr_ip6_control.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct nexthop *ip6_nexthop_lookup(uint16_t vrf_id, const struct rte_ipv6_addr *
struct nexthop *ip6_nexthop_new(uint16_t vrf_id, uint16_t iface_id, const struct rte_ipv6_addr *);

void ip6_nexthop_unreachable_cb(struct rte_mbuf *m);
// Control plane callback for a neighbour solicitation/advertisement mbuf.
// Looks up (or creates) the nexthop matching the NDP target address, refreshes
// it with the link-layer address option and flushes its held packets.
// The mbuf is always freed by the callback.
void ndp_probe_input_cb(struct rte_mbuf *m);

int ip6_route_insert(uint16_t vrf_id, const struct rte_ipv6_addr *, uint8_t prefixlen, struct nexthop *);
int ip6_route_delete(uint16_t vrf_id, const struct rte_ipv6_addr *, uint8_t prefixlen);
Expand Down
89 changes: 89 additions & 0 deletions modules/ip6/control/nexthop.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <gr_api.h>
#include <gr_control_input.h>
#include <gr_control_output.h>
#include <gr_icmp6.h>
#include <gr_iface.h>
#include <gr_ip6.h>
#include <gr_ip6_control.h>
Expand Down Expand Up @@ -110,6 +111,94 @@ void ip6_nexthop_unreachable_cb(struct rte_mbuf *m) {
rte_pktmbuf_free(m);
}

void ndp_probe_input_cb(struct rte_mbuf *m) {
const struct icmp6 *icmp6 = rte_pktmbuf_mtod(m, const struct icmp6 *);
const struct iface *iface = mbuf_data(m)->iface;
const struct icmp6_neigh_solicit *ns;
const struct icmp6_neigh_advert *na;
struct rte_ipv6_addr target;
struct rte_ether_addr mac;
struct nexthop *nh;
bool lladdr_found;

switch (icmp6->type) {
case ICMP6_TYPE_NEIGH_SOLICIT:
ns = PAYLOAD(icmp6);
target = ns->target;
lladdr_found = icmp6_get_opt(
PAYLOAD(ns),
rte_pktmbuf_pkt_len(m) - sizeof(*ns),
ICMP6_OPT_SRC_LLADDR,
&mac
);
break;
case ICMP6_TYPE_NEIGH_ADVERT:
na = PAYLOAD(icmp6);
target = na->target;
lladdr_found = icmp6_get_opt(
PAYLOAD(na),
rte_pktmbuf_pkt_len(m) - sizeof(*ns),
ICMP6_OPT_TARGET_LLADDR,
&mac
);
break;
default:
goto free;
}
if (!lladdr_found)
goto free;

nh = ip6_nexthop_lookup(iface->vrf_id, &target);
if (nh == NULL) {
// We don't have an entry for the probe sender address yet.
//
// Create one now. If the sender has requested our mac address,
// they will certainly contact us soon and it will save us an
// NDP solicitation.
if ((nh = ip6_nexthop_new(iface->vrf_id, iface->id, &target)) == NULL) {
LOG(ERR, "ip6_nexthop_new: %s", strerror(errno));
goto free;
}
// Add an internal /128 route to reference the newly created nexthop.
if (ip6_route_insert(iface->vrf_id, &target, RTE_IPV6_MAX_DEPTH, nh) < 0) {
LOG(ERR, "ip6_route_insert: %s", strerror(errno));
goto free;
}
}

// Static next hops never need updating.
if (nh->flags & GR_NH_F_STATIC)
goto free;

// Refresh all fields.
nh->last_reply = rte_get_tsc_cycles();
nh->iface_id = iface->id;
nh->flags |= GR_NH_F_REACHABLE;
nh->flags &= ~(GR_NH_F_STALE | GR_NH_F_PENDING | GR_NH_F_FAILED);
nh->ucast_probes = 0;
nh->bcast_probes = 0;
nh->lladdr = mac;

// Flush all held packets.
struct rte_mbuf *held = nh->held_pkts_head;
while (held != NULL) {
struct ip6_output_mbuf_data *o;
struct rte_mbuf *next;

next = queue_mbuf_data(held)->next;
o = ip6_output_mbuf_data(held);
o->nh = nh;
o->iface = NULL;
post_to_stack(ip6_output_node, held);
held = next;
}
nh->held_pkts_head = NULL;
nh->held_pkts_tail = NULL;
nh->held_pkts_num = 0;
free:
rte_pktmbuf_free(m);
}

static struct api_out nh6_add(const void *request, void ** /*response*/) {
const struct gr_ip6_nh_add_req *req = request;
struct nexthop *nh;
Expand Down
5 changes: 0 additions & 5 deletions modules/ip6/datapath/gr_ip6_datapath.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,6 @@

// Private mbuf data for packets handed to ip6_output: the nexthop to use.
// The pointer is const so that the datapath cannot modify nexthop objects.
// NOTE(review): presumably the macro also adds common fields (e.g. iface) and
// generates the ip6_output_mbuf_data() accessor — confirm in its definition.
GR_MBUF_PRIV_DATA_TYPE(ip6_output_mbuf_data, { const struct nexthop *nh; });

// Private mbuf data carrying two mutable nexthop pointers for NDP processing.
// NOTE(review): presumably "local" is our own address nexthop and "remote" the
// peer's one — the users of this type are not visible here, confirm.
GR_MBUF_PRIV_DATA_TYPE(ndp_mbuf_data, {
struct nexthop *local;
struct nexthop *remote;
});

GR_MBUF_PRIV_DATA_TYPE(ip6_local_mbuf_data, {
struct rte_ipv6_addr src;
struct rte_ipv6_addr dst;
Expand Down
76 changes: 14 additions & 62 deletions modules/ip6/datapath/ndp_na_input.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause
// Copyright (c) 2024 Robin Jarry

#include <gr_control_output.h>
#include <gr_graph.h>
#include <gr_icmp6.h>
#include <gr_ip6_control.h>
Expand All @@ -15,76 +16,32 @@
#include <rte_ip6.h>

// Edge indexes of the graph node registered in this file; they index the
// .next_nodes table of its rte_node_register.
// NOTE(review): IP_OUTPUT and CONTROL both evaluate to 0 in this view; this
// looks like a rendered diff where CONTROL replaces IP_OUTPUT — confirm that
// only one of them remains in the actual file.
enum {
IP_OUTPUT = 0,
CONTROL = 0,
// Packets failing the ASSERT_NDP validation are sent to this edge.
INVAL,
// Number of edges, used as .nb_edges.
EDGE_COUNT,
};

// Declaration in gr_ip6_datapath.h. This function is shared with ndp_ns_input.
void ndp_update_nexthop(
struct rte_graph *graph,
struct rte_node *node,
struct nexthop *nh,
const struct iface *iface,
const struct rte_ether_addr *mac
) {
struct ip6_output_mbuf_data *d;
struct rte_mbuf *m, *next;

// Static next hops never need updating.
if (nh->flags & GR_NH_F_STATIC)
return;

rte_spinlock_lock(&nh->lock);

// Refresh all fields.
nh->last_reply = rte_get_tsc_cycles();
nh->iface_id = iface->id;
nh->flags |= GR_NH_F_REACHABLE;
nh->flags &= ~(GR_NH_F_STALE | GR_NH_F_PENDING | GR_NH_F_FAILED);
nh->ucast_probes = 0;
nh->bcast_probes = 0;
nh->lladdr = *mac;

// Flush all held packets.
m = nh->held_pkts_head;
while (m != NULL) {
next = queue_mbuf_data(m)->next;
d = ip6_output_mbuf_data(m);
d->nh = nh;
d->iface = NULL;
rte_node_enqueue_x1(graph, node, IP_OUTPUT, m);
m = next;
}
nh->held_pkts_head = NULL;
nh->held_pkts_tail = NULL;
nh->held_pkts_num = 0;

rte_spinlock_unlock(&nh->lock);
}

static uint16_t ndp_na_input_process(
struct rte_graph *graph,
struct rte_node *node,
void **objs,
uint16_t nb_objs
) {
struct icmp6_neigh_solicit *ns;
struct control_output_mbuf_data *ctrl_data;
struct icmp6_neigh_advert *na;
struct ip6_local_mbuf_data *d;
struct rte_ether_addr lladdr;
const struct nexthop *remote;
const struct iface *iface;
struct nexthop *remote;
struct icmp6_opt *opt;
struct rte_mbuf *mbuf;
struct icmp6 *icmp6;
bool lladdr_found;
rte_edge_t next;
rte_edge_t edge;

#define ASSERT_NDP(condition) \
do { \
if (!(condition)) { \
next = INVAL; \
edge = INVAL; \
goto next; \
} \
} while (0)
Expand All @@ -95,7 +52,7 @@ static uint16_t ndp_na_input_process(
d = ip6_local_mbuf_data(mbuf);
icmp6 = rte_pktmbuf_mtod(mbuf, struct icmp6 *);
iface = d->iface;
na = (struct icmp6_neigh_advert *)rte_pktmbuf_adj(mbuf, sizeof(*icmp6));
na = PAYLOAD(icmp6);

// Validation of Neighbor Advertisements
// https://www.rfc-editor.org/rfc/rfc4861.html#section-7.1.2
Expand Down Expand Up @@ -126,27 +83,22 @@ static uint16_t ndp_na_input_process(
remote = ip6_nexthop_lookup(iface->vrf_id, &na->target);
ASSERT_NDP(remote != NULL);

opt = (struct icmp6_opt *)rte_pktmbuf_adj(mbuf, sizeof(*ns));
lladdr_found = icmp6_get_opt(
opt, rte_pktmbuf_pkt_len(mbuf), ICMP6_OPT_TARGET_LLADDR, &lladdr
PAYLOAD(na), rte_pktmbuf_pkt_len(mbuf), ICMP6_OPT_TARGET_LLADDR, &lladdr
);
// If the link layer has addresses and no Target Link-Layer Address
// option is included, the receiving node SHOULD silently discard the
// received advertisement.
ASSERT_NDP(lladdr_found);

ndp_update_nexthop(graph, node, remote, iface, &lladdr);

if (gr_mbuf_is_traced(mbuf)) {
gr_mbuf_trace_add(mbuf, node, 0);
gr_mbuf_trace_finish(mbuf);
}
rte_pktmbuf_free(mbuf);
continue;
ctrl_data = control_output_mbuf_data(mbuf);
ctrl_data->iface = iface;
ctrl_data->callback = ndp_probe_input_cb;
edge = CONTROL;
next:
if (gr_mbuf_is_traced(mbuf))
gr_mbuf_trace_add(mbuf, node, 0);
rte_node_enqueue_x1(graph, node, next, mbuf);
rte_node_enqueue_x1(graph, node, edge, mbuf);
}

return nb_objs;
Expand All @@ -159,7 +111,7 @@ static struct rte_node_register node = {

.nb_edges = EDGE_COUNT,
.next_nodes = {
[IP_OUTPUT] = "ip6_output",
[CONTROL] = "control_output",
[INVAL] = "ndp_na_input_inval",
},
};
Expand Down
Loading

0 comments on commit 86ac575

Please sign in to comment.