diff --git a/docs/graph.svg b/docs/graph.svg index e791bed8..fb6f8140 100644 --- a/docs/graph.svg +++ b/docs/graph.svg @@ -4,280 +4,280 @@ - - + + gr-0003 - + control_input - -control_input + +control_input loopback_input - -loopback_input + +loopback_input control_input->loopback_input - - + + arp_output_request - -arp_output_request + +arp_output_request control_input->arp_output_request - - + + icmp_local_send - -icmp_local_send + +icmp_local_send control_input->icmp_local_send - - + + ndp_ns_output - -ndp_ns_output + +ndp_ns_output control_input->ndp_ns_output - - + + ip_output - -ip_output + +ip_output control_input->ip_output - - + + ip6_output - -ip6_output + +ip6_output control_input->ip6_output - - + + loopback_input->ip_output - - + + loopback_input->ip6_output - - + + ip_input_local - -ip_input_local + +ip_input_local loopback_input->ip_input_local - - + + ip6_input_local - -ip6_input_local + +ip6_input_local loopback_input->ip6_input_local - - + + eth_output - -eth_output + +eth_output arp_output_request->eth_output - - + + icmp_output - -icmp_output + +icmp_output icmp_local_send->icmp_output - - + + - + ndp_ns_output->ip6_output - - + + ip_output->eth_output - - + + loopback_output - -loopback_output + +loopback_output ip_output->loopback_output - - + + ip_hold - -ip_hold + +ip_hold ip_output->ip_hold - - + + ipip_output - -ipip_output + +ipip_output ip_output->ipip_output - - + + ip6_output->eth_output - - + + ip6_output->loopback_output - - + + ip6_hold - -ip6_hold + +ip6_hold ip6_output->ip6_hold - - + + control_output - -control_output + +control_output eth_input - -eth_input + +eth_input arp_input - -arp_input + +arp_input eth_input->arp_input - - + + ip_input - -ip_input + +ip_input eth_input->ip_input - - + + ip6_input - -ip6_input + +ip6_input eth_input->ip6_input - - + + arp_input_request - -arp_input_request + +arp_input_request arp_input->arp_input_request - - + + @@ -288,320 +288,326 @@ arp_input->arp_input_reply - - + + 
ip_input->ip_input_local - - + + ip_forward - -ip_forward + +ip_forward ip_input->ip_forward - - + + ip6_input->ip6_input_local - - + + ip6_forward - -ip6_forward + +ip6_forward ip6_input->ip6_forward - - + + port_tx - -port_tx + +port_tx eth_output->port_tx - - + + icmp_input - -icmp_input + +icmp_input ip_input_local->icmp_input - - + + ipip_input - -ipip_input + +ipip_input ip_input_local->ipip_input - - + + tcp_redirect_loopback - -tcp_redirect_loopback + +tcp_redirect_loopback ip_input_local->tcp_redirect_loopback - - + + udp_redirect_loopback - -udp_redirect_loopback + +udp_redirect_loopback ip_input_local->udp_redirect_loopback - - + + sctp_redirect_loopback - -sctp_redirect_loopback + +sctp_redirect_loopback ip_input_local->sctp_redirect_loopback - - + + ip6_input_local->tcp_redirect_loopback - - + + ip6_input_local->udp_redirect_loopback - - + + ip6_input_local->sctp_redirect_loopback - - + + icmp6_input - -icmp6_input + +icmp6_input ip6_input_local->icmp6_input - - + + loopback_output->control_output - - + + port_rx - -port_rx + +port_rx port_rx->eth_input - - + + arp_input_request->control_output - - + + arp_output_reply - -arp_output_reply + +arp_output_reply arp_input_request->arp_output_reply - - + + arp_input_reply->control_output - - + + arp_output_reply->eth_output - - + + icmp_input->control_output - - + + icmp_input->icmp_output - - + + icmp_output->ip_output - - + + ip_forward->ip_output - - + + ip_hold->control_output - - + + - + ipip_input->ip_input - - + + - + tcp_redirect_loopback->loopback_output - - + + - + udp_redirect_loopback->loopback_output - - + + - + sctp_redirect_loopback->loopback_output - - + + - + ipip_output->ip_output - - + + icmp6_output - -icmp6_output + +icmp6_output icmp6_input->icmp6_output - - + + ndp_ns_input - -ndp_ns_input + +ndp_ns_input icmp6_input->ndp_ns_input - - + + ndp_na_input - -ndp_na_input + +ndp_na_input icmp6_input->ndp_na_input - - + + icmp6_output->ip6_output - - + + ndp_ns_input->ip6_output - - + + + + 
+ +ndp_ns_input->control_output + + - + -ndp_na_input->ip6_output - - +ndp_na_input->control_output + + ip6_forward->ip6_output - - + + ip6_hold->control_output - - + +
diff --git a/modules/ip6/control/gr_ip6_control.h b/modules/ip6/control/gr_ip6_control.h
index 0eb37e6c..2e1e69ef 100644
--- a/modules/ip6/control/gr_ip6_control.h
+++ b/modules/ip6/control/gr_ip6_control.h
@@ -25,6 +25,7 @@ struct nexthop *ip6_nexthop_lookup(uint16_t vrf_id, const struct rte_ipv6_addr *
 struct nexthop *ip6_nexthop_new(uint16_t vrf_id, uint16_t iface_id, const struct rte_ipv6_addr *);
 
 void ip6_nexthop_unreachable_cb(struct rte_mbuf *m);
+void ndp_probe_input_cb(struct rte_mbuf *m);
 
 int ip6_route_insert(uint16_t vrf_id, const struct rte_ipv6_addr *, uint8_t prefixlen, struct nexthop *);
 int ip6_route_delete(uint16_t vrf_id, const struct rte_ipv6_addr *, uint8_t prefixlen);
diff --git a/modules/ip6/control/nexthop.c b/modules/ip6/control/nexthop.c
index 18d3c476..13db89ca 100644
--- a/modules/ip6/control/nexthop.c
+++ b/modules/ip6/control/nexthop.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -110,6 +111,108 @@ void ip6_nexthop_unreachable_cb(struct rte_mbuf *m) {
 	rte_pktmbuf_free(m);
 }
 
+// Control plane callback processing an NDP probe (NS or NA) queued by the datapath.
+//
+// Looks up the nexthop matching the probe target address (creating it along with
+// an internal /128 route when missing), refreshes it with the link-layer address
+// found in the probe options and re-posts the packets held on it to ip6_output.
+// The probe mbuf is freed on every path.
+void ndp_probe_input_cb(struct rte_mbuf *m) {
+	const struct icmp6 *icmp6 = rte_pktmbuf_mtod(m, const struct icmp6 *);
+	const struct iface *iface = mbuf_data(m)->iface;
+	const struct icmp6_neigh_solicit *ns;
+	const struct icmp6_neigh_advert *na;
+	struct rte_ipv6_addr target;
+	struct rte_ether_addr mac;
+	struct nexthop *nh;
+	bool lladdr_found;
+
+	// The mbuf data starts at the ICMPv6 header: the option area length is
+	// the packet length minus the ICMPv6 header and the NS/NA header.
+	switch (icmp6->type) {
+	case ICMP6_TYPE_NEIGH_SOLICIT:
+		// NOTE(review): ns->target is the solicited address (RFC 4861 4.3)
+		// while the source link-layer option describes the sender. The sender
+		// nexthop should be keyed on the IPv6 source address, which cannot
+		// be read from the ICMPv6 payload -- confirm and fix.
+		ns = PAYLOAD(icmp6);
+		target = ns->target;
+		lladdr_found = icmp6_get_opt(
+			PAYLOAD(ns),
+			rte_pktmbuf_pkt_len(m) - sizeof(*icmp6) - sizeof(*ns),
+			ICMP6_OPT_SRC_LLADDR,
+			&mac
+		);
+		break;
+	case ICMP6_TYPE_NEIGH_ADVERT:
+		na = PAYLOAD(icmp6);
+		target = na->target;
+		lladdr_found = icmp6_get_opt(
+			PAYLOAD(na),
+			rte_pktmbuf_pkt_len(m) - sizeof(*icmp6) - sizeof(*na),
+			ICMP6_OPT_TARGET_LLADDR,
+			&mac
+		);
+		break;
+	default:
+		goto free;
+	}
+	if (!lladdr_found)
+		goto free;
+
+	nh = ip6_nexthop_lookup(iface->vrf_id, &target);
+	if (nh == NULL) {
+		// We don't have an entry for the probe sender address yet.
+		//
+		// Create one now. If the sender has requested our mac address,
+		// they will certainly contact us soon and it will save us an
+		// NDP solicitation.
+		if ((nh = ip6_nexthop_new(iface->vrf_id, iface->id, &target)) == NULL) {
+			LOG(ERR, "ip6_nexthop_new: %s", strerror(errno));
+			goto free;
+		}
+		// Add an internal /128 route to reference the newly created nexthop.
+		if (ip6_route_insert(iface->vrf_id, &target, RTE_IPV6_MAX_DEPTH, nh) < 0) {
+			LOG(ERR, "ip6_route_insert: %s", strerror(errno));
+			// NOTE(review): the nexthop created above is not released on
+			// this path -- confirm whether it leaks.
+			goto free;
+		}
+	}
+
+	// Static next hops never need updating.
+	if (nh->flags & GR_NH_F_STATIC)
+		goto free;
+
+	// Refresh all fields.
+	nh->last_reply = rte_get_tsc_cycles();
+	nh->iface_id = iface->id;
+	nh->flags |= GR_NH_F_REACHABLE;
+	nh->flags &= ~(GR_NH_F_STALE | GR_NH_F_PENDING | GR_NH_F_FAILED);
+	nh->ucast_probes = 0;
+	nh->bcast_probes = 0;
+	nh->lladdr = mac;
+
+	// Flush all held packets.
+	struct rte_mbuf *held = nh->held_pkts_head;
+	while (held != NULL) {
+		struct ip6_output_mbuf_data *o;
+		struct rte_mbuf *next;
+
+		next = queue_mbuf_data(held)->next;
+		o = ip6_output_mbuf_data(held);
+		o->nh = nh;
+		o->iface = NULL;
+		post_to_stack(ip6_output_node, held);
+		held = next;
+	}
+	nh->held_pkts_head = NULL;
+	nh->held_pkts_tail = NULL;
+	nh->held_pkts_num = 0;
+free:
+	rte_pktmbuf_free(m);
+}
+
 static struct api_out nh6_add(const void *request, void ** /*response*/) {
 	const struct gr_ip6_nh_add_req *req = request;
 	struct nexthop *nh;
diff --git a/modules/ip6/datapath/gr_ip6_datapath.h b/modules/ip6/datapath/gr_ip6_datapath.h
index 954f334a..c754b1ae 100644
--- a/modules/ip6/datapath/gr_ip6_datapath.h
+++ b/modules/ip6/datapath/gr_ip6_datapath.h
@@ -19,11 +19,6 @@ GR_MBUF_PRIV_DATA_TYPE(ip6_output_mbuf_data, {
 	const struct nexthop *nh;
 });
 
-GR_MBUF_PRIV_DATA_TYPE(ndp_mbuf_data, {
-	struct nexthop *local;
-	struct nexthop *remote;
-});
-
 GR_MBUF_PRIV_DATA_TYPE(ip6_local_mbuf_data, {
 	struct rte_ipv6_addr src;
 	struct rte_ipv6_addr dst;
diff --git a/modules/ip6/datapath/ndp_na_input.c b/modules/ip6/datapath/ndp_na_input.c
index 79ff47d8..adfd3891 100644
--- a/modules/ip6/datapath/ndp_na_input.c
+++ b/modules/ip6/datapath/ndp_na_input.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // Copyright (c) 2024 Robin Jarry
 
+#include
 #include
 #include
 #include
@@ -15,76 +16,32 @@
 #include
 
 enum {
-	IP_OUTPUT = 0,
+	CONTROL = 0,
 	INVAL,
 	EDGE_COUNT,
 };
 
-// Declaration in gr_ip6_datapath.h. This function is shared with ndp_ns_input.
-void ndp_update_nexthop(
-	struct rte_graph *graph,
-	struct rte_node *node,
-	struct nexthop *nh,
-	const struct iface *iface,
-	const struct rte_ether_addr *mac
-) {
-	struct ip6_output_mbuf_data *d;
-	struct rte_mbuf *m, *next;
-
-	// Static next hops never need updating.
-	if (nh->flags & GR_NH_F_STATIC)
-		return;
-
-	rte_spinlock_lock(&nh->lock);
-
-	// Refresh all fields.
-	nh->last_reply = rte_get_tsc_cycles();
-	nh->iface_id = iface->id;
-	nh->flags |= GR_NH_F_REACHABLE;
-	nh->flags &= ~(GR_NH_F_STALE | GR_NH_F_PENDING | GR_NH_F_FAILED);
-	nh->ucast_probes = 0;
-	nh->bcast_probes = 0;
-	nh->lladdr = *mac;
-
-	// Flush all held packets.
-	m = nh->held_pkts_head;
-	while (m != NULL) {
-		next = queue_mbuf_data(m)->next;
-		d = ip6_output_mbuf_data(m);
-		d->nh = nh;
-		d->iface = NULL;
-		rte_node_enqueue_x1(graph, node, IP_OUTPUT, m);
-		m = next;
-	}
-	nh->held_pkts_head = NULL;
-	nh->held_pkts_tail = NULL;
-	nh->held_pkts_num = 0;
-
-	rte_spinlock_unlock(&nh->lock);
-}
-
 static uint16_t ndp_na_input_process(
 	struct rte_graph *graph,
 	struct rte_node *node,
 	void **objs,
 	uint16_t nb_objs
 ) {
-	struct icmp6_neigh_solicit *ns;
+	struct control_output_mbuf_data *ctrl_data;
 	struct icmp6_neigh_advert *na;
 	struct ip6_local_mbuf_data *d;
 	struct rte_ether_addr lladdr;
+	const struct nexthop *remote;
 	const struct iface *iface;
-	struct nexthop *remote;
-	struct icmp6_opt *opt;
 	struct rte_mbuf *mbuf;
 	struct icmp6 *icmp6;
 	bool lladdr_found;
-	rte_edge_t next;
+	rte_edge_t edge;
 
 #define ASSERT_NDP(condition) \
 	do { \
 		if (!(condition)) { \
-			next = INVAL; \
+			edge = INVAL; \
 			goto next; \
 		} \
 	} while (0)
@@ -95,7 +52,7 @@ static uint16_t ndp_na_input_process(
 		d = ip6_local_mbuf_data(mbuf);
 		icmp6 = rte_pktmbuf_mtod(mbuf, struct icmp6 *);
 		iface = d->iface;
-		na = (struct icmp6_neigh_advert *)rte_pktmbuf_adj(mbuf, sizeof(*icmp6));
+		na = PAYLOAD(icmp6);
 
 		// Validation of Neighbor Advertisements
 		// https://www.rfc-editor.org/rfc/rfc4861.html#section-7.1.2
@@ -126,27 +83,27 @@ static uint16_t ndp_na_input_process(
 		remote = ip6_nexthop_lookup(iface->vrf_id, &na->target);
 		ASSERT_NDP(remote != NULL);
 
-		opt = (struct icmp6_opt *)rte_pktmbuf_adj(mbuf, sizeof(*ns));
+		// PAYLOAD() leaves the mbuf data offset untouched: only the bytes
+		// following the ICMPv6 and NA headers may be parsed as options.
 		lladdr_found = icmp6_get_opt(
-			opt, rte_pktmbuf_pkt_len(mbuf), ICMP6_OPT_TARGET_LLADDR, &lladdr
+			PAYLOAD(na),
+			rte_pktmbuf_pkt_len(mbuf) - sizeof(*icmp6) - sizeof(*na),
+			ICMP6_OPT_TARGET_LLADDR,
+			&lladdr
 		);
 		// If the link layer has addresses and no Target Link-Layer Address
 		// option is included, the receiving node SHOULD silently discard the
 		// received advertisement.
 		ASSERT_NDP(lladdr_found);
 
-		ndp_update_nexthop(graph, node, remote, iface, &lladdr);
-
-		if (gr_mbuf_is_traced(mbuf)) {
-			gr_mbuf_trace_add(mbuf, node, 0);
-			gr_mbuf_trace_finish(mbuf);
-		}
-		rte_pktmbuf_free(mbuf);
-		continue;
+		ctrl_data = control_output_mbuf_data(mbuf);
+		ctrl_data->iface = iface;
+		ctrl_data->callback = ndp_probe_input_cb;
+		edge = CONTROL;
 next:
 		if (gr_mbuf_is_traced(mbuf))
 			gr_mbuf_trace_add(mbuf, node, 0);
-		rte_node_enqueue_x1(graph, node, next, mbuf);
+		rte_node_enqueue_x1(graph, node, edge, mbuf);
 	}
 
 	return nb_objs;
@@ -159,7 +116,7 @@ static struct rte_node_register node = {
 
 	.nb_edges = EDGE_COUNT,
 	.next_nodes = {
-		[IP_OUTPUT] = "ip6_output",
+		[CONTROL] = "control_output",
 		[INVAL] = "ndp_na_input_inval",
 	},
 };
diff --git a/modules/ip6/datapath/ndp_ns_input.c b/modules/ip6/datapath/ndp_ns_input.c
index 6ed4360c..5a317866 100644
--- a/modules/ip6/datapath/ndp_ns_input.c
+++ b/modules/ip6/datapath/ndp_ns_input.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // Copyright (c) 2024 Robin Jarry
 
+#include
 #include
 #include
 #include
@@ -16,6 +17,7 @@
 
 enum {
 	IP_OUTPUT = 0,
+	CONTROL,
 	INVAL,
 	ERROR,
 	IGNORE,
@@ -28,7 +30,6 @@ static uint16_t ndp_ns_input_process(
 	void **objs,
 	uint16_t nb_objs
 ) {
-	struct nexthop *remote, *local;
 	struct icmp6_neigh_solicit *ns;
 	struct icmp6_neigh_advert *na;
 	struct ip6_local_mbuf_data *d;
@@ -39,10 +40,12 @@ static uint16_t ndp_ns_input_process(
 	struct rte_ipv6_hdr *ip;
 	struct icmp6_opt *opt;
 	struct rte_mbuf *mbuf;
+	struct nexthop *local;
 	uint16_t payload_len;
 	struct icmp6 *icmp6;
 	bool lladdr_found;
 	rte_edge_t next;
+	bool solicited;
 
 #define ASSERT_NDP(condition) \
 	do { \
@@ -57,7 +60,7 @@ static uint16_t ndp_ns_input_process(
 
 		d = ip6_local_mbuf_data(mbuf);
 		icmp6 = rte_pktmbuf_mtod(mbuf, struct icmp6 *);
-		ns = (struct icmp6_neigh_solicit *)rte_pktmbuf_adj(mbuf, sizeof(*icmp6));
+		ns = PAYLOAD(icmp6);
 		iface = d->iface;
 		src = d->src;
 		dst = d->dst;
@@ -83,11 +86,50 @@ static uint16_t ndp_ns_input_process(
 			goto next;
 		}
 
-		opt = (struct icmp6_opt *)rte_pktmbuf_adj(mbuf, sizeof(*ns));
+		// PAYLOAD() leaves the mbuf data offset untouched: only the bytes
+		// following the ICMPv6 and NS headers may be parsed as options.
 		lladdr_found = icmp6_get_opt(
-			opt, rte_pktmbuf_pkt_len(mbuf), ICMP6_OPT_SRC_LLADDR, &lladdr
+			PAYLOAD(ns),
+			rte_pktmbuf_pkt_len(mbuf) - sizeof(*icmp6) - sizeof(*ns),
+			ICMP6_OPT_SRC_LLADDR,
+			&lladdr
 		);
 
+		if (rte_ipv6_addr_is_unspec(&src)) {
+			// - If the IP source address is the unspecified address, the IP
+			//   destination address is a solicited-node multicast address.
+			ASSERT_NDP(rte_ipv6_addr_is_mcast(&dst));
+			// - If the IP source address is the unspecified address, there is
+			//   no source link-layer address option in the message.
+			ASSERT_NDP(!lladdr_found);
+			// If the source of the solicitation is the unspecified address, the
+			// node MUST set the Solicited flag to zero and multicast the
+			// advertisement to the all-nodes address.
+			src = (struct rte_ipv6_addr)RTE_IPV6_ADDR_ALLNODES_LINK_LOCAL;
+			solicited = false;
+		} else {
+			if (lladdr_found) {
+				// Copy the NS probe and send it to control plane for processing.
+				struct rte_mbuf *copy = rte_pktmbuf_copy(
+					mbuf, mbuf->pool, 0, UINT32_MAX
+				);
+				if (copy == NULL) {
+					next = ERROR;
+					goto next;
+				}
+				if (gr_mbuf_is_traced(mbuf))
+					gr_mbuf_trace_add(mbuf, node, 0);
+				struct control_output_mbuf_data *ctrl_data;
+				ctrl_data = control_output_mbuf_data(copy);
+				ctrl_data->iface = iface;
+				ctrl_data->callback = ndp_probe_input_cb;
+				rte_node_enqueue_x1(graph, node, CONTROL, copy);
+			}
+			// Otherwise, the node MUST set the Solicited flag to one and unicast the
+			// advertisement to the Source Address of the solicitation.
+			solicited = true;
+		}
+
 		// Reuse the mbuf to forge a neighbour advertisement reply.
 		// XXX,TODO: avoid issues with encap, remember previous data offset?
 		rte_pktmbuf_reset(mbuf);
@@ -97,6 +139,7 @@ static uint16_t ndp_ns_input_process(
 		na = (struct icmp6_neigh_advert *)rte_pktmbuf_append(mbuf, sizeof(*na));
 		na->override = 1;
 		na->router = 1;
+		na->solicited = solicited;
 		na->target = local->ipv6;
 		opt = (struct icmp6_opt *)rte_pktmbuf_append(mbuf, sizeof(*opt));
 		opt->type = ICMP6_OPT_TARGET_LLADDR;
@@ -111,46 +154,6 @@ static uint16_t ndp_ns_input_process(
 		// replaced with the remote one.
 		ip6_output_mbuf_data(mbuf)->nh = local;
 
-		if (rte_ipv6_addr_is_unspec(&src)) {
-			// - If the IP source address is the unspecified address, the IP
-			//   destination address is a solicited-node multicast address.
-			ASSERT_NDP(rte_ipv6_addr_is_mcast(&dst));
-			// - If the IP source address is the unspecified address, there is
-			//   no source link-layer address option in the message.
-			ASSERT_NDP(!lladdr_found);
-			// If the source of the solicitation is the unspecified address, the
-			// node MUST set the Solicited flag to zero and multicast the
-			// advertisement to the all-nodes address.
-			src = (struct rte_ipv6_addr)RTE_IPV6_ADDR_ALLNODES_LINK_LOCAL;
-			na->solicited = 0;
-		} else {
-			if (lladdr_found) {
-				// update or create the nexthop that sent the solicitation
-				if ((remote = ip6_nexthop_lookup(iface->vrf_id, &src)) == NULL) {
-					remote = ip6_nexthop_new(iface->vrf_id, iface->id, &src);
-					if (remote != NULL
-					    && ip6_route_insert(
-						       iface->vrf_id,
-						       &src,
-						       RTE_IPV6_MAX_DEPTH,
-						       remote
-					       ) < 0) {
-						next = ERROR;
-						goto next;
-					}
-				}
-				if (remote == NULL) {
-					next = ERROR;
-					goto next;
-				}
-				ndp_update_nexthop(graph, node, remote, iface, &lladdr);
-				ip6_output_mbuf_data(mbuf)->nh = remote;
-			}
-			// Otherwise, the node MUST set the Solicited flag to one and unicast the
-			// advertisement to the Source Address of the solicitation.
-			na->solicited = 1;
-		}
-
 		// Fill IPv6 layer
 		payload_len = rte_pktmbuf_pkt_len(mbuf);
 		ip = (struct rte_ipv6_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(*ip));
@@ -180,6 +183,7 @@ static struct rte_node_register node = {
 	.nb_edges = EDGE_COUNT,
 	.next_nodes = {
 		[IP_OUTPUT] = "ip6_output",
+		[CONTROL] = "control_output",
 		[INVAL] = "ndp_ns_input_inval",
 		[ERROR] = "ndp_ns_input_error",
 		[IGNORE] = "ndp_ns_input_ignore",