diff --git a/modules/infra/control/gr_nh_control.h b/modules/infra/control/gr_nh_control.h
index 62690efb..ab4423ed 100644
--- a/modules/infra/control/gr_nh_control.h
+++ b/modules/infra/control/gr_nh_control.h
@@ -7,9 +7,46 @@
 #include
 #include

+#include
 #include
 #include

+// Forward declaration
+struct nexthop;
+
+// Nexthop pool. Only one should be needed per L3 address family.
+struct nh_pool;
+
+// Callback that will be invoked when a nexthop needs to be refreshed by sending a probe.
+typedef int (*nh_solicit_cb_t)(struct nexthop *);
+
+// Callback that will be invoked when all nexthop probes failed and it needs to be freed.
+typedef void (*nh_free_cb_t)(struct nexthop *);
+
+// Nexthop pool options.
+struct nh_pool_opts {
+	// Callback that will be invoked when a nexthop needs to be refreshed by sending a probe.
+	nh_solicit_cb_t solicit_nh;
+	// Callback that will be invoked when all nexthop probes failed and it needs to be freed.
+	nh_free_cb_t free_nh;
+	// The number of nexthops allocated in this pool.
+	unsigned num_nexthops;
+};
+
+// Allocate a new nexthop pool with the provided options.
+// All fields of opts are mandatory: nh_pool_new() aborts if opts or any of its fields is NULL/0.
+struct nh_pool *
+nh_pool_new(uint8_t family, struct event_base *base, const struct nh_pool_opts *opts);
+
+// Free a nexthop pool previously allocated with nh_pool_new().
+void nh_pool_free(struct nh_pool *);
+
+// nh_pool_iter callback.
+typedef void (*nh_iter_cb_t)(struct nexthop *nh, void *priv);
+
+// Iterate over a nexthop pool and invoke a callback for each active nexthop.
+void nh_pool_iter(struct nh_pool *, nh_iter_cb_t nh_cb, void *priv);
+
 // Max number of packets to hold per next hop waiting for resolution (default: 256).
 #define NH_MAX_HELD_PKTS 256
 // Reachable next hop lifetime after last probe reply received (default: 20 min).
@@ -28,14 +65,16 @@ struct __rte_cache_aligned nexthop {
 	uint16_t iface_id;
 	union {
+		struct {
+		} addr;
 		ip4_addr_t ipv4;
 		struct rte_ipv6_addr ipv6;
 	};
 	uint8_t prefixlen;
+	uint8_t family; // AF_INET, AF_INET6, ...
 	uint8_t ucast_probes;
 	uint8_t bcast_probes;
-	uint32_t ref_count; // number of routes referencing this nexthop
 	uint64_t last_request;
 	uint64_t last_reply;

@@ -45,6 +84,10 @@ struct __rte_cache_aligned nexthop {
 	uint16_t held_pkts_num;
 	struct rte_mbuf *held_pkts_head;
 	struct rte_mbuf *held_pkts_tail;
+
+	// internal
+	uint32_t ref_count; // number of routes referencing this nexthop
+	struct nh_pool *pool;
 };

 #define HOPLIST_MAX_SIZE 8
@@ -55,4 +98,10 @@ struct hoplist {
 	struct nexthop *nh[HOPLIST_MAX_SIZE];
 };

+struct nexthop *nexthop_lookup(struct nh_pool *, uint16_t vrf_id, const void *addr);
+struct nexthop *nexthop_new(struct nh_pool *, uint16_t vrf_id, uint16_t iface_id, const void *addr);
+
+void nexthop_incref(struct nexthop *);
+void nexthop_decref(struct nexthop *);
+
 #endif
diff --git a/modules/infra/control/meson.build b/modules/infra/control/meson.build
index a77f3538..611ebe33 100644
--- a/modules/infra/control/meson.build
+++ b/modules/infra/control/meson.build
@@ -6,6 +6,7 @@ src += files(
   'iface.c',
   'loopback.c',
   'mempool.c',
+  'nexthop.c',
   'port.c',
   'worker.c',
   'graph.c',
diff --git a/modules/infra/control/nexthop.c b/modules/infra/control/nexthop.c
new file mode 100644
index 00000000..4726059b
--- /dev/null
+++ b/modules/infra/control/nexthop.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// Copyright (c) 2024 Robin Jarry
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+struct nh_pool {
+	struct rte_mempool *mp;
+	struct event *ageing_timer;
+	nh_solicit_cb_t solicit_nh;
+	nh_free_cb_t free_nh;
+	unsigned num_nexthops;
+	uint8_t family;
+};
+
+static void nh_pool_do_ageing(evutil_socket_t, short, void *);
+
+struct nh_pool *
+nh_pool_new(uint8_t family, struct event_base *ev_base, const struct nh_pool_opts *opts) {
+	struct nh_pool *nhp;
+	const char *name;
+
+	if (opts == NULL || ev_base == NULL || opts->num_nexthops == 0 || opts->free_nh == NULL
+	    || opts->solicit_nh == NULL)
+		ABORT("invalid arguments");
+
+	switch (family) {
+	case AF_INET:
+		name = "ipv4-nexthops";
+		break;
+	case AF_INET6:
+		name = "ipv6-nexthops";
+		break;
+	default:
+		ABORT("unsupported address family: %hhu", family);
+	}
+
+	nhp = rte_zmalloc(name, sizeof(*nhp), alignof(struct nh_pool));
+	if (nhp == NULL) {
+		LOG(ERR, "rte_zmalloc() failed");
+		return errno_set_null(ENOMEM);
+	}
+
+	nhp->family = family;
+	nhp->free_nh = opts->free_nh;
+	nhp->solicit_nh = opts->solicit_nh;
+	nhp->num_nexthops = rte_align32pow2(opts->num_nexthops) - 1;
+	nhp->mp = rte_mempool_create(
+		name,
+		nhp->num_nexthops,
+		sizeof(struct nexthop),
+		0, // cache size
+		0, // priv size
+		NULL, // mp_init
+		NULL, // mp_init_arg
+		NULL, // obj_init
+		NULL, // obj_init_arg
+		SOCKET_ID_ANY,
+		0 // flags
+	);
+	if (nhp->mp == NULL) {
+		nh_pool_free(nhp);
+		return errno_set_null(ENOMEM);
+	}
+
+	nhp->ageing_timer = event_new(
+		ev_base, -1, EV_PERSIST | EV_FINALIZE, nh_pool_do_ageing, nhp
+	);
+	if (nhp->ageing_timer == NULL) {
+		LOG(ERR, "event_new() failed");
+		nh_pool_free(nhp);
+		return errno_set_null(ENOMEM);
+	}
+
+	if (event_add(nhp->ageing_timer, &(struct timeval) {.tv_sec = 1}) < 0) {
+		LOG(ERR, "event_add() failed");
+		nh_pool_free(nhp);
+		return errno_set_null(ENOMEM);
+	}
+
+	return nhp;
+}
+
+void nh_pool_free(struct nh_pool *nhp) {
+	if (nhp == NULL)
+		return;
+	if (nhp->ageing_timer)
+		event_free(nhp->ageing_timer);
+	if (nhp->mp)
+		rte_mempool_free(nhp->mp);
+	rte_free(nhp);
+}
+
+struct nexthop *
+nexthop_new(struct nh_pool *nhp, uint16_t vrf_id, uint16_t iface_id, const void *addr) {
+	struct nexthop *nh;
+	void *data;
+	int ret;
+
+	if (nhp == NULL)
+		ABORT("nhp == NULL");
+
+	if ((ret = rte_mempool_get(nhp->mp, &data)) < 0)
+		return errno_set_null(-ret);
+
+	nh = data;
+	nh->vrf_id = vrf_id;
+	nh->iface_id = iface_id;
+	nh->family = nhp->family;
+	switch (nhp->family) {
+	case AF_INET:
+		nh->ipv4 = *(ip4_addr_t *)addr;
+		break;
+	case AF_INET6:
+		nh->ipv6 = *(struct rte_ipv6_addr *)addr;
+		break;
+	}
+	nh->pool = nhp;
+
+	return nh;
+}
+
+struct pool_iterator {
+	struct nh_pool *nhp;
+	nh_iter_cb_t user_cb;
+	void *priv;
+};
+
+static void nh_pool_iter_cb(struct rte_mempool *, void *priv, void *obj, unsigned /*obj_idx*/) {
+	struct pool_iterator *it = priv;
+	struct nexthop *nh = obj;
+	// A zero ref_count marks an object that is not referenced: do not expose it.
+	if (nh->ref_count != 0)
+		it->user_cb(nh, it->priv);
+}
+
+void nh_pool_iter(struct nh_pool *nhp, nh_iter_cb_t nh_cb, void *priv) {
+	struct pool_iterator it = {
+		.nhp = nhp,
+		.user_cb = nh_cb,
+		.priv = priv,
+	};
+	rte_mempool_obj_iter(nhp->mp, nh_pool_iter_cb, &it);
+}
+
+struct lookup_filter {
+	uint16_t vrf_id;
+	uint8_t family;
+	const void *addr;
+	struct nexthop *nh;
+};
+
+static void nh_lookup_cb(struct nexthop *nh, void *priv) {
+	struct lookup_filter *filter = priv;
+
+	if (filter->nh != NULL || nh->vrf_id != filter->vrf_id)
+		return;
+
+	switch (filter->family) {
+	case AF_INET:
+		if (nh->ipv4 == *(ip4_addr_t *)filter->addr)
+			filter->nh = nh;
+		break;
+	case AF_INET6:
+		if (rte_ipv6_addr_eq(&nh->ipv6, filter->addr))
+			filter->nh = nh;
+		break;
+	}
+}
+
+struct nexthop *nexthop_lookup(struct nh_pool *nhp, uint16_t vrf_id, const void *addr) {
+	struct lookup_filter filter = {.family = nhp->family, .vrf_id = vrf_id, .addr = addr};
+	nh_pool_iter(nhp, nh_lookup_cb, &filter);
+	return filter.nh ?: errno_set_null(ENOENT);
+}
+
+// Drop one reference on a nexthop. When the last reference is released, the
+// held packets are freed and the nexthop is zeroed and returned to its pool.
+void nexthop_decref(struct nexthop *nh) {
+	if (nh->ref_count <= 1) {
+		struct nh_pool *nhp = nh->pool;
+		rte_spinlock_lock(&nh->lock);
+		// Flush all held packets.
+		struct rte_mbuf *m = nh->held_pkts_head;
+		while (m != NULL) {
+			struct rte_mbuf *next = queue_mbuf_data(m)->next;
+			rte_pktmbuf_free(m);
+			m = next;
+		}
+		// Unlock before memset() (which also clears the lock itself) and only
+		// then give the object back: once returned to the mempool it may be
+		// handed out again by nexthop_new() and must not be written anymore.
+		rte_spinlock_unlock(&nh->lock);
+		memset(nh, 0, sizeof(*nh));
+		rte_mempool_put(nhp->mp, nh);
+	} else {
+		nh->ref_count--;
+	}
+}
+
+void nexthop_incref(struct nexthop *nh) {
+	nh->ref_count++;
+}
+
+static void nexthop_ageing_cb(struct nexthop *nh, void *priv) {
+	uint64_t now = rte_get_tsc_cycles();
+	uint64_t reply_age, request_age;
+	unsigned probes, max_probes;
+	struct nh_pool *nhp = priv;
+
+	if (nh->flags & GR_NH_F_STATIC)
+		return;
+
+	reply_age = (now - nh->last_reply) / rte_get_tsc_hz();
+	request_age = (now - nh->last_request) / rte_get_tsc_hz();
+	max_probes = NH_UCAST_PROBES + NH_BCAST_PROBES;
+	probes = nh->ucast_probes + nh->bcast_probes;
+
+	if (nh->flags & (GR_NH_F_PENDING | GR_NH_F_STALE) && request_age > probes) {
+		if (probes >= max_probes && !(nh->flags & GR_NH_F_GATEWAY)) {
+			LOG(DEBUG,
+			    ADDR_F " vrf=%u failed_probes=%u held_pkts=%u: %s -> failed",
+			    ADDR_W(nh->family),
+			    &nh->addr,
+			    nh->vrf_id,
+			    probes,
+			    nh->held_pkts_num,
+			    gr_nh_flag_name(nh->flags & (GR_NH_F_PENDING | GR_NH_F_STALE)));
+
+			nh->flags &= ~(GR_NH_F_PENDING | GR_NH_F_STALE);
+			nh->flags |= GR_NH_F_FAILED;
+		} else {
+			if (nhp->solicit_nh(nh) < 0)
+				LOG(ERR,
+				    ADDR_F " vrf=%u solicit failed: %s",
+				    ADDR_W(nh->family),
+				    &nh->addr,
+				    nh->vrf_id,
+				    strerror(errno));
+		}
+	} else if (nh->flags & GR_NH_F_REACHABLE && reply_age > NH_LIFETIME_REACHABLE) {
+		nh->flags &= ~GR_NH_F_REACHABLE;
+		nh->flags |= GR_NH_F_STALE;
+	} else if (nh->flags & GR_NH_F_FAILED && request_age > NH_LIFETIME_UNREACHABLE) {
+		LOG(DEBUG,
+		    ADDR_F " vrf=%u failed_probes=%u held_pkts=%u: failed -> ",
+		    ADDR_W(nh->family),
+		    &nh->addr,
+		    nh->vrf_id,
+		    probes,
+		    nh->held_pkts_num);
+		nhp->free_nh(nh);
+	}
+}
+
+static void nh_pool_do_ageing(evutil_socket_t, short /*what*/, void *priv) {
+	struct nh_pool *nhp = priv;
+	nh_pool_iter(nhp, nexthop_ageing_cb, nhp);
+}
diff --git a/modules/ip/control/address.c b/modules/ip/control/address.c
index 6015a8d0..91f2dba2 100644
--- a/modules/ip/control/address.c
+++ b/modules/ip/control/address.c
@@ -91,7 +91,7 @@ static struct api_out addr_add(const void *request, void ** /*response*/) {

 	if (iface_get_eth_addr(iface->id, &nh->lladdr) < 0)
 		if (errno != EOPNOTSUPP) {
-			ip4_nexthop_decref(nh);
+			nexthop_decref(nh);
 			return api_out(errno, 0);
 		}

diff --git a/modules/ip/control/gr_ip4_control.h b/modules/ip/control/gr_ip4_control.h
index b8aa262b..f30ede17 100644
--- a/modules/ip/control/gr_ip4_control.h
+++ b/modules/ip/control/gr_ip4_control.h
@@ -22,8 +22,6 @@

 struct nexthop *ip4_nexthop_lookup(uint16_t vrf_id, ip4_addr_t ip);
 struct nexthop *ip4_nexthop_new(uint16_t vrf_id, uint16_t iface_id, ip4_addr_t ip);
-void ip4_nexthop_incref(struct nexthop *);
-void ip4_nexthop_decref(struct nexthop *);

 int ip4_route_insert(uint16_t vrf_id, ip4_addr_t ip, uint8_t prefixlen, struct nexthop *);
 int ip4_route_delete(uint16_t vrf_id, ip4_addr_t ip, uint8_t prefixlen);
diff --git a/modules/ip/control/nexthop.c b/modules/ip/control/nexthop.c
index b50462ad..42981cf3 100644
--- a/modules/ip/control/nexthop.c
+++ b/modules/ip/control/nexthop.c
@@ -22,64 +22,14 @@
 #include
 #include

-static struct rte_mempool *nh_pool;
+static struct nh_pool *nh_pool;

 struct nexthop *ip4_nexthop_new(uint16_t vrf_id, uint16_t iface_id, ip4_addr_t ip) {
-	struct nexthop *nh;
-	void *data;
-	int ret;
-
-	if ((ret = rte_mempool_get(nh_pool, &data)) < 0)
-		return errno_set_null(-ret);
-
-	nh = data;
-	nh->vrf_id = vrf_id;
-	nh->iface_id = iface_id;
-	nh->ipv4 = ip;
-
-	return nh;
-}
-
-struct lookup_filter {
-	uint16_t vrf_id;
-	ip4_addr_t ip;
-	struct nexthop *nh;
-};
-
-static void nh_lookup_cb(struct rte_mempool *, void *opaque, void *obj, unsigned /*obj_idx*/) {
-	struct lookup_filter *filter = opaque;
-	struct nexthop *nh = obj;
-	if (filter->nh == NULL && nh->ref_count > 0 && nh->ipv4 ==
filter->ip
-	    && nh->vrf_id == filter->vrf_id)
-		filter->nh = nh;
+	return nexthop_new(nh_pool, vrf_id, iface_id, &ip);
 }

 struct nexthop *ip4_nexthop_lookup(uint16_t vrf_id, ip4_addr_t ip) {
-	struct lookup_filter filter = {.vrf_id = vrf_id, .ip = ip};
-	rte_mempool_obj_iter(nh_pool, nh_lookup_cb, &filter);
-	return filter.nh ?: errno_set_null(ENOENT);
-}
-
-void ip4_nexthop_decref(struct nexthop *nh) {
-	if (nh->ref_count <= 1) {
-		rte_spinlock_lock(&nh->lock);
-		// Flush all held packets.
-		struct rte_mbuf *m = nh->held_pkts_head;
-		while (m != NULL) {
-			struct rte_mbuf *next = queue_mbuf_data(m)->next;
-			rte_pktmbuf_free(m);
-			m = next;
-		}
-		rte_spinlock_unlock(&nh->lock);
-		memset(nh, 0, sizeof(*nh));
-		rte_mempool_put(nh_pool, nh);
-	} else {
-		nh->ref_count--;
-	}
-}
-
-void ip4_nexthop_incref(struct nexthop *nh) {
-	nh->ref_count++;
+	return nexthop_lookup(nh_pool, vrf_id, &ip);
 }

 static struct api_out nh4_add(const void *request, void ** /*response*/) {
@@ -138,12 +88,11 @@ struct list_context {
 	struct gr_nexthop *nh;
 };

-static void nh_list_cb(struct rte_mempool *, void *opaque, void *obj, unsigned /*obj_idx*/) {
-	struct list_context *ctx = opaque;
-	struct nexthop *nh = obj;
+static void nh_list_cb(struct nexthop *nh, void *priv) {
+	struct list_context *ctx = priv;
 	struct gr_nexthop api_nh;

-	if (nh->ref_count == 0 || (nh->vrf_id != ctx->vrf_id && ctx->vrf_id != UINT16_MAX))
+	if (nh->vrf_id != ctx->vrf_id && ctx->vrf_id != UINT16_MAX)
 		return;

 	api_nh.ipv4 = nh->ipv4;
@@ -165,7 +114,7 @@ static struct api_out nh4_list(const void *request, void **response) {
 	struct gr_ip4_nh_list_resp *resp = NULL;
 	size_t len;

-	rte_mempool_obj_iter(nh_pool, nh_list_cb, &ctx);
+	nh_pool_iter(nh_pool, nh_list_cb, &ctx);

 	len = sizeof(*resp) + gr_vec_len(ctx.nh) * sizeof(*ctx.nh);
 	if ((resp = calloc(1, len)) == NULL) {
@@ -182,90 +131,21 @@ static struct api_out nh4_list(const void *request, void **response) {
 	return api_out(0, len);
 }

-static void nh_gc_cb(struct rte_mempool *, void * /*opaque*/, void *obj, unsigned /*obj_idx*/) {
-	uint64_t now = rte_get_tsc_cycles();
-	uint64_t reply_age, request_age;
-	unsigned probes, max_probes;
-	struct nexthop *nh = obj;
-
-	max_probes = NH_UCAST_PROBES + NH_BCAST_PROBES;
-
-	if (nh->ref_count == 0 || nh->flags & GR_NH_F_STATIC)
-		return;
-
-	reply_age = (now - nh->last_reply) / rte_get_tsc_hz();
-	request_age = (now - nh->last_request) / rte_get_tsc_hz();
-	probes = nh->ucast_probes + nh->bcast_probes;
-
-	if (nh->flags & (GR_NH_F_PENDING | GR_NH_F_STALE) && request_age > probes) {
-		if (probes >= max_probes && !(nh->flags & GR_NH_F_GATEWAY)) {
-			LOG(DEBUG,
-			    IP4_F " vrf=%u failed_probes=%u held_pkts=%u: %s -> failed",
-			    &nh->ipv4,
-			    nh->vrf_id,
-			    probes,
-			    nh->held_pkts_num,
-			    gr_nh_flag_name(nh->flags & (GR_NH_F_PENDING | GR_NH_F_STALE)));
-
-			nh->flags &= ~(GR_NH_F_PENDING | GR_NH_F_STALE);
-			nh->flags |= GR_NH_F_FAILED;
-		} else {
-			if (arp_output_request_solicit(nh) < 0)
-				LOG(ERR, "arp_output_request_solicit: %s", strerror(errno));
-		}
-	} else if (nh->flags & GR_NH_F_REACHABLE && reply_age > NH_LIFETIME_REACHABLE) {
-		nh->flags &= ~GR_NH_F_REACHABLE;
-		nh->flags |= GR_NH_F_STALE;
-	} else if (nh->flags & GR_NH_F_FAILED && request_age > NH_LIFETIME_UNREACHABLE) {
-		LOG(DEBUG,
-		    IP4_F " vrf=%u failed_probes=%u held_pkts=%u: failed -> ",
-		    &nh->ipv4,
-		    nh->vrf_id,
-		    probes,
-		    nh->held_pkts_num);
-
-		// this also does ip4_nexthop_decref(), freeing the next hop
-		// and buffered packets.
-		ip4_route_cleanup(nh);
-	}
-}
-
-static void nexthop_gc(evutil_socket_t, short /*what*/, void * /*priv*/) {
-	rte_mempool_obj_iter(nh_pool, nh_gc_cb, NULL);
-}
-
-static struct event *nh_gc_timer;
-
 static void nh4_init(struct event_base *ev_base) {
-	nh_pool = rte_mempool_create(
-		"ip4_nh", // name
-		rte_align32pow2(IP4_MAX_NEXT_HOPS) - 1,
-		sizeof(struct nexthop),
-		0, // cache size
-		0, // priv size
-		NULL, // mp_init
-		NULL, // mp_init_arg
-		NULL, // obj_init
-		NULL, // obj_init_arg
-		SOCKET_ID_ANY,
-		0 // flags
-	);
+	struct nh_pool_opts opts = {
+		.solicit_nh = arp_output_request_solicit,
+		.free_nh = ip4_route_cleanup,
+		.num_nexthops = IP4_MAX_NEXT_HOPS,
+	};
+	nh_pool = nh_pool_new(AF_INET, ev_base, &opts);
 	if (nh_pool == NULL)
-		ABORT("rte_mempool_create(ip4_nh) failed");
-
-	nh_gc_timer = event_new(ev_base, -1, EV_PERSIST | EV_FINALIZE, nexthop_gc, NULL);
-	if (nh_gc_timer == NULL)
-		ABORT("event_new() failed");
-	struct timeval tv = {.tv_sec = 1};
-	if (event_add(nh_gc_timer, &tv) < 0)
-		ABORT("event_add() failed");
+		ABORT("nh_pool_new(AF_INET) failed");
 }

 static void nh4_fini(struct event_base *) {
-	event_free(nh_gc_timer);
-	nh_gc_timer = NULL;
-	rte_mempool_free(nh_pool);
+	nh_pool_free(nh_pool);
+	// The pool is gone: do not leave a dangling pointer behind.
 	nh_pool = NULL;
 }

 static struct gr_api_handler nh4_add_handler = {
diff --git a/modules/ip/control/route.c b/modules/ip/control/route.c
index 8c41bf29..252ebc6c 100644
--- a/modules/ip/control/route.c
+++ b/modules/ip/control/route.c
@@ -134,7 +134,7 @@ int ip4_route_insert(uint16_t vrf_id, ip4_addr_t ip, uint8_t prefixlen, struct n
 	uint32_t host_order_ip = rte_be_to_cpu_32(ip);
 	int ret;

-	ip4_nexthop_incref(nh);
+	nexthop_incref(nh);

 	if (fib == NULL) {
 		ret = -errno;
@@ -149,7 +149,7 @@ int ip4_route_insert(uint16_t vrf_id, ip4_addr_t ip, uint8_t prefixlen, struct n
 	return 0;

 fail:
-	ip4_nexthop_decref(nh);
+	nexthop_decref(nh);
 	return errno_set(-ret);
 }

@@ -169,7 +169,7 @@ int ip4_route_delete(uint16_t vrf_id, ip4_addr_t ip, uint8_t prefixlen) {
 	if ((ret = rte_fib_delete(fib, host_order_ip, prefixlen)) < 0)
 		return errno_set(-ret);

-	ip4_nexthop_decref(nh);
+	nexthop_decref(nh);

 	return 0;
 }
@@ -201,11 +201,11 @@ static struct api_out route4_add(const void *request, void ** /*response*/) {
 	host_order_ip = ntohl(req->dest.ip);

 	if ((ret = rte_fib_add(fib, host_order_ip, req->dest.prefixlen, nh_ptr_to_id(nh))) < 0) {
-		ip4_nexthop_decref(nh);
+		nexthop_decref(nh);
 		return api_out(-ret, 0);
 	}

-	ip4_nexthop_incref(nh);
+	nexthop_incref(nh);
 	nh->flags |= GR_NH_F_GATEWAY;

 	return api_out(0, 0);
diff --git a/modules/ip6/control/address.c b/modules/ip6/control/address.c
index f73a0c31..2a72dfe9 100644
--- a/modules/ip6/control/address.c
+++ b/modules/ip6/control/address.c
@@ -96,7 +96,7 @@ static int ip6_mcast_addr_add(struct iface *iface, const struct rte_ipv6_addr *i
 		rte_ether_mcast_from_ipv6(&nh->lladdr, ip);
 	}

-	ip6_nexthop_incref(nh);
+	nexthop_incref(nh);
 	nh->flags = GR_NH_F_REACHABLE | GR_NH_F_STATIC | GR_NH_F_MCAST;
 	maddrs->nh[i] = nh;
 	maddrs->count++;
@@ -129,7 +129,7 @@ static int ip6_mcast_addr_del(struct iface *iface, const struct rte_ipv6_addr *i
 	// remove ethernet filter
 	ret = iface_del_eth_addr(iface->id, &nh->lladdr);

-	ip6_nexthop_decref(nh);
+	nexthop_decref(nh);

 	return ret;
 }
@@ -166,7 +166,7 @@ iface6_addr_add(const struct iface *iface, const struct rte_ipv6_addr *ip, uint8

 	if ((ret = iface_get_eth_addr(iface->id, &nh->lladdr)) < 0)
 		if (errno != EOPNOTSUPP) {
-			ip6_nexthop_decref(nh);
+			nexthop_decref(nh);
 			return errno_set(-ret);
 		}

diff --git a/modules/ip6/control/gr_ip6_control.h b/modules/ip6/control/gr_ip6_control.h
index 554a9966..ee0c78ab 100644
--- a/modules/ip6/control/gr_ip6_control.h
+++ b/modules/ip6/control/gr_ip6_control.h
@@ -23,8 +23,6 @@

 struct nexthop *ip6_nexthop_lookup(uint16_t vrf_id, const struct rte_ipv6_addr *);
 struct nexthop *ip6_nexthop_new(uint16_t vrf_id, uint16_t iface_id, const struct rte_ipv6_addr *);
-void ip6_nexthop_incref(struct nexthop *);
-void ip6_nexthop_decref(struct nexthop *);

 int ip6_route_insert(uint16_t vrf_id, const struct rte_ipv6_addr *, uint8_t prefixlen, struct nexthop *);
 int ip6_route_delete(uint16_t vrf_id, const struct rte_ipv6_addr *, uint8_t prefixlen);
diff --git a/modules/ip6/control/nexthop.c b/modules/ip6/control/nexthop.c
index 3798fa63..fe5aee70 100644
--- a/modules/ip6/control/nexthop.c
+++ b/modules/ip6/control/nexthop.c
@@ -23,65 +23,15 @@
 #include
 #include

-static struct rte_mempool *nh_pool;
+static struct nh_pool *nh_pool;

 struct nexthop *
 ip6_nexthop_new(uint16_t vrf_id, uint16_t iface_id, const struct rte_ipv6_addr *ip) {
-	struct nexthop *nh;
-	void *data;
-	int ret;
-
-	if ((ret = rte_mempool_get(nh_pool, &data)) < 0)
-		return errno_set_null(-ret);
-
-	nh = data;
-	nh->vrf_id = vrf_id;
-	nh->iface_id = iface_id;
-	nh->ipv6 = *ip;
-
-	return nh;
-}
-
-struct lookup_filter {
-	uint16_t vrf_id;
-	const struct rte_ipv6_addr *ip;
-	struct nexthop *nh;
-};
-
-static void nh_lookup_cb(struct rte_mempool *, void *opaque, void *obj, unsigned /*obj_idx*/) {
-	struct lookup_filter *filter = opaque;
-	struct nexthop *nh = obj;
-	if (filter->nh == NULL && nh->ref_count > 0 && rte_ipv6_addr_eq(&nh->ipv6, filter->ip)
-	    && nh->vrf_id == filter->vrf_id)
-		filter->nh = nh;
+	return nexthop_new(nh_pool, vrf_id, iface_id, ip);
 }

 struct nexthop *ip6_nexthop_lookup(uint16_t vrf_id, const struct rte_ipv6_addr *ip) {
-	struct lookup_filter filter = {.vrf_id = vrf_id, .ip = ip};
-	rte_mempool_obj_iter(nh_pool, nh_lookup_cb, &filter);
-	return filter.nh ?: errno_set_null(ENOENT);
-}
-
-void ip6_nexthop_decref(struct nexthop *nh) {
-	if (nh->ref_count <= 1) {
-		rte_spinlock_lock(&nh->lock);
-		// Flush all held packets.
-		struct rte_mbuf *m = nh->held_pkts_head;
-		while (m != NULL) {
-			struct rte_mbuf *next = queue_mbuf_data(m)->next;
-			rte_pktmbuf_free(m);
-			m = next;
-		}
-		rte_spinlock_unlock(&nh->lock);
-		memset(nh, 0, sizeof(*nh));
-		rte_mempool_put(nh_pool, nh);
-	} else {
-		nh->ref_count--;
-	}
-}
-
-void ip6_nexthop_incref(struct nexthop *nh) {
-	nh->ref_count++;
+	return nexthop_lookup(nh_pool, vrf_id, ip);
 }

 static struct api_out nh6_add(const void *request, void ** /*response*/) {
@@ -140,12 +90,11 @@ struct list_context {
 	struct gr_nexthop *nh;
 };

-static void nh_list_cb(struct rte_mempool *, void *opaque, void *obj, unsigned /*obj_idx*/) {
-	struct list_context *ctx = opaque;
-	struct nexthop *nh = obj;
+static void nh_list_cb(struct nexthop *nh, void *priv) {
+	struct list_context *ctx = priv;
 	struct gr_nexthop api_nh;

-	if (nh->ref_count == 0 || (nh->vrf_id != ctx->vrf_id && ctx->vrf_id != UINT16_MAX)
+	if ((nh->vrf_id != ctx->vrf_id && ctx->vrf_id != UINT16_MAX)
 	    || rte_ipv6_addr_is_mcast(&nh->ipv6))
 		return;

@@ -168,7 +117,7 @@ static struct api_out nh6_list(const void *request, void **response) {
 	struct gr_ip6_nh_list_resp *resp = NULL;
 	size_t len;

-	rte_mempool_obj_iter(nh_pool, nh_list_cb, &ctx);
+	nh_pool_iter(nh_pool, nh_list_cb, &ctx);

 	len = sizeof(*resp) + gr_vec_len(ctx.nh) * sizeof(*ctx.nh);
 	if ((resp = calloc(1, len)) == NULL) {
@@ -185,90 +134,21 @@ static struct api_out nh6_list(const void *request, void **response) {
 	return api_out(0, len);
 }

-static void nh_gc_cb(struct rte_mempool *, void * /*opaque*/, void *obj, unsigned /*obj_idx*/) {
-	uint64_t now = rte_get_tsc_cycles();
-	uint64_t reply_age, request_age;
-	unsigned probes, max_probes;
-	struct nexthop *nh = obj;
-
-	max_probes = NH_UCAST_PROBES + NH_BCAST_PROBES;
-
-	if (nh->ref_count == 0 || nh->flags & GR_NH_F_STATIC)
-		return;
-
-	reply_age = (now - nh->last_reply) / rte_get_tsc_hz();
-	request_age = (now - nh->last_request) / rte_get_tsc_hz();
-	probes = nh->ucast_probes + nh->bcast_probes;
-
-	if (nh->flags & (GR_NH_F_PENDING | GR_NH_F_STALE) && request_age > probes) {
-		if (probes >= max_probes && !(nh->flags & GR_NH_F_GATEWAY)) {
-			LOG(DEBUG,
-			    IP6_F " vrf=%u failed_probes=%u held_pkts=%u: %s -> failed",
-			    &nh->ipv6,
-			    nh->vrf_id,
-			    probes,
-			    nh->held_pkts_num,
-			    gr_nh_flag_name(nh->flags & (GR_NH_F_PENDING | GR_NH_F_STALE)));
-
-			nh->flags &= ~(GR_NH_F_PENDING | GR_NH_F_STALE);
-			nh->flags |= GR_NH_F_FAILED;
-		} else {
-			if (ip6_nexthop_solicit(nh) < 0)
-				LOG(ERR, "arp_output_request_solicit: %s", strerror(errno));
-		}
-	} else if (nh->flags & GR_NH_F_REACHABLE && reply_age > NH_LIFETIME_REACHABLE) {
-		nh->flags &= ~GR_NH_F_REACHABLE;
-		nh->flags |= GR_NH_F_STALE;
-	} else if (nh->flags & GR_NH_F_FAILED && request_age > NH_LIFETIME_UNREACHABLE) {
-		LOG(DEBUG,
-		    IP6_F " vrf=%u failed_probes=%u held_pkts=%u: failed -> ",
-		    &nh->ipv6,
-		    nh->vrf_id,
-		    probes,
-		    nh->held_pkts_num);
-
-		// this also does ip6_nexthop_decref(), freeing the next hop
-		// and buffered packets.
-		ip6_route_cleanup(nh);
-	}
-}
-
-static void nexthop_gc(evutil_socket_t, short /*what*/, void * /*priv*/) {
-	rte_mempool_obj_iter(nh_pool, nh_gc_cb, NULL);
-}
-
-static struct event *nh_gc_timer;
-
 static void nh6_init(struct event_base *ev_base) {
-	nh_pool = rte_mempool_create(
-		"ip6_nh", // name
-		rte_align32pow2(IP6_MAX_NEXT_HOPS) - 1,
-		sizeof(struct nexthop),
-		0, // cache size
-		0, // priv size
-		NULL, // mp_init
-		NULL, // mp_init_arg
-		NULL, // obj_init
-		NULL, // obj_init_arg
-		SOCKET_ID_ANY,
-		0 // flags
-	);
+	struct nh_pool_opts opts = {
+		.solicit_nh = ip6_nexthop_solicit,
+		.free_nh = ip6_route_cleanup,
+		.num_nexthops = IP6_MAX_NEXT_HOPS,
+	};
+	nh_pool = nh_pool_new(AF_INET6, ev_base, &opts);
 	if (nh_pool == NULL)
-		ABORT("rte_mempool_create(ip6_nh) failed");
-
-	nh_gc_timer = event_new(ev_base, -1, EV_PERSIST | EV_FINALIZE, nexthop_gc, NULL);
-	if (nh_gc_timer == NULL)
-		ABORT("event_new() failed");
-	struct timeval tv = {.tv_sec = 1};
-	if (event_add(nh_gc_timer, &tv) < 0)
-		ABORT("event_add() failed");
+		ABORT("nh_pool_new(AF_INET6) failed");
 }

 static void nh6_fini(struct event_base *) {
-	event_free(nh_gc_timer);
-	nh_gc_timer = NULL;
-	rte_mempool_free(nh_pool);
+	nh_pool_free(nh_pool);
+	// The pool is gone: do not leave a dangling pointer behind.
 	nh_pool = NULL;
 }

 static struct gr_api_handler nh6_add_handler = {
diff --git a/modules/ip6/control/route.c b/modules/ip6/control/route.c
index f2268e2a..d107c62c 100644
--- a/modules/ip6/control/route.c
+++ b/modules/ip6/control/route.c
@@ -136,7 +136,7 @@ int ip6_route_insert(
 	struct rte_fib6 *fib = get_or_create_fib6(vrf_id);
 	int ret;

-	ip6_nexthop_incref(nh);
+	nexthop_incref(nh);

 	if (fib == NULL) {
 		ret = -errno;
@@ -151,7 +151,7 @@ int ip6_route_insert(
 	return 0;

 fail:
-	ip6_nexthop_decref(nh);
+	nexthop_decref(nh);
 	return errno_set(-ret);
 }

@@ -170,7 +170,7 @@ int ip6_route_delete(uint16_t vrf_id, const struct rte_ipv6_addr *ip, uint8_t pr
 	if ((ret = rte_fib6_delete(fib, ip, prefixlen)) < 0)
 		return errno_set(-ret);

-	ip6_nexthop_decref(nh);
+	nexthop_decref(nh);

 	return 0;
 }
@@ -199,11 +199,11 @@ static struct api_out route6_add(const void *request, void ** /*response*/) {
 		return api_out(errno, 0);

 	if ((ret = rte_fib6_add(fib6, &req->dest.ip, req->dest.prefixlen, nh_ptr_to_id(nh))) < 0) {
-		ip6_nexthop_decref(nh);
+		nexthop_decref(nh);
 		return api_out(-ret, 0);
 	}

-	ip6_nexthop_incref(nh);
+	nexthop_incref(nh);
 	nh->flags |= GR_NH_F_GATEWAY;

 	return api_out(0, 0);