From c5441bc605e397164c2758e4f940750287cca23a Mon Sep 17 00:00:00 2001 From: David Marchand Date: Fri, 18 Mar 2022 16:33:39 +0100 Subject: [PATCH] dp-packet: Allow DPDK packet resize. DPDK-based dp-packets point to data buffers that can't be expanded dynamically. Their layout is as follows: - a 128-byte headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), - a maximum size chosen at mempool creation. In some use cases though (like encapsulating with multiple tunnels), a 128-byte headroom is too small. Dynamically allocate buffers in DPDK memory and make use of the DPDK external buffers API (previously used for userspace TSO). Reported-at: https://issues.redhat.com/browse/FD-2658 Signed-off-by: David Marchand --- lib/dp-packet.c | 17 ++++++++++++- lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++--------- lib/netdev-dpdk.h | 3 +++ tests/system-traffic.at | 56 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 13 deletions(-) diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 0e23c766e1b..0fc0c1fac9e 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -261,8 +261,23 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom) new_allocated = new_headroom + dp_packet_size(b) + new_tailroom; switch (b->source) { - case DPBUF_DPDK: + case DPBUF_DPDK: { +#ifdef DPDK_NETDEV + uint32_t buf_len; + + buf_len = new_allocated; + new_base = netdev_dpdk_extbuf_allocate(&buf_len); + if (!new_base) { + out_of_memory(); + } + ovs_assert(buf_len <= UINT16_MAX); + dp_packet_copy__(b, new_base, new_headroom, new_tailroom); + netdev_dpdk_extbuf_replace(b, new_base, buf_len); + break; +#else OVS_NOT_REACHED(); +#endif + } case DPBUF_MALLOC: if (new_headroom == dp_packet_headroom(b)) { diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 45f61930d40..368361354cc 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -2925,41 +2925,64 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts, return cnt; 
} +void * +netdev_dpdk_extbuf_allocate(uint32_t *data_len) +{ + *data_len += sizeof(struct rte_mbuf_ext_shared_info) + sizeof(uintptr_t); + *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t)); + return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE); +} + static void netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque) { rte_free(opaque); } +void +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t data_len) +{ + struct rte_mbuf *pkt = (struct rte_mbuf *) b; + struct rte_mbuf_ext_shared_info *shinfo; + uint16_t buf_len = data_len; + + shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, + netdev_dpdk_extbuf_free, + buf); + ovs_assert(shinfo != NULL); + + if (RTE_MBUF_HAS_EXTBUF(pkt)) { + rte_pktmbuf_detach_extbuf(pkt); + } + rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf), buf_len, + shinfo); +} + static struct rte_mbuf * dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len) { uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len; - struct rte_mbuf_ext_shared_info *shinfo = NULL; + struct rte_mbuf_ext_shared_info *shinfo; uint16_t buf_len; void *buf; - total_len += sizeof *shinfo + sizeof(uintptr_t); - total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); - + buf = netdev_dpdk_extbuf_allocate(&total_len); + if (OVS_UNLIKELY(buf == NULL)) { + VLOG_ERR("Failed to allocate memory using rte_malloc: %u", total_len); + return NULL; + } if (OVS_UNLIKELY(total_len > UINT16_MAX)) { + netdev_dpdk_extbuf_free(NULL, buf); VLOG_ERR("Can't copy packet: too big %u", total_len); return NULL; } buf_len = total_len; - buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); - if (OVS_UNLIKELY(buf == NULL)) { - VLOG_ERR("Failed to allocate memory using rte_malloc: %u", buf_len); - return NULL; - } - - /* Initialize shinfo. 
*/ shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, netdev_dpdk_extbuf_free, buf); if (OVS_UNLIKELY(shinfo == NULL)) { - rte_free(buf); + netdev_dpdk_extbuf_free(NULL, buf); VLOG_ERR("Failed to initialize shared info for mbuf while " "attempting to attach an external buffer."); return NULL; diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 86df7a1e83c..274900c062d 100644 --- a/lib/netdev-dpdk.h +++ b/lib/netdev-dpdk.h @@ -32,6 +32,9 @@ struct netdev; void netdev_dpdk_register(const struct smap *); void free_dpdk_buf(struct dp_packet *); +void *netdev_dpdk_extbuf_allocate(uint32_t *); +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t); + bool netdev_dpdk_flow_api_supported(struct netdev *); int diff --git a/tests/system-traffic.at b/tests/system-traffic.at index f363a778cc7..c8c048b457f 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -388,6 +388,62 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PI OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([datapath - ping over many vxlan tunnels]) +OVS_CHECK_TUNNEL_TSO() +OVS_CHECK_VXLAN() + +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-underlay0]) +ADD_BR([br-underlay1]) +ADD_BR([br-underlay2]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay0 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay1 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay2 "actions=normal"]) + +ADD_NAMESPACES(at_ns0, at_ns1) + +AT_CHECK([ip link set dev br-underlay2 up]) +AT_CHECK([ip link set dev br-underlay1 up]) +AT_CHECK([ip link set dev br-underlay0 up]) + +ADD_VETH(p0, at_ns0, br-underlay2, "172.31.3.1/24") +AT_CHECK([ip addr add dev br-underlay2 "172.31.3.100/24"]) + +ADD_OVS_TUNNEL([vxlan], [br-underlay1], [at_vxlan4], [172.31.3.1], [172.31.2.100/24], [options:key=0]) +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan5], [at_ns0], [172.31.3.100], [172.31.2.1/24], + [id 0 dstport 4789]) + 
+ADD_OVS_TUNNEL([vxlan], [br-underlay0], [at_vxlan2], [172.31.2.1], [172.31.1.100/24], [options:key=1]) +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan3], [at_ns0], [172.31.2.100], [172.31.1.1/24], + [id 1 dstport 4789]) + +ADD_VETH(p1, at_ns1, br0, "10.1.1.200/24") +ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24], [options:key=2]) +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.1.1.1/24], + [id 2 dstport 4789]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.3.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.2.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.200 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([at_ns0], [ping -s 1000 -q -c 3 -i 0.3 -w 2 10.1.1.200 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([datapath - ping over vxlan6 tunnel]) OVS_CHECK_VXLAN_UDP6ZEROCSUM()