From ce37a9759f0ffe7785af28e613a1e7f537ffb94f Mon Sep 17 00:00:00 2001 From: David Marchand Date: Tue, 14 Jan 2025 09:57:21 +0100 Subject: [PATCH] dpif-netdev: Preserve inner offloads on recirculation. Rather than drop all pending Tx offloads on recirculation, preserve inner offloads (and mark packet with outer Tx offloads) when parsing the packet again. Fixes: c6538b443984 ("dpif-netdev: Fix crash due to tunnel offloading on recirculation.") Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.") Signed-off-by: David Marchand --- lib/dp-packet.h | 8 ------ lib/dpif-netdev-avx512.c | 2 +- lib/dpif-netdev-private-extract.c | 2 +- lib/dpif-netdev.c | 36 +++++--------------------- lib/flow.c | 43 ++++++++++++++++++++++++++----- lib/flow.h | 3 ++- ofproto/ofproto.c | 2 +- 7 files changed, 47 insertions(+), 49 deletions(-) diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 4afbbe72231..fb36091334a 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -1289,14 +1289,6 @@ dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b) *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN; } -/* Clears tunnel offloading marks. */ -static inline void -dp_packet_hwol_reset_tunnel(struct dp_packet *b) -{ - *dp_packet_ol_flags_ptr(b) &= ~(DP_PACKET_OL_TX_TUNNEL_VXLAN | - DP_PACKET_OL_TX_TUNNEL_GENEVE); -} - /* Mark packet 'b' as a tunnel packet with outer IPv4 header. */ static inline void dp_packet_hwol_set_tx_outer_ipv4(struct dp_packet *b) diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c index 83e7a1394de..ff2d3db0fe0 100644 --- a/lib/dpif-netdev-avx512.c +++ b/lib/dpif-netdev-avx512.c @@ -241,7 +241,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, if (!mfex_hit) { /* Do a scalar miniflow extract into keys. */ - miniflow_extract(packet, &key->mf); + miniflow_extract(packet, &key->mf, false); } /* Cache TCP and byte values for all packets. */ diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c index ded08fd3ef2..976fa9542b5 100644 --- a/lib/dpif-netdev-private-extract.c +++ b/lib/dpif-netdev-private-extract.c @@ -351,7 +351,7 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets, /* Run scalar miniflow_extract to get default result. */ DP_PACKET_BATCH_FOR_EACH (i, packet, packets) { pkt_metadata_init(&packet->md, in_port); - miniflow_extract(packet, &keys[i].mf); + miniflow_extract(packet, &keys[i].mf, false); /* Store known good metadata to compare with optimized metadata. */ good_l2_5_ofs[i] = packet->l2_5_ofs; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 2a529f272d1..7a889a23781 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -115,7 +115,6 @@ COVERAGE_DEFINE(datapath_drop_lock_error); COVERAGE_DEFINE(datapath_drop_userspace_action_error); COVERAGE_DEFINE(datapath_drop_tunnel_push_error); COVERAGE_DEFINE(datapath_drop_tunnel_pop_error); -COVERAGE_DEFINE(datapath_drop_tunnel_tso_recirc); COVERAGE_DEFINE(datapath_drop_recirc_error); COVERAGE_DEFINE(datapath_drop_invalid_port); COVERAGE_DEFINE(datapath_drop_invalid_bond); @@ -8539,6 +8538,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) { struct dp_netdev_flow *flow = NULL; uint16_t tcp_flags; + bool tunneling; if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { dp_packet_delete(packet); @@ -8555,6 +8555,10 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, if (!md_is_valid) { pkt_metadata_init(&packet->md, port_no); + tunneling = false; + } else { + tunneling = dp_packet_hwol_is_tunnel_geneve(packet) || + dp_packet_hwol_is_tunnel_vxlan(packet); } if (netdev_flow_api && recirc_depth == 0) { @@ -8590,7 +8594,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, } } - miniflow_extract(packet, &key->mf); + miniflow_extract(packet, &key->mf, tunneling); key->len = 0; /* Not computed yet. */ key->hash = (md_is_valid == false) @@ -8923,34 +8927,6 @@ static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - size_t i, size = dp_packet_batch_size(packets); - struct dp_packet *packet; - - DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets) { - if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { - - if (dp_packet_hwol_is_tso(packet)) { - /* Can't perform GSO in the middle of a pipeline. */ - COVERAGE_INC(datapath_drop_tunnel_tso_recirc); - dp_packet_delete(packet); - VLOG_WARN_RL(&rl, "Recirculating tunnel packets with " - "TSO is not supported"); - continue; - } - /* Have to fix all the checksums before re-parsing, because the - * packet will be treated as having a single set of headers. */ - dp_packet_ol_send_prepare(packet, 0); - /* This packet must not be marked with anything tunnel-related. */ - dp_packet_hwol_reset_tunnel(packet); - /* Clear inner offsets. Other ones are collateral, but they will - * be re-initialized on re-parsing. */ - dp_packet_reset_offsets(packet); - } - dp_packet_batch_refill(packets, packet, i); - } - dp_netdev_input__(pmd, packets, true, 0); } diff --git a/lib/flow.c b/lib/flow.c index 9be4375246a..bac75a0331a 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -669,7 +669,7 @@ flow_extract(struct dp_packet *packet, struct flow *flow) COVERAGE_INC(flow_extract); - miniflow_extract(packet, &m.mf); + miniflow_extract(packet, &m.mf, false); miniflow_expand(&m.mf, flow); } @@ -762,6 +762,8 @@ dump_invalid_packet(struct dp_packet *packet, const char *reason) /* Initializes 'dst' from 'packet' and 'md', taking the packet type into * account. 'dst' must have enough space for FLOW_U64S * 8 bytes. + * 'tunneling' may be set to indicate that this packet is carrying some + * tunneled data. * * Initializes the layer offsets as follows: * @@ -787,7 +789,8 @@ dump_invalid_packet(struct dp_packet *packet, const char *reason) * of interest for the flow, otherwise UINT16_MAX. */ void -miniflow_extract(struct dp_packet *packet, struct miniflow *dst) +miniflow_extract(struct dp_packet *packet, struct miniflow *dst, + bool tunneling) { /* Add code to this function (or its callees) to extract new fields. */ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); @@ -799,6 +802,8 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) uint64_t *values = miniflow_values(dst); struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values, values + FLOW_U64S }; + uint16_t inner_l3_ofs = UINT16_MAX; + uint16_t inner_l4_ofs = UINT16_MAX; const char *frame; ovs_be16 dl_type = OVS_BE16_MAX; uint8_t nw_frag, nw_tos, nw_ttl, nw_proto; @@ -857,7 +862,16 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) /* Initialize packet's layer pointer and offsets. */ frame = data; + if (tunneling) { + /* Preserve inner offsets from previous circulation. */ + inner_l3_ofs = packet->inner_l3_ofs; + inner_l4_ofs = packet->inner_l4_ofs; + } dp_packet_reset_offsets(packet); + if (tunneling) { + packet->inner_l3_ofs = inner_l3_ofs; + packet->inner_l4_ofs = inner_l4_ofs; + } if (packet_type == htonl(PT_ETH)) { /* Must have full Ethernet header to proceed. */ @@ -936,9 +950,16 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) nw_proto = nh->ip_proto; nw_frag = ipv4_get_nw_frag(nh); data_pull(&data, &size, ip_len); - dp_packet_hwol_set_tx_ipv4(packet); - if (dp_packet_ip_checksum_good(packet)) { - dp_packet_hwol_set_tx_ip_csum(packet); + if (tunneling) { + dp_packet_hwol_set_tx_outer_ipv4(packet); + if (dp_packet_ip_checksum_good(packet)) { + dp_packet_hwol_set_tx_outer_ipv4_csum(packet); + } + } else { + dp_packet_hwol_set_tx_ipv4(packet); + if (dp_packet_ip_checksum_good(packet)) { + dp_packet_hwol_set_tx_ip_csum(packet); + } } } else if (dl_type == htons(ETH_TYPE_IPV6)) { const struct ovs_16aligned_ip6_hdr *nh = data; @@ -953,7 +974,11 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) } data_pull(&data, &size, sizeof *nh); - dp_packet_hwol_set_tx_ipv6(packet); + if (tunneling) { + dp_packet_hwol_set_tx_outer_ipv6(packet); + } else { + dp_packet_hwol_set_tx_ipv6(packet); + } plen = ntohs(nh->ip6_plen); dp_packet_set_l2_pad_size(packet, size - plen); size = plen; /* Never pull padding. */ @@ -1078,7 +1103,11 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) dp_packet_ol_l4_csum_check_partial(packet); if (dp_packet_l4_checksum_good(packet) || dp_packet_ol_l4_csum_partial(packet)) { - dp_packet_hwol_set_csum_udp(packet); + if (tunneling) { + dp_packet_hwol_set_outer_udp_csum(packet); + } else { + dp_packet_hwol_set_csum_udp(packet); + } } } } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) { diff --git a/lib/flow.h b/lib/flow.h index 60ec4b0d780..4a7c99a996d 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -541,7 +541,8 @@ struct pkt_metadata; /* The 'dst' must follow with buffer space for FLOW_U64S 64-bit units. * 'dst->map' is ignored on input and set on output to indicate which fields * were extracted. */ -void miniflow_extract(struct dp_packet *packet, struct miniflow *dst); +void miniflow_extract(struct dp_packet *packet, struct miniflow *dst, + bool tunneling); void miniflow_map_init(struct miniflow *, const struct flow *); void flow_wc_map(const struct flow *, struct flowmap *); size_t miniflow_alloc(struct miniflow *dsts[], size_t n, diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 3df64efb9ac..4c123b9214d 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -3733,7 +3733,7 @@ ofproto_packet_out_init(struct ofproto *ofproto, /* Store struct flow. */ opo->flow = xmalloc(sizeof *opo->flow); *opo->flow = po->flow_metadata.flow; - miniflow_extract(opo->packet, &m.mf); + miniflow_extract(opo->packet, &m.mf, false); flow_union_with_miniflow(opo->flow, &m.mf); /* Check actions like for flow mods. We pass a 'table_id' of 0 to