Skip to content

Commit

Permalink
dpif-netdev: Preserve inner offloads on recirculation.
Browse files Browse the repository at this point in the history
Rather than drop all pending Tx offloads on recirculation,
preserve inner offloads (and mark packet with outer Tx offloads)
when parsing the packet again.

Fixes: c6538b4 ("dpif-netdev: Fix crash due to tunnel offloading on recirculation.")
Fixes: 084c808 ("userspace: Support VXLAN and GENEVE TSO.")
Signed-off-by: David Marchand <[email protected]>
  • Loading branch information
david-marchand committed Jan 14, 2025
1 parent 8b7f1eb commit ce37a97
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 49 deletions.
8 changes: 0 additions & 8 deletions lib/dp-packet.h
Original file line number Diff line number Diff line change
Expand Up @@ -1289,14 +1289,6 @@ dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN;
}

/* Clears tunnel offloading marks. */
static inline void
dp_packet_hwol_reset_tunnel(struct dp_packet *b)
{
*dp_packet_ol_flags_ptr(b) &= ~(DP_PACKET_OL_TX_TUNNEL_VXLAN |
DP_PACKET_OL_TX_TUNNEL_GENEVE);
}

/* Mark packet 'b' as a tunnel packet with outer IPv4 header. */
static inline void
dp_packet_hwol_set_tx_outer_ipv4(struct dp_packet *b)
Expand Down
2 changes: 1 addition & 1 deletion lib/dpif-netdev-avx512.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,

if (!mfex_hit) {
/* Do a scalar miniflow extract into keys. */
miniflow_extract(packet, &key->mf);
miniflow_extract(packet, &key->mf, false);
}

/* Cache TCP and byte values for all packets. */
Expand Down
2 changes: 1 addition & 1 deletion lib/dpif-netdev-private-extract.c
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
/* Run scalar miniflow_extract to get default result. */
DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
pkt_metadata_init(&packet->md, in_port);
miniflow_extract(packet, &keys[i].mf);
miniflow_extract(packet, &keys[i].mf, false);

/* Store known good metadata to compare with optimized metadata. */
good_l2_5_ofs[i] = packet->l2_5_ofs;
Expand Down
36 changes: 6 additions & 30 deletions lib/dpif-netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ COVERAGE_DEFINE(datapath_drop_lock_error);
COVERAGE_DEFINE(datapath_drop_userspace_action_error);
COVERAGE_DEFINE(datapath_drop_tunnel_push_error);
COVERAGE_DEFINE(datapath_drop_tunnel_pop_error);
COVERAGE_DEFINE(datapath_drop_tunnel_tso_recirc);
COVERAGE_DEFINE(datapath_drop_recirc_error);
COVERAGE_DEFINE(datapath_drop_invalid_port);
COVERAGE_DEFINE(datapath_drop_invalid_bond);
Expand Down Expand Up @@ -8539,6 +8538,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
struct dp_netdev_flow *flow = NULL;
uint16_t tcp_flags;
bool tunneling;

if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
dp_packet_delete(packet);
Expand All @@ -8555,6 +8555,10 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,

if (!md_is_valid) {
pkt_metadata_init(&packet->md, port_no);
tunneling = false;
} else {
tunneling = dp_packet_hwol_is_tunnel_geneve(packet) ||
dp_packet_hwol_is_tunnel_vxlan(packet);
}

if (netdev_flow_api && recirc_depth == 0) {
Expand Down Expand Up @@ -8590,7 +8594,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
}
}

miniflow_extract(packet, &key->mf);
miniflow_extract(packet, &key->mf, tunneling);
key->len = 0; /* Not computed yet. */
key->hash =
(md_is_valid == false)
Expand Down Expand Up @@ -8923,34 +8927,6 @@ static void
dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
size_t i, size = dp_packet_batch_size(packets);
struct dp_packet *packet;

DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets) {
if (dp_packet_hwol_is_tunnel_geneve(packet) ||
dp_packet_hwol_is_tunnel_vxlan(packet)) {

if (dp_packet_hwol_is_tso(packet)) {
/* Can't perform GSO in the middle of a pipeline. */
COVERAGE_INC(datapath_drop_tunnel_tso_recirc);
dp_packet_delete(packet);
VLOG_WARN_RL(&rl, "Recirculating tunnel packets with "
"TSO is not supported");
continue;
}
/* Have to fix all the checksums before re-parsing, because the
* packet will be treated as having a single set of headers. */
dp_packet_ol_send_prepare(packet, 0);
/* This packet must not be marked with anything tunnel-related. */
dp_packet_hwol_reset_tunnel(packet);
/* Clear inner offsets. Other ones are collateral, but they will
* be re-initialized on re-parsing. */
dp_packet_reset_offsets(packet);
}
dp_packet_batch_refill(packets, packet, i);
}

dp_netdev_input__(pmd, packets, true, 0);
}

Expand Down
43 changes: 36 additions & 7 deletions lib/flow.c
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ flow_extract(struct dp_packet *packet, struct flow *flow)

COVERAGE_INC(flow_extract);

miniflow_extract(packet, &m.mf);
miniflow_extract(packet, &m.mf, false);
miniflow_expand(&m.mf, flow);
}

Expand Down Expand Up @@ -762,6 +762,8 @@ dump_invalid_packet(struct dp_packet *packet, const char *reason)

/* Initializes 'dst' from 'packet' and 'md', taking the packet type into
* account. 'dst' must have enough space for FLOW_U64S * 8 bytes.
* 'tunneling' may be set to indicate that this packet is carrying some
* tunneled data.
*
* Initializes the layer offsets as follows:
*
Expand All @@ -787,7 +789,8 @@ dump_invalid_packet(struct dp_packet *packet, const char *reason)
* of interest for the flow, otherwise UINT16_MAX.
*/
void
miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
miniflow_extract(struct dp_packet *packet, struct miniflow *dst,
bool tunneling)
{
/* Add code to this function (or its callees) to extract new fields. */
BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
Expand All @@ -799,6 +802,8 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
uint64_t *values = miniflow_values(dst);
struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
values + FLOW_U64S };
uint16_t inner_l3_ofs = UINT16_MAX;
uint16_t inner_l4_ofs = UINT16_MAX;
const char *frame;
ovs_be16 dl_type = OVS_BE16_MAX;
uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;
Expand Down Expand Up @@ -857,7 +862,16 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)

/* Initialize packet's layer pointer and offsets. */
frame = data;
if (tunneling) {
/* Preserve inner offsets from previous circulation. */
inner_l3_ofs = packet->inner_l3_ofs;
inner_l4_ofs = packet->inner_l4_ofs;
}
dp_packet_reset_offsets(packet);
if (tunneling) {
packet->inner_l3_ofs = inner_l3_ofs;
packet->inner_l4_ofs = inner_l4_ofs;
}

if (packet_type == htonl(PT_ETH)) {
/* Must have full Ethernet header to proceed. */
Expand Down Expand Up @@ -936,9 +950,16 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
nw_proto = nh->ip_proto;
nw_frag = ipv4_get_nw_frag(nh);
data_pull(&data, &size, ip_len);
dp_packet_hwol_set_tx_ipv4(packet);
if (dp_packet_ip_checksum_good(packet)) {
dp_packet_hwol_set_tx_ip_csum(packet);
if (tunneling) {
dp_packet_hwol_set_tx_outer_ipv4(packet);
if (dp_packet_ip_checksum_good(packet)) {
dp_packet_hwol_set_tx_outer_ipv4_csum(packet);
}
} else {
dp_packet_hwol_set_tx_ipv4(packet);
if (dp_packet_ip_checksum_good(packet)) {
dp_packet_hwol_set_tx_ip_csum(packet);
}
}
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
const struct ovs_16aligned_ip6_hdr *nh = data;
Expand All @@ -953,7 +974,11 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
}
data_pull(&data, &size, sizeof *nh);

dp_packet_hwol_set_tx_ipv6(packet);
if (tunneling) {
dp_packet_hwol_set_tx_outer_ipv6(packet);
} else {
dp_packet_hwol_set_tx_ipv6(packet);
}
plen = ntohs(nh->ip6_plen);
dp_packet_set_l2_pad_size(packet, size - plen);
size = plen; /* Never pull padding. */
Expand Down Expand Up @@ -1078,7 +1103,11 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
dp_packet_ol_l4_csum_check_partial(packet);
if (dp_packet_l4_checksum_good(packet)
|| dp_packet_ol_l4_csum_partial(packet)) {
dp_packet_hwol_set_csum_udp(packet);
if (tunneling) {
dp_packet_hwol_set_outer_udp_csum(packet);
} else {
dp_packet_hwol_set_csum_udp(packet);
}
}
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
Expand Down
3 changes: 2 additions & 1 deletion lib/flow.h
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,8 @@ struct pkt_metadata;
/* The 'dst' must follow with buffer space for FLOW_U64S 64-bit units.
* 'dst->map' is ignored on input and set on output to indicate which fields
* were extracted. */
void miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
void miniflow_extract(struct dp_packet *packet, struct miniflow *dst,
bool tunneling);
void miniflow_map_init(struct miniflow *, const struct flow *);
void flow_wc_map(const struct flow *, struct flowmap *);
size_t miniflow_alloc(struct miniflow *dsts[], size_t n,
Expand Down
2 changes: 1 addition & 1 deletion ofproto/ofproto.c
Original file line number Diff line number Diff line change
Expand Up @@ -3733,7 +3733,7 @@ ofproto_packet_out_init(struct ofproto *ofproto,
/* Store struct flow. */
opo->flow = xmalloc(sizeof *opo->flow);
*opo->flow = po->flow_metadata.flow;
miniflow_extract(opo->packet, &m.mf);
miniflow_extract(opo->packet, &m.mf, false);
flow_union_with_miniflow(opo->flow, &m.mf);

/* Check actions like for flow mods. We pass a 'table_id' of 0 to
Expand Down

0 comments on commit ce37a97

Please sign in to comment.