Skip to content

Commit

Permalink
userspace: Enable L4 checksum offloading by default.
Browse files Browse the repository at this point in the history
The netdev receiving packets is supposed to provide the flags
indicating whether the L4 checksum was verified and whether it is OK or BAD;
otherwise the stack will check it in software when appropriate.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulating a packet with that flag, set the checksum
of the inner L4 header since that is not yet supported.

Calculate the L4 checksum when the packet is going to be sent
over a device that doesn't support the feature.

Linux tap devices allow enabling L3 and L4 offload, so this
patch enables the feature. However, Linux socket interface
remains disabled because the API doesn't allow enabling
those two features without enabling TSO too.

Signed-off-by: Flavio Leitner <[email protected]>
Co-authored-by: Flavio Leitner <[email protected]>
Signed-off-by: Mike Pattrick <[email protected]>
Signed-off-by: Ilya Maximets <[email protected]>
  • Loading branch information
2 people authored and igsilya committed Jun 15, 2023
1 parent 5d11c47 commit 3337e6d
Show file tree
Hide file tree
Showing 13 changed files with 717 additions and 270 deletions.
6 changes: 3 additions & 3 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ Post-v3.1.0
- SRv6 Tunnel Protocol
* Added support for userspace datapath (only).
- Userspace datapath:
* IP checksum offload support is now enabled by default for interfaces
that support it. See the 'status' column in the 'interface' table to
check the status.
* IP and L4 checksum offload support is now enabled by default for
interfaces that support it. See the 'status' column in the 'interface'
table to check the status.


v3.1.0 - 16 Feb 2023
Expand Down
15 changes: 8 additions & 7 deletions lib/conntrack.c
Original file line number Diff line number Diff line change
Expand Up @@ -2060,13 +2060,12 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
}

if (ok) {
bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt);
if (!hwol_bad_l4_csum) {
bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt)
|| dp_packet_hwol_tx_l4_checksum(pkt);
if (!dp_packet_l4_checksum_bad(pkt)) {
/* Validate the checksum only when hwol is not supported. */
if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
&ctx->icmp_related, l3, !hwol_good_l4_csum,
&ctx->icmp_related, l3,
!dp_packet_l4_checksum_good(pkt) &&
!dp_packet_hwol_tx_l4_checksum(pkt),
NULL)) {
ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
return true;
Expand Down Expand Up @@ -3395,8 +3394,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
adj_seqnum(&th->tcp_seq, ec->seq_skew);
}

th->tcp_csum = 0;
if (!dp_packet_hwol_tx_l4_checksum(pkt)) {
if (dp_packet_hwol_tx_l4_checksum(pkt)) {
dp_packet_ol_reset_l4_csum_good(pkt);
} else {
th->tcp_csum = 0;
if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
dp_packet_l4_size(pkt));
Expand Down
29 changes: 29 additions & 0 deletions lib/dp-packet.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
dp_packet_init_specific(b);
/* By default assume the packet type to be Ethernet. */
b->packet_type = htonl(PT_ETH);
/* Reset csum start and offset. */
b->csum_start = 0;
b->csum_offset = 0;
}

static void
Expand Down Expand Up @@ -546,4 +549,30 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
dp_packet_hwol_reset_tx_ip_csum(p);
}
}

if (!dp_packet_hwol_tx_l4_checksum(p)) {
return;
}

if (dp_packet_l4_checksum_good(p)) {
dp_packet_hwol_reset_tx_l4_csum(p);
return;
}

if (dp_packet_hwol_l4_is_tcp(p)
&& !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
packet_tcp_complete_csum(p);
dp_packet_ol_set_l4_csum_good(p);
dp_packet_hwol_reset_tx_l4_csum(p);
} else if (dp_packet_hwol_l4_is_udp(p)
&& !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
packet_udp_complete_csum(p);
dp_packet_ol_set_l4_csum_good(p);
dp_packet_hwol_reset_tx_l4_csum(p);
} else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)
&& dp_packet_hwol_l4_is_sctp(p)) {
packet_sctp_complete_csum(p);
dp_packet_ol_set_l4_csum_good(p);
dp_packet_hwol_reset_tx_l4_csum(p);
}
}
78 changes: 72 additions & 6 deletions lib/dp-packet.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ struct dp_packet {
or UINT16_MAX. */
uint32_t cutlen; /* length in bytes to cut from the end. */
ovs_be32 packet_type; /* Packet type as defined in OpenFlow */
uint16_t csum_start; /* Position to start checksumming from. */
uint16_t csum_offset; /* Offset to place checksum. */
union {
struct pkt_metadata md;
uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
Expand Down Expand Up @@ -997,6 +999,13 @@ dp_packet_hwol_is_ipv4(const struct dp_packet *b)
return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4);
}

/* Tests the DP_PACKET_OL_TX_IPV6 bit in the offload flags of packet 'p'.
 * Returns 'true' when the bit is set, i.e. the packet is marked as IPv6,
 * and 'false' otherwise. */
static inline bool
dp_packet_hwol_tx_ipv6(const struct dp_packet *p)
{
    return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6) != 0;
}

/* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */
static inline bool
dp_packet_hwol_l4_is_tcp(const struct dp_packet *b)
Expand All @@ -1021,18 +1030,26 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b)
DP_PACKET_OL_TX_SCTP_CKSUM;
}

/* Mark packet 'b' for IPv4 checksum offloading. */
static inline void
dp_packet_hwol_set_tx_ipv4(struct dp_packet *b)
dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p)
{
*dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_L4_MASK;
}

/* Mark packet 'p' as IPv4. */
static inline void
dp_packet_hwol_set_tx_ipv4(struct dp_packet *p)
{
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4;
*dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
*dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
}

/* Mark packet 'b' for IPv6 checksum offloading. */
/* Mark packet 'a' as IPv6. */
static inline void
dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
dp_packet_hwol_set_tx_ipv6(struct dp_packet *a)
{
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
*dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_IPV4;
*dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6;
}

/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
Expand Down Expand Up @@ -1147,6 +1164,55 @@ dp_packet_l4_checksum_bad(const struct dp_packet *p)
DP_PACKET_OL_RX_L4_CKSUM_BAD;
}

/* Reports whether packet 'p' is in the "partial" L4 checksum state, that is,
 * the packet has good integrity but the checksum stored in it is not
 * complete.  This state is encoded as every bit of
 * DP_PACKET_OL_RX_L4_CKSUM_MASK being set in the offload flags. */
static inline bool
dp_packet_ol_l4_csum_partial(const struct dp_packet *p)
{
    if ((*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK)
        != DP_PACKET_OL_RX_L4_CKSUM_MASK) {
        /* At least one bit of the mask is missing. */
        return false;
    }
    return true;
}

/* Puts packet 'p' into the "partial" L4 checksum state: good integrity, but
 * the checksum in the packet is not complete.  This sets every bit of
 * DP_PACKET_OL_RX_L4_CKSUM_MASK in the offload flags. */
static inline void
dp_packet_ol_set_l4_csum_partial(struct dp_packet *p)
{
    *dp_packet_ol_flags_ptr(p) = *dp_packet_ol_flags_ptr(p)
                                 | DP_PACKET_OL_RX_L4_CKSUM_MASK;
}

/* Flags the L4 checksum of packet 'p' as good: the
 * DP_PACKET_OL_RX_L4_CKSUM_BAD bit is cleared first and then the
 * DP_PACKET_OL_RX_L4_CKSUM_GOOD bit is set. */
static inline void
dp_packet_ol_set_l4_csum_good(struct dp_packet *p)
{
    *dp_packet_ol_flags_ptr(p) =
        (*dp_packet_ol_flags_ptr(p) & ~DP_PACKET_OL_RX_L4_CKSUM_BAD)
        | DP_PACKET_OL_RX_L4_CKSUM_GOOD;
}

/* Records that packet 'p', previously marked with a good L4 checksum, has
 * been modified.  The DP_PACKET_OL_RX_L4_CKSUM_GOOD bit is dropped, except
 * when dp_packet_ol_l4_csum_partial() reports the packet as partial, in
 * which case the flags are left untouched. */
static inline void
dp_packet_ol_reset_l4_csum_good(struct dp_packet *p)
{
    if (dp_packet_ol_l4_csum_partial(p)) {
        return;
    }

    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_GOOD;
}

/* Marks packet 'p' as having good integrity with a partial checksum, but
 * only when both 'start' and 'offset' agree with the 'csum_start' and
 * 'csum_offset' stored in 'p'.  Nothing is changed on a mismatch.
 *
 * 'start' is the offset from the beginning of the packet headers.
 * 'offset' is the offset from 'start' at which the checksum is placed.
 *
 * The 'csum_start' and 'csum_offset' fields are set from the virtio_net_hdr
 * struct that may be provided by a netdev on packet ingress. */
static inline void
dp_packet_ol_l4_csum_check_partial(struct dp_packet *p, uint16_t start,
                                   uint16_t offset)
{
    if (p->csum_start != start || p->csum_offset != offset) {
        return;
    }

    dp_packet_ol_set_l4_csum_partial(p);
}

static inline uint32_t ALWAYS_INLINE
dp_packet_calc_hash_ipv4(const uint8_t *pkt, const uint16_t l3_ofs,
uint32_t hash)
Expand Down
62 changes: 57 additions & 5 deletions lib/dpif-netdev-extract-avx512.c
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,6 @@ mfex_ipv6_set_l2_pad_size(struct dp_packet *pkt,
return -1;
}
dp_packet_set_l2_pad_size(pkt, len_from_ipv6 - (p_len + IPV6_HEADER_LEN));
dp_packet_hwol_set_tx_ipv6(pkt);
return 0;
}

Expand Down Expand Up @@ -729,10 +728,6 @@ mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
return -1;
}
dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len);
dp_packet_hwol_set_tx_ipv4(pkt);
if (dp_packet_ip_checksum_good(pkt)) {
dp_packet_hwol_set_tx_ip_csum(pkt);
}
return 0;
}

Expand Down Expand Up @@ -763,6 +758,45 @@ mfex_check_tcp_data_offset(const struct tcp_header *tcp)
return ret;
}

/* Marks 'pkt' as IPv4 and, if dp_packet_ip_checksum_good() reports a good
 * IP checksum, additionally marks it via dp_packet_hwol_set_tx_ip_csum(). */
static void
mfex_ipv4_set_hwol(struct dp_packet *pkt)
{
    dp_packet_hwol_set_tx_ipv4(pkt);

    if (!dp_packet_ip_checksum_good(pkt)) {
        return;
    }

    dp_packet_hwol_set_tx_ip_csum(pkt);
}

/* Marks 'pkt' as IPv6 in its offload flags.  Thin wrapper around
 * dp_packet_hwol_set_tx_ipv6(). */
static void
mfex_ipv6_set_hwol(struct dp_packet *pkt)
{
    dp_packet_hwol_set_tx_ipv6(pkt);
}

/* Sets the TCP checksum offload marking on 'pkt' when appropriate.
 *
 * First calls dp_packet_ol_l4_csum_check_partial() with the packet's L4
 * offset and the position of 'tcp_csum' within struct tcp_header.  Then, if
 * the L4 checksum is reported as good or partial, calls
 * dp_packet_hwol_set_csum_tcp(). */
static void
mfex_tcp_set_hwol(struct dp_packet *pkt)
{
    dp_packet_ol_l4_csum_check_partial(
        pkt, pkt->l4_ofs, offsetof(struct tcp_header, tcp_csum));

    if (!dp_packet_l4_checksum_good(pkt)
        && !dp_packet_ol_l4_csum_partial(pkt)) {
        /* Checksum state is neither good nor partial: leave unmarked. */
        return;
    }

    dp_packet_hwol_set_csum_tcp(pkt);
}

/* Sets the UDP checksum offload marking on 'pkt' when appropriate.
 *
 * First calls dp_packet_ol_l4_csum_check_partial() with the packet's L4
 * offset and the position of 'udp_csum' within struct udp_header.  Then, if
 * the L4 checksum is reported as good or partial, calls
 * dp_packet_hwol_set_csum_udp(). */
static void
mfex_udp_set_hwol(struct dp_packet *pkt)
{
    dp_packet_ol_l4_csum_check_partial(
        pkt, pkt->l4_ofs, offsetof(struct udp_header, udp_csum));

    if (!dp_packet_l4_checksum_good(pkt)
        && !dp_packet_ol_l4_csum_partial(pkt)) {
        /* Checksum state is neither good nor partial: leave unmarked. */
        return;
    }

    dp_packet_hwol_set_csum_udp(pkt);
}

/* Generic loop to process any mfex profile. This code is specialized into
* multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE
* to ensure the compiler specializes each instance. The code is marked "hot"
Expand Down Expand Up @@ -864,6 +898,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
const struct tcp_header *tcp = (void *)&pkt[38];
mfex_handle_tcp_flags(tcp, &blocks[7]);
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
mfex_ipv4_set_hwol(packet);
mfex_tcp_set_hwol(packet);
} break;

case PROFILE_ETH_VLAN_IPV4_UDP: {
Expand All @@ -876,6 +912,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
continue;
}
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
mfex_ipv4_set_hwol(packet);
mfex_udp_set_hwol(packet);
} break;

case PROFILE_ETH_IPV4_TCP: {
Expand All @@ -891,6 +929,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
continue;
}
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
mfex_ipv4_set_hwol(packet);
mfex_tcp_set_hwol(packet);
} break;

case PROFILE_ETH_IPV4_UDP: {
Expand All @@ -902,6 +942,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
continue;
}
dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
mfex_ipv4_set_hwol(packet);
mfex_udp_set_hwol(packet);
} break;

case PROFILE_ETH_IPV6_UDP: {
Expand All @@ -920,6 +962,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Process UDP header. */
mfex_handle_ipv6_l4((void *)&pkt[54], &blocks[9]);
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
mfex_ipv6_set_hwol(packet);
mfex_udp_set_hwol(packet);
} break;

case PROFILE_ETH_IPV6_TCP: {
Expand All @@ -943,6 +987,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
}
mfex_handle_tcp_flags(tcp, &blocks[9]);
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
mfex_ipv6_set_hwol(packet);
mfex_tcp_set_hwol(packet);
} break;

case PROFILE_ETH_VLAN_IPV6_TCP: {
Expand All @@ -969,6 +1015,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
}
mfex_handle_tcp_flags(tcp, &blocks[10]);
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
mfex_ipv6_set_hwol(packet);
mfex_tcp_set_hwol(packet);
} break;

case PROFILE_ETH_VLAN_IPV6_UDP: {
Expand All @@ -990,6 +1038,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Process UDP header. */
mfex_handle_ipv6_l4((void *)&pkt[58], &blocks[10]);
dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
mfex_ipv6_set_hwol(packet);
mfex_udp_set_hwol(packet);
} break;

case PROFILE_ETH_IPV4_NVGRE: {
Expand All @@ -1000,6 +1050,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
continue;
}
dp_packet_update_rss_hash_ipv4(packet);
mfex_ipv4_set_hwol(packet);
mfex_udp_set_hwol(packet);
} break;

default:
Expand Down
Loading

0 comments on commit 3337e6d

Please sign in to comment.