diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 920402369de..e7738c37a0c 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -546,16 +546,47 @@ dp_packet_compare_offsets(struct dp_packet *b1, struct dp_packet *b2, return true; } +void +dp_packet_tnl_outer_ol_send_prepare(struct dp_packet *p, + uint64_t flags) +{ + if (dp_packet_hwol_is_outer_ipv4_cksum(p)) { + if (!(flags & NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM)) { + dp_packet_ip_set_header_csum(p, false); + dp_packet_ol_set_ip_csum_good(p); + dp_packet_hwol_reset_outer_ipv4_csum(p); + } + } + + if (!dp_packet_hwol_is_outer_udp_cksum(p)) { + return; + } + + if (!(flags & NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM)) { + packet_udp_complete_csum(p, false); + dp_packet_ol_set_l4_csum_good(p); + dp_packet_hwol_reset_outer_udp_csum(p); + } +} + /* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags' * and if not, updates the packet with the software fall back. */ void dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags) { + bool tnl_inner = false; + + if (dp_packet_hwol_is_tunnel_geneve(p) || + dp_packet_hwol_is_tunnel_vxlan(p)) { + dp_packet_tnl_outer_ol_send_prepare(p, flags); + tnl_inner = true; + } + if (dp_packet_hwol_tx_ip_csum(p)) { if (dp_packet_ip_checksum_good(p)) { dp_packet_hwol_reset_tx_ip_csum(p); } else if (!(flags & NETDEV_TX_OFFLOAD_IPV4_CKSUM)) { - dp_packet_ip_set_header_csum(p); + dp_packet_ip_set_header_csum(p, tnl_inner); dp_packet_ol_set_ip_csum_good(p); dp_packet_hwol_reset_tx_ip_csum(p); } @@ -565,24 +596,24 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags) return; } - if (dp_packet_l4_checksum_good(p)) { + if (dp_packet_l4_checksum_good(p) && !tnl_inner) { dp_packet_hwol_reset_tx_l4_csum(p); return; } if (dp_packet_hwol_l4_is_tcp(p) && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { - packet_tcp_complete_csum(p); + packet_tcp_complete_csum(p, tnl_inner); dp_packet_ol_set_l4_csum_good(p); dp_packet_hwol_reset_tx_l4_csum(p); } else if (dp_packet_hwol_l4_is_udp(p) && 
!(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { - packet_udp_complete_csum(p); + packet_udp_complete_csum(p, tnl_inner); dp_packet_ol_set_l4_csum_good(p); dp_packet_hwol_reset_tx_l4_csum(p); } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM) && dp_packet_hwol_l4_is_sctp(p)) { - packet_sctp_complete_csum(p); + packet_sctp_complete_csum(p, tnl_inner); dp_packet_ol_set_l4_csum_good(p); dp_packet_hwol_reset_tx_l4_csum(p); } diff --git a/lib/dp-packet.h b/lib/dp-packet.h index ad272f581dc..ee1f0734ad9 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -86,22 +86,47 @@ enum dp_packet_offload_mask { DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800), /* Offload IP checksum. */ DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CKSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x1000), + /* Offload packet is tunnel GENEVE. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_GENEVE, + RTE_MBUF_F_TX_TUNNEL_GENEVE, 0x2000), + /* Offload packet is tunnel VXLAN. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_VXLAN, + RTE_MBUF_F_TX_TUNNEL_VXLAN, 0x4000), + /* Offload tunnel packet, outer header is IPv4. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV4, + RTE_MBUF_F_TX_OUTER_IPV4, 0x8000), + /* Offload tunnel outer IPv4 checksum. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IP_CKSUM, + RTE_MBUF_F_TX_OUTER_IP_CKSUM, 0x10000), + /* Offload tunnel outer UDP checksum. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_UDP_CKSUM, + RTE_MBUF_F_TX_OUTER_UDP_CKSUM, 0x20000), + /* Offload tunnel packet, outer header is IPv6. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV6, + RTE_MBUF_F_TX_OUTER_IPV6, 0x40000), + /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. 
*/ }; -#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \ - DP_PACKET_OL_FLOW_MARK | \ - DP_PACKET_OL_RX_L4_CKSUM_BAD | \ - DP_PACKET_OL_RX_IP_CKSUM_BAD | \ - DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ - DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ - DP_PACKET_OL_TX_TCP_SEG | \ - DP_PACKET_OL_TX_IPV4 | \ - DP_PACKET_OL_TX_IPV6 | \ - DP_PACKET_OL_TX_TCP_CKSUM | \ - DP_PACKET_OL_TX_UDP_CKSUM | \ - DP_PACKET_OL_TX_SCTP_CKSUM | \ - DP_PACKET_OL_TX_IP_CKSUM) +#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \ + DP_PACKET_OL_FLOW_MARK | \ + DP_PACKET_OL_RX_L4_CKSUM_BAD | \ + DP_PACKET_OL_RX_IP_CKSUM_BAD | \ + DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ + DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ + DP_PACKET_OL_TX_TCP_SEG | \ + DP_PACKET_OL_TX_IPV4 | \ + DP_PACKET_OL_TX_IPV6 | \ + DP_PACKET_OL_TX_TCP_CKSUM | \ + DP_PACKET_OL_TX_UDP_CKSUM | \ + DP_PACKET_OL_TX_SCTP_CKSUM | \ + DP_PACKET_OL_TX_IP_CKSUM | \ + DP_PACKET_OL_TX_TUNNEL_GENEVE | \ + DP_PACKET_OL_TX_TUNNEL_VXLAN | \ + DP_PACKET_OL_TX_OUTER_IPV4 | \ + DP_PACKET_OL_TX_OUTER_IP_CKSUM | \ + DP_PACKET_OL_TX_OUTER_UDP_CKSUM | \ + DP_PACKET_OL_TX_OUTER_IPV6) #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \ DP_PACKET_OL_TX_UDP_CKSUM | \ @@ -139,6 +164,10 @@ struct dp_packet { * or UINT16_MAX. */ uint16_t l4_ofs; /* Transport-level header offset, or UINT16_MAX. */ + uint16_t inner_l3_ofs; /* Inner Network-level header offset, + * or UINT16_MAX. */ + uint16_t inner_l4_ofs; /* Inner Transport-level header offset, + * or UINT16_MAX. */ uint32_t cutlen; /* length in bytes to cut from the end. */ ovs_be32 packet_type; /* Packet type as defined in OpenFlow */ uint16_t csum_start; /* Position to start checksumming from. */ @@ -250,6 +279,7 @@ bool dp_packet_compare_offsets(struct dp_packet *good, struct dp_packet *test, struct ds *err_str); void dp_packet_ol_send_prepare(struct dp_packet *, uint64_t); +void dp_packet_tnl_outer_ol_send_prepare(struct dp_packet *, uint64_t); /* Frees memory that 'b' points to, as well as 'b' itself. 
*/ @@ -482,6 +512,22 @@ dp_packet_l4_size(const struct dp_packet *b) : 0; } +static inline void * +dp_packet_inner_l3(const struct dp_packet *b) +{ + return b->inner_l3_ofs != UINT16_MAX + ? (char *) dp_packet_data(b) + b->inner_l3_ofs + : NULL; +} + +static inline void * +dp_packet_inner_l4(const struct dp_packet *b) +{ + return b->inner_l4_ofs != UINT16_MAX + ? (char *) dp_packet_data(b) + b->inner_l4_ofs + : NULL; +} + static inline const void * dp_packet_get_tcp_payload(const struct dp_packet *b) { @@ -539,6 +585,25 @@ dp_packet_get_nd_payload(const struct dp_packet *b) } #ifdef DPDK_NETDEV +static inline void +dp_packet_set_l2_len(struct dp_packet *b, size_t l2_len) +{ + b->mbuf.l2_len = l2_len; +} + +static inline void +dp_packet_set_l3_len(struct dp_packet *b, size_t l3_len) +{ + b->mbuf.l3_len = l3_len; +} + +static inline void +dp_packet_set_l4_len(struct dp_packet *b, size_t l4_len) +{ + b->mbuf.l4_len = l4_len; +} + + static inline uint64_t * dp_packet_ol_flags_ptr(const struct dp_packet *b) { @@ -558,6 +623,24 @@ dp_packet_flow_mark_ptr(const struct dp_packet *b) } #else +static inline void +dp_packet_set_l2_len(struct dp_packet *b OVS_UNUSED, size_t l2_len OVS_UNUSED) +{ + /* There is no implementation. */ +} + +static inline void +dp_packet_set_l3_len(struct dp_packet *b OVS_UNUSED, size_t l3_len OVS_UNUSED) +{ + /* There is no implementation. */ +} + +static inline void +dp_packet_set_l4_len(struct dp_packet *b OVS_UNUSED, size_t l4_len OVS_UNUSED) +{ + /* There is no implementation. */ +} + static inline uint32_t * dp_packet_ol_flags_ptr(const struct dp_packet *b) { @@ -619,6 +702,8 @@ dp_packet_set_size(struct dp_packet *b, uint32_t v) * (and thus 'v') will always be <= UINT16_MAX; this means that there is no * loss of accuracy in assigning 'v' to 'data_len'. */ + + ovs_assert(v <= UINT16_MAX); b->mbuf.data_len = (uint16_t)v; /* Current seg length. */ b->mbuf.pkt_len = v; /* Total length of all segments linked to * this segment. 
*/ @@ -1056,6 +1141,36 @@ dp_packet_hwol_l4_is_sctp(struct dp_packet *b) DP_PACKET_OL_TX_SCTP_CKSUM; } +/* Returns 'true' if packet 'b' is marked for tunnel GENEVE + * checksum offloading. */ +static inline bool +dp_packet_hwol_is_tunnel_geneve(struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_GENEVE); +} + +/* Returns 'true' if packet 'b' is marked for tunnel VXLAN + * checksum offloading. */ +static inline bool +dp_packet_hwol_is_tunnel_vxlan(struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_VXLAN); +} + +/* Returns 'true' if packet 'b' is marked for outer IPv4 checksum offload. */ +static inline bool +dp_packet_hwol_is_outer_ipv4_cksum(struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_IP_CKSUM); +} + +/* Returns 'true' if packet 'b' is marked for outer UDP checksum offload. */ +static inline bool +dp_packet_hwol_is_outer_udp_cksum(struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_UDP_CKSUM); +} + static inline void dp_packet_hwol_reset_tx_l4_csum(struct dp_packet *p) { @@ -1078,6 +1193,14 @@ dp_packet_hwol_set_tx_ipv6(struct dp_packet *a) *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6; } +/* Mark packet 'a' as a tunnel packet with outer IPv6 header. */ +static inline void +dp_packet_hwol_set_tx_outer_ipv6(struct dp_packet *a) +{ + *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_OUTER_IPV4; + *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_OUTER_IPV6; +} + /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */ static inline bool dp_packet_hwol_tx_ip_csum(const struct dp_packet *p) @@ -1131,6 +1254,56 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; } +/* Mark packet 'b' for tunnel GENEVE offloading. 
*/ +static inline void +dp_packet_hwol_set_tunnel_geneve(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_GENEVE; +} + +/* Mark packet 'b' for tunnel VXLAN offloading. */ +static inline void +dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN; +} + +/* Mark packet 'b' as a tunnel packet with outer IPv4 header. */ +static inline void +dp_packet_hwol_set_tx_outer_ipv4(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) &= ~DP_PACKET_OL_TX_OUTER_IPV6; + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_IPV4; +} + +/* Mark packet 'b' for csum offloading in outer IPv4 header. */ +static inline void +dp_packet_hwol_set_tx_outer_ipv4_csum(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_IP_CKSUM; +} + +/* Clears the outer IPv4 checksum offload mark from packet 'p'. */ +static inline void +dp_packet_hwol_reset_outer_ipv4_csum(struct dp_packet *p) +{ + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_OUTER_IP_CKSUM; +} + +/* Clears the outer UDP checksum offload mark from packet 'p'. */ +static inline void +dp_packet_hwol_reset_outer_udp_csum(struct dp_packet *p) +{ + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_OUTER_UDP_CKSUM; +} + +/* Mark packet 'b' for csum offloading in outer UDP header. */ +static inline void +dp_packet_hwol_set_outer_udp_csum(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_UDP_CKSUM; +} + /* Resets TCP Segmentation in packet 'p' and adjust flags to indicate * L3 and L4 checksumming is now required. */ static inline void @@ -1184,9 +1357,9 @@ dp_packet_ip_checksum_bad(const struct dp_packet *p) /* Calculate and set the IPv4 header checksum in packet 'p'. */ static inline void -dp_packet_ip_set_header_csum(struct dp_packet *p) +dp_packet_ip_set_header_csum(struct dp_packet *p, bool inner) { - struct ip_header *ip = dp_packet_l3(p); + struct ip_header *ip = (inner) ? 
dp_packet_inner_l3(p) : dp_packet_l3(p); ovs_assert(ip); ip->ip_csum = 0; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index df5bbf85a05..c1981137f92 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -8194,7 +8194,9 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_, ds_destroy(&ds); } - dp_packet_ol_send_prepare(packet_, 0); + if (type != DPIF_UC_MISS) { + dp_packet_ol_send_prepare(packet_, 0); + } return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata, actions, wc, put_actions, dp->upcall_aux); diff --git a/lib/flow.c b/lib/flow.c index b8f99f66be9..82d93570adb 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -3278,7 +3278,7 @@ packet_expand(struct dp_packet *p, const struct flow *flow, size_t size) if (dp_packet_hwol_tx_ip_csum(p)) { dp_packet_ol_reset_ip_csum_good(p); } else { - dp_packet_ip_set_header_csum(p); + dp_packet_ip_set_header_csum(p, false); dp_packet_ol_set_ip_csum_good(p); } pseudo_hdr_csum = packet_csum_pseudoheader(ip); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1ff25c24692..fb26825ff85 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -416,6 +416,10 @@ enum dpdk_hw_ol_features { NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5, NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6, NETDEV_TX_TSO_OFFLOAD = 1 << 7, + NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD = 1 << 8, + NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD = 1 << 9, + NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD = 1 << 10, + NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD = 1 << 11, }; enum dpdk_rx_steer_flags { @@ -1075,6 +1079,14 @@ netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev) NETDEV_TX_OFFLOAD_SCTP_CKSUM); netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD, NETDEV_TX_OFFLOAD_TCP_TSO); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD, + NETDEV_TX_VXLAN_TNL_TSO); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD, + NETDEV_TX_GENEVE_TNL_TSO); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD, + 
NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD, + NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM); } static int @@ -1129,6 +1141,22 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; } + if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO; + } + + if (dev->hw_ol_features & NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO; + } + + if (dev->hw_ol_features & NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM; + } + + if (dev->hw_ol_features & NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM; + } + /* Limit configured rss hash functions to only those supported * by the eth device. */ conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads; @@ -1346,6 +1374,18 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD; } + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM) { + dev->hw_ol_features |= NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD; + } else { + dev->hw_ol_features &= ~NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD; + } + + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM) { + dev->hw_ol_features |= NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD; + } else { + dev->hw_ol_features &= ~NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD; + } + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; if (userspace_tso_enabled()) { if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) { @@ -1354,6 +1394,20 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) VLOG_WARN("%s: Tx TSO offload is not supported.", netdev_get_name(&dev->up)); } + + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO) { + dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD; + } else { + VLOG_WARN("%s: Tx Vxlan tunnel TSO offload is not supported.", 
+ netdev_get_name(&dev->up)); + } + + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO) { + dev->hw_ol_features |= NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD; + } else { + VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.", + netdev_get_name(&dev->up)); + } } n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq); @@ -2479,11 +2533,23 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) return true; } - mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt); - mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt); - mbuf->l4_len = 0; - mbuf->outer_l2_len = 0; - mbuf->outer_l3_len = 0; + /* If packet is vxlan or geneve tunnel packet, calculate outer + * l2 len and outer l3 len. Inner l2/l3/l4 len are calculated + * before. */ + if (mbuf->ol_flags & + (RTE_MBUF_F_TX_TUNNEL_GENEVE | RTE_MBUF_F_TX_TUNNEL_VXLAN)) { + mbuf->outer_l2_len = (char *) dp_packet_l3(pkt) - + (char *) dp_packet_eth(pkt); + mbuf->outer_l3_len = (char *) dp_packet_l4(pkt) - + (char *) dp_packet_l3(pkt); + } else { + mbuf->l2_len = (char *) dp_packet_l3(pkt) - + (char *) dp_packet_eth(pkt); + mbuf->l3_len = (char *) dp_packet_l4(pkt) - + (char *) dp_packet_l3(pkt); + mbuf->outer_l2_len = 0; + mbuf->outer_l3_len = 0; + } th = dp_packet_l4(pkt); if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { @@ -2501,8 +2567,14 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) return false; } - mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; - mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; + if (mbuf->ol_flags & (RTE_MBUF_F_TX_TUNNEL_GENEVE | + RTE_MBUF_F_TX_TUNNEL_VXLAN)) { + mbuf->tso_segsz = dev->mtu - mbuf->l2_len - mbuf->l3_len - + mbuf->l4_len - mbuf->outer_l3_len; + } else { + mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; + mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; + } if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { int hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; diff --git 
a/lib/netdev-dummy.c b/lib/netdev-dummy.c index d6ef865aa65..cd7e85a8188 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -1230,7 +1230,7 @@ netdev_dummy_send(struct netdev *netdev, int qid, if (dp_packet_hwol_tx_ip_csum(packet) && !dp_packet_ip_checksum_good(packet)) { - dp_packet_ip_set_header_csum(packet); + dp_packet_ip_set_header_csum(packet, false); dp_packet_ol_set_ip_csum_good(packet); } diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index a0682c70fbb..fa87c6281d5 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -173,15 +173,29 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header, ip6->ip6_plen = htons(*ip_tot_size); packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label); packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size; - dp_packet_hwol_set_tx_ipv6(packet); + + if (dp_packet_hwol_is_tunnel_geneve(packet) || + dp_packet_hwol_is_tunnel_vxlan(packet)) { + dp_packet_hwol_set_tx_outer_ipv6(packet); + } else { + dp_packet_hwol_set_tx_ipv6(packet); + } + dp_packet_ol_reset_ip_csum_good(packet); return ip6 + 1; } else { ip = netdev_tnl_ip_hdr(eth); ip->ip_tot_len = htons(*ip_tot_size); /* Postpone checksum to when the packet is pushed to the port. */ - dp_packet_hwol_set_tx_ipv4(packet); - dp_packet_hwol_set_tx_ip_csum(packet); + if (dp_packet_hwol_is_tunnel_geneve(packet) || + dp_packet_hwol_is_tunnel_vxlan(packet)) { + dp_packet_hwol_set_tx_outer_ipv4(packet); + dp_packet_hwol_set_tx_outer_ipv4_csum(packet); + } else { + dp_packet_hwol_set_tx_ipv4(packet); + dp_packet_hwol_set_tx_ip_csum(packet); + } + dp_packet_ol_reset_ip_csum_good(packet); *ip_tot_size -= IP_HEADER_LEN; packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size; @@ -226,6 +240,74 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl, return udp + 1; } +/* Calculate inner l2 l3 l4 len as tunnel outer header is not + * encapsulated now. 
*/ +static void +dp_packet_tnl_ol_process(struct dp_packet *packet, + const struct ovs_action_push_tnl *data) +{ + struct udp_header *udp = NULL; + uint8_t opt_len = 0; + struct eth_header *eth = NULL; + struct ip_header *ip = NULL; + struct genevehdr *gnh = NULL; + + /* The outer header is not pushed yet: l2/l3/l4 len describe the inner + * packet. The L4 type comes from the offload flags (IPv4 and IPv6). */ + if (dp_packet_hwol_l4_mask(packet)) { + ip = dp_packet_l3(packet); + + if (dp_packet_hwol_l4_is_tcp(packet)) { + struct tcp_header *th = dp_packet_l4(packet); + dp_packet_set_l4_len(packet, TCP_OFFSET(th->tcp_ctl) * 4); + } else if (dp_packet_hwol_l4_is_udp(packet)) { + dp_packet_set_l4_len(packet, UDP_HEADER_LEN); + } else if (dp_packet_hwol_l4_is_sctp(packet)) { + dp_packet_set_l4_len(packet, SCTP_HEADER_LEN); + } + + dp_packet_set_l3_len(packet, (char *) dp_packet_l4(packet) - + (char *) dp_packet_l3(packet)); + + if (data->tnl_type == OVS_VPORT_TYPE_GENEVE || + data->tnl_type == OVS_VPORT_TYPE_VXLAN) { + + if (IP_VER(ip->ip_ihl_ver) == 4) { + dp_packet_hwol_set_tx_ipv4(packet); + dp_packet_hwol_set_tx_ip_csum(packet); + } else if (IP_VER(ip->ip_ihl_ver) == 6) { + dp_packet_hwol_set_tx_ipv6(packet); + } + } + + /* l2 len is the sum of the UDP header, tunnel header and inner L2 + * header lengths. NOTE(review): Geneve parsing assumes IPv4 outer. 
*/ + if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) { + eth = (struct eth_header *)(data->header); + ip = (struct ip_header *)(eth + 1); + udp = (struct udp_header *)(ip + 1); + gnh = (struct genevehdr *)(udp + 1); + opt_len = gnh->opt_len * 4; + dp_packet_hwol_set_tunnel_geneve(packet); + dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) - + (char *) dp_packet_eth(packet) + + GENEVE_BASE_HLEN + opt_len); + + packet->inner_l3_ofs = packet->l3_ofs + GENEVE_BASE_HLEN + opt_len; + packet->inner_l4_ofs = packet->l4_ofs + GENEVE_BASE_HLEN + opt_len; + + } else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { + dp_packet_hwol_set_tunnel_vxlan(packet); + dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) - + (char *) dp_packet_eth(packet) + + VXLAN_HLEN); + + packet->inner_l3_ofs = packet->l3_ofs + VXLAN_HLEN; + packet->inner_l4_ofs = packet->l4_ofs + VXLAN_HLEN; + } + } +} + void netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, struct dp_packet *packet, @@ -234,6 +316,7 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, struct udp_header *udp; int ip_tot_size; + dp_packet_tnl_ol_process(packet, data); udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size, 0); @@ -241,13 +324,21 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, udp->udp_src = netdev_tnl_get_src_port(packet); udp->udp_len = htons(ip_tot_size); - /* Postpone checksum to the egress netdev. 
*/ - dp_packet_hwol_set_csum_udp(packet); if (udp->udp_csum) { dp_packet_ol_reset_l4_csum_good(packet); + if (dp_packet_hwol_is_tunnel_geneve(packet) || + dp_packet_hwol_is_tunnel_vxlan(packet)) { + dp_packet_hwol_set_outer_udp_csum(packet); + } else { + dp_packet_hwol_set_csum_udp(packet); + } } else { dp_packet_ol_set_l4_csum_good(packet); } + + packet->inner_l3_ofs += packet->l4_ofs; + packet->inner_l4_ofs += packet->l4_ofs; + } static void * diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index a7393c7cecf..22840a058b7 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -43,6 +43,10 @@ enum netdev_ol_flags { NETDEV_TX_OFFLOAD_UDP_CKSUM = 1 << 2, NETDEV_TX_OFFLOAD_SCTP_CKSUM = 1 << 3, NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 4, + NETDEV_TX_VXLAN_TNL_TSO = 1 << 5, + NETDEV_TX_GENEVE_TNL_TSO = 1 << 6, + NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM = 1 << 7, + NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM = 1 << 8, }; /* A network device (e.g. an Ethernet device). diff --git a/lib/netdev.c b/lib/netdev.c index 3ed8049f76a..f2d921ed633 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -69,6 +69,8 @@ COVERAGE_DEFINE(netdev_received); COVERAGE_DEFINE(netdev_sent); COVERAGE_DEFINE(netdev_add_router); COVERAGE_DEFINE(netdev_get_stats); +COVERAGE_DEFINE(netdev_vxlan_tso_drops); +COVERAGE_DEFINE(netdev_geneve_tso_drops); COVERAGE_DEFINE(netdev_push_header_drops); COVERAGE_DEFINE(netdev_soft_seg_good); COVERAGE_DEFINE(netdev_soft_seg_drops); @@ -912,6 +914,23 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { if (dp_packet_hwol_is_tso(packet)) { + if (dp_packet_hwol_is_tunnel_vxlan(packet) + && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) { + VLOG_WARN_RL(&rl, "%s: No VXLAN TSO support", + netdev_get_name(netdev)); + COVERAGE_INC(netdev_vxlan_tso_drops); + dp_packet_delete_batch(batch, true); + return false; + } + + if (dp_packet_hwol_is_tunnel_geneve(packet) + && 
!(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) { + VLOG_WARN_RL(&rl, "%s: No GENEVE TSO support", + netdev_get_name(netdev)); + COVERAGE_INC(netdev_geneve_tso_drops); + dp_packet_delete_batch(batch, true); + return false; + } return netdev_send_tso(netdev, qid, batch, concurrent_txq); } } @@ -990,17 +1009,31 @@ netdev_push_header(const struct netdev *netdev, size_t i, size = dp_packet_batch_size(batch); DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { - if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) { + if (OVS_UNLIKELY(data->tnl_type != OVS_VPORT_TYPE_GENEVE && + data->tnl_type != OVS_VPORT_TYPE_VXLAN && + dp_packet_hwol_is_tso(packet))) { COVERAGE_INC(netdev_push_header_drops); dp_packet_delete(packet); - VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is " - "not supported: packet dropped", - netdev_get_name(netdev)); + VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is not " + "supported for %s tunnels: packet dropped", + netdev_get_name(netdev), netdev_get_type(netdev)); } else { - /* The packet is going to be encapsulated and there is - * no support yet for inner network header csum offloading. 
*/ - dp_packet_ol_send_prepare(packet, 0); - + if (data->tnl_type != OVS_VPORT_TYPE_GENEVE && + data->tnl_type != OVS_VPORT_TYPE_VXLAN) { + dp_packet_ol_send_prepare(packet, 0); + } else if (dp_packet_hwol_is_tunnel_geneve(packet) || + dp_packet_hwol_is_tunnel_vxlan(packet)) { + if (dp_packet_hwol_is_tso(packet)) { + COVERAGE_INC(netdev_push_header_drops); + dp_packet_delete(packet); + VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is not " + "supported with multiple levels of " + "VXLAN or GENEVE encapsulation.", + netdev_get_name(netdev)); + continue; + } + dp_packet_ol_send_prepare(packet, 0); + } netdev->netdev_class->push_header(netdev, packet, data); pkt_metadata_init(&packet->md, data->out_port); @@ -1446,6 +1479,10 @@ netdev_get_status(const struct netdev *netdev, struct smap *smap) OL_ADD_STAT("udp_csum", NETDEV_TX_OFFLOAD_UDP_CKSUM); OL_ADD_STAT("sctp_csum", NETDEV_TX_OFFLOAD_SCTP_CKSUM); OL_ADD_STAT("tcp_seg", NETDEV_TX_OFFLOAD_TCP_TSO); + OL_ADD_STAT("vxlan_tso", NETDEV_TX_VXLAN_TNL_TSO); + OL_ADD_STAT("geneve_tso", NETDEV_TX_GENEVE_TNL_TSO); + OL_ADD_STAT("out_ip_csum", NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM); + OL_ADD_STAT("out_udp_csum", NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM); #undef OL_ADD_STAT err = 0; diff --git a/lib/packets.c b/lib/packets.c index dab823ba225..d9e41346e7b 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1997,9 +1997,9 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6) /* Set TCP checksum field in packet 'p' with complete checksum. * The packet must have the L3 and L4 offsets. */ void -packet_tcp_complete_csum(struct dp_packet *p) +packet_tcp_complete_csum(struct dp_packet *p, bool inner) { - struct tcp_header *tcp = dp_packet_l4(p); + struct tcp_header *tcp = (inner) ? dp_packet_inner_l4(p) : dp_packet_l4(p); tcp->tcp_csum = 0; if (dp_packet_hwol_is_ipv4(p)) { @@ -2020,9 +2020,9 @@ packet_tcp_complete_csum(struct dp_packet *p) /* Set UDP checksum field in packet 'p' with complete checksum. 
* The packet must have the L3 and L4 offsets. */ void -packet_udp_complete_csum(struct dp_packet *p) +packet_udp_complete_csum(struct dp_packet *p, bool inner) { - struct udp_header *udp = dp_packet_l4(p); + struct udp_header *udp = (inner) ? dp_packet_inner_l4(p) : dp_packet_l4(p); /* Skip csum calculation if the udp_csum is zero. */ if (!udp->udp_csum) { @@ -2052,9 +2052,9 @@ packet_udp_complete_csum(struct dp_packet *p) /* Set SCTP checksum field in packet 'p' with complete checksum. * The packet must have the L3 and L4 offsets. */ void -packet_sctp_complete_csum(struct dp_packet *p) +packet_sctp_complete_csum(struct dp_packet *p, bool inner) { - struct sctp_header *sh = dp_packet_l4(p); + struct sctp_header *sh = (inner) ? dp_packet_inner_l4(p) : dp_packet_l4(p); uint16_t tp_len = dp_packet_l4_size(p); ovs_be32 csum; diff --git a/lib/packets.h b/lib/packets.h index 12245b7649a..8b6994809fe 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -1682,9 +1682,9 @@ uint32_t packet_csum_pseudoheader(const struct ip_header *); bool packet_rh_present(struct dp_packet *packet, uint8_t *nexthdr, bool *first_frag); void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); -void packet_tcp_complete_csum(struct dp_packet *); -void packet_udp_complete_csum(struct dp_packet *); -void packet_sctp_complete_csum(struct dp_packet *); +void packet_tcp_complete_csum(struct dp_packet *, bool is_inner); +void packet_udp_complete_csum(struct dp_packet *, bool is_inner); +void packet_sctp_complete_csum(struct dp_packet *, bool is_inner); #define DNS_HEADER_LEN 12 struct dns_header { diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index e5b9e0c3396..790b5a43af9 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -658,11 +658,11 @@ OVS_VSWITCHD_START( other-config:datapath-id=1234 fail-mode=secure]) AT_CHECK([ovs-vsctl get interface p1 status | sed -n 's/^{\(.*\).*}$/\1/p'], [0], [dnl -tx_ip_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", 
tx_tcp_seg_offload="false", tx_udp_csum_offload="false" +tx_geneve_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" ], []) AT_CHECK([ovs-vsctl get interface br0 status | sed -n 's/^{\(.*\).*}$/\1/p'], [0], [dnl -tx_ip_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false" +tx_geneve_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" ], []) OVS_VSWITCHD_STOP