diff --git a/lib/dp-packet.c b/lib/dp-packet.c index ed004c3b902..920402369de 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -34,6 +34,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so pkt_metadata_init(&b->md, 0); dp_packet_reset_cutlen(b); dp_packet_reset_offload(b); + dp_packet_set_tso_segsz(b, 0); /* Initialize implementation-specific fields of dp_packet. */ dp_packet_init_specific(b); /* By default assume the packet type to be Ethernet. */ @@ -203,6 +204,8 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom) *dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer); *dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK; + dp_packet_set_tso_segsz(new_buffer, dp_packet_get_tso_segsz(buffer)); + if (dp_packet_rss_valid(buffer)) { dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer)); } diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 70ddf8aa45a..30a1d9dc090 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -126,6 +126,7 @@ struct dp_packet { uint32_t ol_flags; /* Offloading flags. */ uint32_t rss_hash; /* Packet hash. */ uint32_t flow_mark; /* Packet flow mark. */ + uint16_t tso_segsz; /* TCP segment size. */ #endif enum dp_packet_source source; /* Source of memory allocated as 'base'. */ @@ -166,6 +167,9 @@ static inline void dp_packet_set_size(struct dp_packet *, uint32_t); static inline uint16_t dp_packet_get_allocated(const struct dp_packet *); static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t); +static inline uint16_t dp_packet_get_tso_segsz(const struct dp_packet *); +static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t); + void *dp_packet_resize_l2(struct dp_packet *, int increment); void *dp_packet_resize_l2_5(struct dp_packet *, int increment); static inline void *dp_packet_eth(const struct dp_packet *); @@ -644,6 +648,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) b->mbuf.buf_len = s; } +static inline uint16_t +dp_packet_get_tso_segsz(const struct dp_packet *p) +{ + return p->mbuf.tso_segsz; +} + +static inline void +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s) +{ + p->mbuf.tso_segsz = s; +} #else /* DPDK_NETDEV */ static inline void @@ -700,6 +715,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) b->allocated_ = s; } +static inline uint16_t +dp_packet_get_tso_segsz(const struct dp_packet *p) +{ + return p->tso_segsz; +} + +static inline void +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s) +{ + p->tso_segsz = s; +} #endif /* DPDK_NETDEV */ static inline void diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 29f2b280d49..706036d4e42 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -2486,6 +2486,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { struct tcp_header *th = dp_packet_l4(pkt); + int hdr_len; if (!th) { VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" @@ -2495,7 +2496,15 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; + hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; + if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) { + VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. " + "hdr: %"PRIu32", gso: %"PRIu32", max len: %"PRIu32"", + dev->up.name, hdr_len, mbuf->tso_segsz, + dev->max_packet_len); + return false; + } if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) { mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM; @@ -2770,7 +2779,8 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts, int cnt = 0; struct rte_mbuf *pkt; - /* Filter oversized packets, unless are marked for TSO. */ + /* Filter oversized packets. The TSO packets are filtered out + * during the offloading preparation for performance reasons. */ for (i = 0; i < pkt_cnt; i++) { pkt = pkts[i]; if (OVS_UNLIKELY((pkt->pkt_len > dev->max_packet_len) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 93a5845c064..e79a432607a 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -539,7 +539,7 @@ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0); static bool tap_supports_vnet_hdr = true; static int netdev_linux_parse_vnet_hdr(struct dp_packet *b); -static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu); +static int netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu); static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *, int cmd, const char *cmd_name); static int get_flags(const struct netdev *, unsigned int *flags); @@ -1597,9 +1597,10 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_) } static int -netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu, - struct dp_packet_batch *batch) +netdev_linux_sock_batch_send(struct netdev *netdev_, int sock, int ifindex, + bool tso, int mtu, struct dp_packet_batch *batch) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); const size_t size = dp_packet_batch_size(batch); /* We don't bother setting most fields in sockaddr_ll because the * kernel ignores them for SOCK_RAW. */ @@ -1608,26 +1609,36 @@ netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu, struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size); struct iovec *iov = xmalloc(sizeof(*iov) * size); - struct dp_packet *packet; + int cnt = 0; + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { if (tso) { - netdev_linux_prepend_vnet_hdr(packet, mtu); - } + int ret = netdev_linux_prepend_vnet_hdr(packet, mtu); + + if (OVS_UNLIKELY(ret)) { + netdev->tx_dropped += 1; + VLOG_WARN_RL(&rl, "%s: Prepend vnet hdr failed, packet " + "dropped. %s", netdev_get_name(netdev_), + ovs_strerror(ret)); + continue; + } + } - iov[i].iov_base = dp_packet_data(packet); - iov[i].iov_len = dp_packet_size(packet); - mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll, - .msg_namelen = sizeof sll, - .msg_iov = &iov[i], - .msg_iovlen = 1 }; + iov[cnt].iov_base = dp_packet_data(packet); + iov[cnt].iov_len = dp_packet_size(packet); + mmsg[cnt].msg_hdr = (struct msghdr) { .msg_name = &sll, + .msg_namelen = sizeof sll, + .msg_iov = &iov[cnt], + .msg_iovlen = 1 }; + cnt++; } int error = 0; - for (uint32_t ofs = 0; ofs < size; ) { + for (uint32_t ofs = 0; ofs < cnt;) { ssize_t retval; do { - retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0); + retval = sendmmsg(sock, mmsg + ofs, cnt - ofs, 0); error = retval < 0 ? errno : 0; } while (error == EINTR); if (error) { @@ -1669,7 +1680,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu, int error; if (OVS_LIKELY(tap_supports_vnet_hdr)) { - netdev_linux_prepend_vnet_hdr(packet, mtu); + error = netdev_linux_prepend_vnet_hdr(packet, mtu); + if (OVS_UNLIKELY(error)) { + netdev->tx_dropped++; + VLOG_WARN_RL(&rl, "%s: Prepend vnet hdr failed, packet " + "dropped. %s", netdev_get_name(netdev_), + ovs_strerror(error)); + continue; + } } size = dp_packet_size(packet); @@ -1799,7 +1817,8 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, goto free_batch; } - error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch); + error = netdev_linux_sock_batch_send(netdev_, sock, ifindex, tso, mtu, + batch); } else { error = netdev_linux_tap_batch_send(netdev_, mtu, batch); } @@ -7092,8 +7111,7 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) switch (vnet->gso_type) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_TCPV6: - /* FIXME: The packet has offloaded TCP segmentation. The gso_size - * is given and needs to be respected. */ + dp_packet_set_tso_segsz(b, (OVS_FORCE uint16_t) vnet->gso_size); dp_packet_hwol_set_tcp_seg(b); break; @@ -7115,18 +7133,32 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) return ret; } -static void +/* Prepends struct virtio_net_hdr to packet 'b'. + * Returns 0 if successful, otherwise a positive errno value. + * Returns EMSGSIZE if the packet 'b' cannot be sent over MTU 'mtu'. */ +static int netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) { struct virtio_net_hdr v; struct virtio_net_hdr *vnet = &v; if (dp_packet_hwol_is_tso(b)) { - uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b)) - + TCP_HEADER_LEN; + uint16_t tso_segsz = dp_packet_get_tso_segsz(b); + struct tcp_header *tcp = dp_packet_l4(b); + int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4; + int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b)) + + tcp_hdr_len; + int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN; + + if (OVS_UNLIKELY((hdr_len + tso_segsz) > max_packet_len)) { + VLOG_WARN_RL(&rl, "Oversized TSO packet. hdr_len: %"PRIu32", " + "gso: %"PRIu16", max length: %"PRIu32".", hdr_len, + tso_segsz, max_packet_len); + return EMSGSIZE; + } vnet->hdr_len = (OVS_FORCE __virtio16)hdr_len; - vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len); + vnet->gso_size = (OVS_FORCE __virtio16)(tso_segsz); if (dp_packet_hwol_is_ipv4(b)) { vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; } else if (dp_packet_hwol_tx_ipv6(b)) { @@ -7216,4 +7248,5 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu) } dp_packet_push(b, vnet, sizeof *vnet); + return 0; }