Merge branch 'sfc-encap-offloads-on-EF10'
Edward Cree says:

====================
sfc: encap offloads on EF10

EF10 NICs from the 8000 series onwards support TX offloads (checksumming,
 TSO) on VXLAN- and NVGRE-encapsulated packets.  This series adds driver
 support for these offloads.

Changes from v1:
 * Fix 'no TXQ of type' error handling in patch #1 (and clear up the
   misleading comment that inspired the wrong version)
 * Add comment in patch #5 explaining what the deal with TSOv3 is
====================

Signed-off-by: David S. Miller <[email protected]>
davem330 committed Sep 12, 2020
2 parents 4a681bf + 24b2c37 commit 6b5525c
Showing 18 changed files with 318 additions and 106 deletions.
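A note for orientation before the diffs: the series keys each channel's TX queues by a two-bit checksum type, which is where the "up to four TX queues per channel" in the first hunk below comes from. Here is a minimal userspace sketch of that encoding; the flag values are an assumption (the diff only guarantees, via a BUILD_BUG_ON, that the two flags OR to 3), so treat the bit assignment as illustrative:

    /* Sketch of the two-bit TX queue type encoding.  Flag values assumed;
     * the diff below only guarantees OUTER_CSUM | INNER_CSUM == 3.
     */
    #include <stdio.h>

    #define EFX_TXQ_TYPE_OUTER_CSUM 1 /* assumed bit 0 */
    #define EFX_TXQ_TYPE_INNER_CSUM 2 /* assumed bit 1 */

    int main(void)
    {
        /* Four combinations -> four TX queues per channel */
        for (unsigned int label = 0; label < 4; label++) {
            unsigned int type = label & 3; /* low two bits of label */

            printf("TXQ label %u: outer csum %s, inner csum %s\n", label,
                   (type & EFX_TXQ_TYPE_OUTER_CSUM) ? "on" : "off",
                   (type & EFX_TXQ_TYPE_INNER_CSUM) ? "on" : "off");
        }
        return 0;
    }

The BUILD_BUG_ON in efx_ef10_tx_probe() below guards exactly this invariant, so that "label & 3" recovers the queue type.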
124 changes: 85 additions & 39 deletions drivers/net/ethernet/sfc/ef10.c
@@ -601,10 +601,14 @@ static int efx_ef10_probe(struct efx_nic *efx)
 	efx_ef10_read_licensed_features(efx);
 
 	/* We can have one VI for each vi_stride-byte region.
-	 * However, until we use TX option descriptors we need two TX queues
-	 * per channel.
+	 * However, until we use TX option descriptors we need up to four
+	 * TX queues per channel for different checksumming combinations.
 	 */
-	efx->tx_queues_per_channel = 2;
+	if (nic_data->datapath_caps &
+	    (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
+		efx->tx_queues_per_channel = 4;
+	else
+		efx->tx_queues_per_channel = 2;
 	efx->max_vis = efx_ef10_mem_map_size(efx) / efx->vi_stride;
 	if (!efx->max_vis) {
 		netif_err(efx, drv, efx->net_dev, "error determining max VIs\n");
@@ -1300,6 +1304,7 @@ static void efx_ef10_fini_nic(struct efx_nic *efx)
 static int efx_ef10_init_nic(struct efx_nic *efx)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	netdev_features_t hw_enc_features = 0;
 	int rc;
 
 	if (nic_data->must_check_datapath_caps) {
@@ -1344,6 +1349,21 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
 		nic_data->must_restore_piobufs = false;
 	}
 
+	/* add encapsulated checksum offload features */
+	if (efx_has_cap(efx, VXLAN_NVGRE) && !efx_ef10_is_vf(efx))
+		hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	/* add encapsulated TSO features */
+	if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) {
+		netdev_features_t encap_tso_features;
+
+		encap_tso_features = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM;
+
+		hw_enc_features |= encap_tso_features | NETIF_F_TSO;
+		efx->net_dev->features |= encap_tso_features;
+	}
+	efx->net_dev->hw_enc_features = hw_enc_features;
+
 	/* don't fail init if RSS setup doesn't work */
 	rc = efx->type->rx_push_rss_config(efx, false,
 					   efx->rss_context.rx_indir_table, NULL);
@@ -2146,6 +2166,9 @@ static int efx_ef10_irq_test_generate(struct efx_nic *efx)
 
 static int efx_ef10_tx_probe(struct efx_tx_queue *tx_queue)
 {
+	/* low two bits of label are what we want for type */
+	BUILD_BUG_ON((EFX_TXQ_TYPE_OUTER_CSUM | EFX_TXQ_TYPE_INNER_CSUM) != 3);
+	tx_queue->type = tx_queue->label & 3;
 	return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf,
 				    (tx_queue->ptr_mask + 1) *
 				    sizeof(efx_qword_t),
@@ -2168,15 +2191,15 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
 
 /* Add Firmware-Assisted TSO v2 option descriptors to a queue.
  */
-static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
-				struct sk_buff *skb,
-				bool *data_mapped)
+int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+			 bool *data_mapped)
 {
 	struct efx_tx_buffer *buffer;
+	u16 inner_ipv4_id = 0;
+	u16 outer_ipv4_id = 0;
 	struct tcphdr *tcp;
 	struct iphdr *ip;
-	u16 ipv4_id;
+	u16 ip_tot_len;
 	u32 seqnum;
 	u32 mss;

@@ -2189,21 +2212,43 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
 		return -EINVAL;
 	}
 
-	ip = ip_hdr(skb);
+	if (skb->encapsulation) {
+		if (!tx_queue->tso_encap)
+			return -EINVAL;
+		ip = ip_hdr(skb);
+		if (ip->version == 4)
+			outer_ipv4_id = ntohs(ip->id);
+
+		ip = inner_ip_hdr(skb);
+		tcp = inner_tcp_hdr(skb);
+	} else {
+		ip = ip_hdr(skb);
+		tcp = tcp_hdr(skb);
+	}
+
+	/* 8000-series EF10 hardware requires that IP Total Length be
+	 * greater than or equal to the value it will have in each segment
+	 * (which is at most mss + 208 + TCP header length), but also less
+	 * than (0x10000 - inner_network_header).  Otherwise the TCP
+	 * checksum calculation will be broken for encapsulated packets.
+	 * We fill in ip->tot_len with 0xff30, which should satisfy the
+	 * first requirement unless the MSS is ridiculously large (which
+	 * should be impossible as the driver max MTU is 9216); it is
+	 * guaranteed to satisfy the second as we only attempt TSO if
+	 * inner_network_header <= 208.
+	 */
+	ip_tot_len = -EFX_TSO2_MAX_HDRLEN;
+	EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN +
+				  (tcp->doff << 2u) > ip_tot_len);
+
 	if (ip->version == 4) {
-		/* Modify IPv4 header if needed. */
-		ip->tot_len = 0;
+		ip->tot_len = htons(ip_tot_len);
 		ip->check = 0;
-		ipv4_id = ntohs(ip->id);
+		inner_ipv4_id = ntohs(ip->id);
 	} else {
-		/* Modify IPv6 header if needed. */
-		struct ipv6hdr *ipv6 = ipv6_hdr(skb);
-
-		ipv6->payload_len = 0;
-		ipv4_id = 0;
+		((struct ipv6hdr *)ip)->payload_len = htons(ip_tot_len);
 	}
 
-	tcp = tcp_hdr(skb);
 	seqnum = ntohl(tcp->seq);
 
 	buffer = efx_tx_queue_get_insert_buffer(tx_queue);
@@ -2216,7 +2261,7 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
 			     ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
 			     ESF_DZ_TX_TSO_OPTION_TYPE,
 			     ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
-			     ESF_DZ_TX_TSO_IP_ID, ipv4_id,
+			     ESF_DZ_TX_TSO_IP_ID, inner_ipv4_id,
 			     ESF_DZ_TX_TSO_TCP_SEQNO, seqnum
 			     );
 	++tx_queue->insert_count;
@@ -2226,11 +2271,12 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
 	buffer->flags = EFX_TX_BUF_OPTION;
 	buffer->len = 0;
 	buffer->unmap_len = 0;
-	EFX_POPULATE_QWORD_4(buffer->option,
+	EFX_POPULATE_QWORD_5(buffer->option,
 			     ESF_DZ_TX_DESC_IS_OPT, 1,
 			     ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
 			     ESF_DZ_TX_TSO_OPTION_TYPE,
 			     ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
+			     ESF_DZ_TX_TSO_OUTER_IPID, outer_ipv4_id,
 			     ESF_DZ_TX_TSO_TCP_MSS, mss
 			     );
 	++tx_queue->insert_count;
@@ -2254,11 +2300,11 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx)
 
 static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 {
-	bool csum_offload = tx_queue->label & EFX_TXQ_TYPE_OFFLOAD;
+	bool csum_offload = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM;
+	bool inner_csum = tx_queue->type & EFX_TXQ_TYPE_INNER_CSUM;
 	struct efx_channel *channel = tx_queue->channel;
 	struct efx_nic *efx = tx_queue->efx;
 	struct efx_ef10_nic_data *nic_data;
-	bool tso_v2 = false;
 	efx_qword_t *txd;
 	int rc;

@@ -2281,15 +2327,18 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 	 * TSOv2 cannot be used with Hardware timestamping, and is never needed
 	 * for XDP tx.
 	 */
-	if (csum_offload && (nic_data->datapath_caps2 &
-			(1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
-	    !tx_queue->timestamping && !tx_queue->xdp_tx) {
-		tso_v2 = true;
-		netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
-			  channel->channel);
-	}
+	if (efx_has_cap(efx, TX_TSO_V2)) {
+		if ((csum_offload || inner_csum) &&
+		    !tx_queue->timestamping && !tx_queue->xdp_tx) {
+			tx_queue->tso_version = 2;
+			netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
+				  channel->channel);
+		}
+	} else if (efx_has_cap(efx, TX_TSO)) {
+		tx_queue->tso_version = 1;
+	}
 
-	rc = efx_mcdi_tx_init(tx_queue, tso_v2);
+	rc = efx_mcdi_tx_init(tx_queue);
 	if (rc)
 		goto fail;

@@ -2302,22 +2351,19 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 	tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION;
 	tx_queue->insert_count = 1;
 	txd = efx_tx_desc(tx_queue, 0);
-	EFX_POPULATE_QWORD_5(*txd,
+	EFX_POPULATE_QWORD_7(*txd,
 			     ESF_DZ_TX_DESC_IS_OPT, true,
 			     ESF_DZ_TX_OPTION_TYPE,
 			     ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
 			     ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
-			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload,
+			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload && tx_queue->tso_version != 2,
+			     ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM, inner_csum,
+			     ESF_DZ_TX_OPTION_INNER_IP_CSUM, inner_csum && tx_queue->tso_version != 2,
 			     ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping);
 	tx_queue->write_count = 1;
 
-	if (tso_v2) {
-		tx_queue->handle_tso = efx_ef10_tx_tso_desc;
-		tx_queue->tso_version = 2;
-	} else if (nic_data->datapath_caps &
-		   (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
-		tx_queue->tso_version = 1;
-	}
+	if (tx_queue->tso_version == 2 && efx_has_cap(efx, TX_TSO_V2_ENCAP))
+		tx_queue->tso_encap = true;
 
 	wmb();
 	efx_ef10_push_tx_desc(tx_queue, txd);
@@ -2880,7 +2926,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
 	/* Get the transmit queue */
 	tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
 	tx_queue = efx_channel_get_tx_queue(channel,
-					    tx_ev_q_label % EFX_TXQ_TYPES);
+					    tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
 
 	if (!tx_queue->timestamping) {
 		/* Transmit completion */
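An aside on the ip_tot_len arithmetic in the TSO hunk above: ip_tot_len = -EFX_TSO2_MAX_HDRLEN relies on u16 wraparound. Assuming EFX_TSO2_MAX_HDRLEN is 208 (consistent with the "inner_network_header <= 208" bound quoted in the comment; the constant's definition is not in this excerpt), the result is 0x10000 - 208 = 0xff30, exactly the value the comment promises. A standalone check:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const uint16_t max_hdrlen = 208; /* assumed EFX_TSO2_MAX_HDRLEN */
        uint16_t ip_tot_len = (uint16_t)-max_hdrlen; /* u16 wraparound */

        printf("ip_tot_len = 0x%04x (%u)\n", ip_tot_len, ip_tot_len);
        /* prints: ip_tot_len = 0xff30 (65328) */
        return 0;
    }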
9 changes: 8 additions & 1 deletion drivers/net/ethernet/sfc/ef100_tx.c
@@ -37,7 +37,14 @@ void ef100_tx_init(struct efx_tx_queue *tx_queue)
 				    tx_queue->channel->channel -
 				    tx_queue->efx->tx_channel_offset);
 
-	if (efx_mcdi_tx_init(tx_queue, false))
+	/* This value is purely documentational; as EF100 never passes through
+	 * the switch statement in tx.c:__efx_enqueue_skb(), that switch does
+	 * not handle case 3.  EF100's TSOv3 descriptors are generated by
+	 * ef100_make_tso_desc().
+	 * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2.
+	 */
+	tx_queue->tso_version = 3;
+	if (efx_mcdi_tx_init(tx_queue))
 		netdev_WARN(tx_queue->efx->net_dev,
 			    "failed to initialise TXQ %d\n", tx_queue->queue);
 }
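For context on that comment: the tso_version dispatch lives in tx.c:__efx_enqueue_skb(), which this excerpt does not include. A sketch of its shape, reconstructed from the comment above rather than quoted from the patch (the efx_enqueue_skb_tso() name for the version-1 path is an assumption here):

    /* Reconstructed sketch, not quoted from the patch: per-queue TSO
     * dispatch on tso_version.  EF100 never reaches this switch, so
     * there is no case 3.
     */
    switch (tx_queue->tso_version) {
    case 1:
        rc = efx_enqueue_skb_tso(tx_queue, skb, &data_mapped);
        break;
    case 2:
        rc = efx_ef10_tx_tso_desc(tx_queue, skb, &data_mapped);
        break;
    default:
        rc = -EINVAL; /* no hardware TSO on this queue */
        break;
    }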
1 change: 1 addition & 0 deletions drivers/net/ethernet/sfc/efx.c
@@ -596,6 +596,7 @@ static const struct net_device_ops efx_netdev_ops = {
 	.ndo_set_mac_address	= efx_set_mac_address,
 	.ndo_set_rx_mode	= efx_set_rx_mode,
 	.ndo_set_features	= efx_set_features,
+	.ndo_features_check	= efx_features_check,
 	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
 #ifdef CONFIG_SFC_SRIOV
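The efx_features_check() body wired up here lives in efx_common.c, which is not among the files shown. A plausible minimal shape of such a hook, hedged: the 208-byte header bound follows from the TSO comment in ef10.c above, but the efx_can_encap_offloads() helper and the exact mask handling are assumptions, not quotes from the patch:

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    /* Sketch: strip offload features for encapsulated SKBs the NIC cannot
     * handle, so the stack falls back to software GSO/checksumming.
     */
    static netdev_features_t efx_features_check(struct sk_buff *skb,
                                                struct net_device *dev,
                                                netdev_features_t features)
    {
        struct efx_nic *efx = netdev_priv(dev);

        if (skb->encapsulation) {
            /* TSOv2 allows at most EFX_TSO2_MAX_HDRLEN (208) bytes of
             * headers before the inner TCP payload.
             */
            if ((features & NETIF_F_GSO_MASK) &&
                skb_inner_transport_offset(skb) > EFX_TSO2_MAX_HDRLEN)
                features &= ~NETIF_F_GSO_MASK;
            /* Assumed helper: can the NIC parse this tunnel type? */
            if ((features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK)) &&
                !efx_can_encap_offloads(efx, skb))
                features &= ~(NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK);
        }
        return features;
    }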
10 changes: 5 additions & 5 deletions drivers/net/ethernet/sfc/efx_channels.c
@@ -151,7 +151,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
 	 */
 
 	n_xdp_tx = num_possible_cpus();
-	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
+	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL);
 
 	vec_count = pci_msix_vec_count(efx->pci_dev);
 	if (vec_count < 0)
@@ -179,7 +179,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
 		efx->xdp_tx_queue_count = 0;
 	} else {
 		efx->n_xdp_channels = n_xdp_ev;
-		efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
+		efx->xdp_tx_per_channel = EFX_MAX_TXQ_PER_CHANNEL;
 		efx->xdp_tx_queue_count = n_xdp_tx;
 		n_channels += n_xdp_ev;
 		netif_dbg(efx, drv, efx->net_dev,
@@ -520,7 +520,7 @@ static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
 	channel->channel = i;
 	channel->type = &efx_default_channel_type;
 
-	for (j = 0; j < EFX_TXQ_TYPES; j++) {
+	for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
 		tx_queue = &channel->tx_queue[j];
 		tx_queue->efx = efx;
 		tx_queue->queue = -1;
@@ -594,7 +594,7 @@ struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
 	channel->napi_str.state = 0;
 	memset(&channel->eventq, 0, sizeof(channel->eventq));
 
-	for (j = 0; j < EFX_TXQ_TYPES; j++) {
+	for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
 		tx_queue = &channel->tx_queue[j];
 		if (tx_queue->channel)
 			tx_queue->channel = channel;
@@ -894,7 +894,7 @@ int efx_set_channels(struct efx_nic *efx)
 					  xdp_queue_number, tx_queue->queue);
 			/* We may have a few left-over XDP TX
 			 * queues owing to xdp_tx_queue_count
-			 * not dividing evenly by EFX_TXQ_TYPES.
+			 * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL.
 			 * We still allocate and probe those
 			 * TXQs, but never use them.
 			 */
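A worked example of the XDP queue arithmetic above, with EFX_MAX_TXQ_PER_CHANNEL = 4 and an illustrative six-CPU machine (the CPU count is made up; DIV_ROUND_UP mirrors the kernel macro):

    #include <stdio.h>

    #define EFX_MAX_TXQ_PER_CHANNEL 4
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        int n_xdp_tx = 6; /* stand-in for num_possible_cpus() */
        int n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL);
        int allocated = n_xdp_ev * EFX_MAX_TXQ_PER_CHANNEL;

        /* 2 channels x 4 TXQs = 8 allocated; 6 used, 2 probed but unused */
        printf("channels=%d txqs=%d leftover=%d\n",
               n_xdp_ev, allocated, allocated - n_xdp_tx);
        return 0;
    }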