Skip to content

Commit

Permalink
can: mcp251xfd: fix infinite loop when xmit fails
Browse files Browse the repository at this point in the history
commit d8fb63e upstream.

When the mcp251xfd_start_xmit() function fails, the driver stops
processing messages, and the interrupt routine does not return,
running indefinitely even after killing the running application.

Error messages:
[  441.298819] mcp251xfd spi2.0 can0: ERROR in mcp251xfd_start_xmit: -16
[  441.306498] mcp251xfd spi2.0 can0: Transmit Event FIFO buffer not empty. (seq=0x000017c7, tef_tail=0x000017cf, tef_head=0x000017d0, tx_head=0x000017d3).
... and repeat forever.

The issue can be triggered when multiple devices share the same SPI
interface. And there is concurrent access to the bus.

The problem occurs because tx_ring->head increments even if
mcp251xfd_start_xmit() fails. Consequently, the driver skips one TX
package while still expecting a response in
mcp251xfd_handle_tefif_one().

Resolve the issue by starting a workqueue to write the tx obj
synchronously if err = -EBUSY. In case of another error, decrement
tx_ring->head, remove skb from the echo stack, and drop the message.

Fixes: 55e5b97 ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN")
Cc: [email protected]
Signed-off-by: Vitor Soares <[email protected]>
Link: https://lore.kernel.org/all/[email protected]
[mkl: use more imperative wording in patch description]
Signed-off-by: Marc Kleine-Budde <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
  • Loading branch information
Vitor Soares authored and gregkh committed Jul 5, 2024
1 parent 73fb6df commit 3e72558
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 9 deletions.
14 changes: 13 additions & 1 deletion drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1618,11 +1618,20 @@ static int mcp251xfd_open(struct net_device *ndev)
clear_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
can_rx_offload_enable(&priv->offload);

priv->wq = alloc_ordered_workqueue("%s-mcp251xfd_wq",
WQ_FREEZABLE | WQ_MEM_RECLAIM,
dev_name(&spi->dev));
if (!priv->wq) {
err = -ENOMEM;
goto out_can_rx_offload_disable;
}
INIT_WORK(&priv->tx_work, mcp251xfd_tx_obj_write_sync);

err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq,
IRQF_SHARED | IRQF_ONESHOT,
dev_name(&spi->dev), priv);
if (err)
goto out_can_rx_offload_disable;
goto out_destroy_workqueue;

err = mcp251xfd_chip_interrupts_enable(priv);
if (err)
Expand All @@ -1634,6 +1643,8 @@ static int mcp251xfd_open(struct net_device *ndev)

out_free_irq:
free_irq(spi->irq, priv);
out_destroy_workqueue:
destroy_workqueue(priv->wq);
out_can_rx_offload_disable:
can_rx_offload_disable(&priv->offload);
set_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
Expand Down Expand Up @@ -1661,6 +1672,7 @@ static int mcp251xfd_stop(struct net_device *ndev)
hrtimer_cancel(&priv->tx_irq_timer);
mcp251xfd_chip_interrupts_disable(priv);
free_irq(ndev->irq, priv);
destroy_workqueue(priv->wq);
can_rx_offload_disable(&priv->offload);
mcp251xfd_timestamp_stop(priv);
mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
Expand Down
55 changes: 47 additions & 8 deletions drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,39 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
tx_obj->xfer[0].len = len;
}

static void mcp251xfd_tx_failure_drop(const struct mcp251xfd_priv *priv,
struct mcp251xfd_tx_ring *tx_ring,
int err)
{
struct net_device *ndev = priv->ndev;
struct net_device_stats *stats = &ndev->stats;
unsigned int frame_len = 0;
u8 tx_head;

tx_ring->head--;
stats->tx_dropped++;
tx_head = mcp251xfd_get_tx_head(tx_ring);
can_free_echo_skb(ndev, tx_head, &frame_len);
netdev_completed_queue(ndev, 1, frame_len);
netif_wake_queue(ndev);

if (net_ratelimit())
netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
}

void mcp251xfd_tx_obj_write_sync(struct work_struct *work)
{
struct mcp251xfd_priv *priv = container_of(work, struct mcp251xfd_priv,
tx_work);
struct mcp251xfd_tx_obj *tx_obj = priv->tx_work_obj;
struct mcp251xfd_tx_ring *tx_ring = priv->tx;
int err;

err = spi_sync(priv->spi, &tx_obj->msg);
if (err)
mcp251xfd_tx_failure_drop(priv, tx_ring, err);
}

static int mcp251xfd_tx_obj_write(const struct mcp251xfd_priv *priv,
struct mcp251xfd_tx_obj *tx_obj)
{
Expand Down Expand Up @@ -162,6 +195,11 @@ static bool mcp251xfd_tx_busy(const struct mcp251xfd_priv *priv,
return false;
}

static bool mcp251xfd_work_busy(struct work_struct *work)
{
return work_busy(work);
}

netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
struct net_device *ndev)
{
Expand All @@ -175,7 +213,8 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
if (can_dev_dropped_skb(ndev, skb))
return NETDEV_TX_OK;

if (mcp251xfd_tx_busy(priv, tx_ring))
if (mcp251xfd_tx_busy(priv, tx_ring) ||
mcp251xfd_work_busy(&priv->tx_work))
return NETDEV_TX_BUSY;

tx_obj = mcp251xfd_get_tx_obj_next(tx_ring);
Expand All @@ -193,13 +232,13 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
netdev_sent_queue(priv->ndev, frame_len);

err = mcp251xfd_tx_obj_write(priv, tx_obj);
if (err)
goto out_err;

return NETDEV_TX_OK;

out_err:
netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
if (err == -EBUSY) {
netif_stop_queue(ndev);
priv->tx_work_obj = tx_obj;
queue_work(priv->wq, &priv->tx_work);
} else if (err) {
mcp251xfd_tx_failure_drop(priv, tx_ring, err);
}

return NETDEV_TX_OK;
}
5 changes: 5 additions & 0 deletions drivers/net/can/spi/mcp251xfd/mcp251xfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,10 @@ struct mcp251xfd_priv {
struct mcp251xfd_rx_ring *rx[MCP251XFD_FIFO_RX_NUM];
struct mcp251xfd_tx_ring tx[MCP251XFD_FIFO_TX_NUM];

struct workqueue_struct *wq;
struct work_struct tx_work;
struct mcp251xfd_tx_obj *tx_work_obj;

DECLARE_BITMAP(flags, __MCP251XFD_FLAGS_SIZE__);

u8 rx_ring_num;
Expand Down Expand Up @@ -952,6 +956,7 @@ void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);

void mcp251xfd_tx_obj_write_sync(struct work_struct *work);
netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
struct net_device *ndev);

Expand Down

0 comments on commit 3e72558

Please sign in to comment.