Skip to content

Commit

Permalink
IB/hfi1: Correct tid qp rcd to match verbs context
Browse files Browse the repository at this point in the history
The qp priv rcd pointer doesn't match the context being used for verbs
causing issues when 9B and kdeth packets are processed by different
receive contexts and hence different CPUs.

When running on different CPUs the following panic can occur:

 WARNING: CPU: 3 PID: 2584 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0
 list_del corruption. prev->next should be ffff9a7ac31f7a30, but was ffff9a7c3bc89230
 CPU: 3 PID: 2584 Comm: z_wr_iss Kdump: loaded Tainted: P           OE  ------------   3.10.0-862.2.3.el7_lustre.x86_64 #1
 Call Trace:
  <IRQ>  [<ffffffffb7b0d78e>] dump_stack+0x19/0x1b
  [<ffffffffb74916d8>] __warn+0xd8/0x100
  [<ffffffffb749175f>] warn_slowpath_fmt+0x5f/0x80
  [<ffffffffb7768671>] __list_del_entry+0xa1/0xd0
  [<ffffffffc0c7a945>] process_rcv_qp_work+0xb5/0x160 [hfi1]
  [<ffffffffc0c7bc2b>] handle_receive_interrupt_nodma_rtail+0x20b/0x2b0 [hfi1]
  [<ffffffffc0c70683>] receive_context_interrupt+0x23/0x40 [hfi1]
  [<ffffffffb7540a94>] __handle_irq_event_percpu+0x44/0x1c0
  [<ffffffffb7540c42>] handle_irq_event_percpu+0x32/0x80
  [<ffffffffb7540ccc>] handle_irq_event+0x3c/0x60
  [<ffffffffb7543a1f>] handle_edge_irq+0x7f/0x150
  [<ffffffffb742d504>] handle_irq+0xe4/0x1a0
  [<ffffffffb7b23f7d>] do_IRQ+0x4d/0xf0
  [<ffffffffb7b16362>] common_interrupt+0x162/0x162
  <EOI>  [<ffffffffb775a326>] ? memcpy+0x6/0x110
  [<ffffffffc109210d>] ? abd_copy_from_buf_off_cb+0x1d/0x30 [zfs]
  [<ffffffffc10920f0>] ? abd_copy_to_buf_off_cb+0x30/0x30 [zfs]
  [<ffffffffc1093257>] abd_iterate_func+0x97/0x120 [zfs]
  [<ffffffffc10934d9>] abd_copy_from_buf_off+0x39/0x60 [zfs]
  [<ffffffffc109b828>] arc_write_ready+0x178/0x300 [zfs]
  [<ffffffffb7b11032>] ? mutex_lock+0x12/0x2f
  [<ffffffffb7b11032>] ? mutex_lock+0x12/0x2f
  [<ffffffffc1164d05>] zio_ready+0x65/0x3d0 [zfs]
  [<ffffffffc04d725e>] ? tsd_get_by_thread+0x2e/0x50 [spl]
  [<ffffffffc04d1318>] ? taskq_member+0x18/0x30 [spl]
  [<ffffffffc115ef22>] zio_execute+0xa2/0x100 [zfs]
  [<ffffffffc04d1d2c>] taskq_thread+0x2ac/0x4f0 [spl]
  [<ffffffffb74cee80>] ? wake_up_state+0x20/0x20
  [<ffffffffc115ee80>] ? zio_taskq_member.isra.7.constprop.10+0x80/0x80 [zfs]
  [<ffffffffc04d1a80>] ? taskq_thread_spawn+0x60/0x60 [spl]
  [<ffffffffb74bae31>] kthread+0xd1/0xe0
  [<ffffffffb74bad60>] ? insert_kthread_work+0x40/0x40
  [<ffffffffb7b1f5f7>] ret_from_fork_nospec_begin+0x21/0x21
  [<ffffffffb74bad60>] ? insert_kthread_work+0x40/0x40

Fix by reading the map entry in the same manner as the hardware so that
the kdeth and verbs contexts match.

Cc: <[email protected]>
Fixes: 5190f05 ("IB/hfi1: Allow the driver to initialize QP priv struct")
Reviewed-by: Kaike Wan <[email protected]>
Signed-off-by: Mike Marciniszyn <[email protected]>
Signed-off-by: Dennis Dalessandro <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
  • Loading branch information
mmarcini authored and jgunthorpe committed Jun 11, 2019
1 parent da9de5f commit cc78076
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
13 changes: 13 additions & 0 deletions drivers/infiniband/hw/hfi1/chip.c
Original file line number Diff line number Diff line change
Expand Up @@ -14031,6 +14031,19 @@ static void init_kdeth_qp(struct hfi1_devdata *dd)
RCV_BTH_QP_KDETH_QP_SHIFT);
}

/**
* hfi1_get_qp_map
* @dd: device data
* @idx: index to read
*/
u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
{
u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8);

reg >>= (idx % 8) * 8;
return reg;
}

/**
* init_qpmap_table
* @dd - device data
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/hw/hfi1/chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -1445,6 +1445,7 @@ void clear_all_interrupts(struct hfi1_devdata *dd);
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
void reset_interrupts(struct hfi1_devdata *dd);
u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);

/*
* Interrupt source table.
Expand Down
4 changes: 1 addition & 3 deletions drivers/infiniband/hw/hfi1/tid_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,7 @@ static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
if (qp->ibqp.qp_num == 0)
ctxt = 0;
else
ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
(dd->n_krcv_queues - 1)) + 1;

ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
return dd->rcd[ctxt];
}

Expand Down

0 comments on commit cc78076

Please sign in to comment.