There can be a race between the waiters for a tx work request buffer
and the link down processing that finally clears the link. Although
all waiters are woken up before the link is cleared there might be
waiters which did not yet get back control and are still waiting.
This results in an access to a cleared wait queue head.
Fix this by introducing atomic reference counting around the wait calls,
and wait with the link clear processing until all waiters have finished.
Move the work request layer related calls into smc_wr.c and set the
link state to INACTIVE before calling smcr_link_clear() in
smc_llc_srv_add_link().
Fixes:
15e1b99aadfb ("net/smc: no WR buffer wait for terminating link group")
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */
+ atomic_t wr_tx_refcnt; /* tx refs to link */
struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
struct ib_reg_wr wr_reg; /* WR register memory region */
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
+ atomic_t wr_reg_refcnt; /* reg refs to link */
enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/
if (!rc)
goto out;
out_clear_lnk:
+ lnk_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(lnk_new, false);
out_reject:
smc_llc_cli_add_link_reject(qentry);
goto out_err;
return 0;
out_err:
+ link_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(link_new, false);
return rc;
}
del_llc->reason = 0;
smc_llc_send_message(lnk, &qentry->msg); /* response */
- if (smc_link_downing(&lnk_del->state)) {
- if (smc_switch_conns(lgr, lnk_del, false))
- smc_wr_tx_wait_no_pending_sends(lnk_del);
- }
+ if (smc_link_downing(&lnk_del->state))
+ smc_switch_conns(lgr, lnk_del, false);
smcr_link_clear(lnk_del, true);
active_links = smc_llc_active_link_count(lgr);
link->smcibdev->ibdev->name, link->ibport);
complete(&link->llc_testlink_resp);
cancel_delayed_work_sync(&link->llc_testlink_wrk);
- smc_wr_wakeup_reg_wait(link);
- smc_wr_wakeup_tx_wait(link);
}
/* register a new rtoken at the remote peer (for all links) */
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
return rc;
}
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+ struct smc_link *link = conn->lnk;
+ int rc = -ENOLINK;
+
+ if (!link)
+ return rc;
+
+ atomic_inc(&link->wr_tx_refcnt);
+ if (smc_link_usable(link))
+ rc = _smcr_tx_sndbuf_nonempty(conn);
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+ return rc;
+}
+
static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
if (rc)
return rc;
+ atomic_inc(&link->wr_reg_refcnt);
rc = wait_event_interruptible_timeout(link->wr_reg_wait,
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
+ if (atomic_dec_and_test(&link->wr_reg_refcnt))
+ wake_up_all(&link->wr_reg_wait);
if (!rc) {
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
return;
ibdev = lnk->smcibdev->ibdev;
+ smc_wr_wakeup_reg_wait(lnk);
+ smc_wr_wakeup_tx_wait(lnk);
+
if (smc_wr_tx_wait_no_pending_sends(lnk))
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) *
sizeof(*lnk->wr_tx_mask));
+ wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
+ wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
init_waitqueue_head(&lnk->wr_tx_wait);
+ atomic_set(&lnk->wr_tx_refcnt, 0);
init_waitqueue_head(&lnk->wr_reg_wait);
+ atomic_set(&lnk->wr_reg_refcnt, 0);
return rc;
dma_unmap: