OSDN Git Service

IB/hfi1: Add interlock between TID RDMA WRITE and other requests
author: Kaike Wan <kaike.wan@intel.com>
Thu, 24 Jan 2019 05:51:49 +0000 (21:51 -0800)
committer: Doug Ledford <dledford@redhat.com>
Tue, 5 Feb 2019 23:07:44 +0000 (18:07 -0500)
This locking mechanism is designed to prevent various memory corruption
scenarios from occurring when requests are pipelined, especially when
RDMA WRITE requests are interleaved with TID RDMA READ requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
4. WRITE-AFTER-WRITE.
When memory corruption is likely, a request will be held back until
previous requests have been completed.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h

index 6d2abea..cfb8633 100644 (file)
@@ -173,6 +173,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
                }
 
                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+               /* Check for tid write fence */
+               if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
+                   hfi1_tid_rdma_ack_interlock(qp, e)) {
+                       iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
+                       goto bail;
+               }
                if (e->opcode == OP(RDMA_READ_REQUEST)) {
                        /*
                         * If a RDMA read response is being resent and
index 490e47a..2867520 100644 (file)
@@ -2179,6 +2179,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
                        req->state = TID_REQUEST_RESEND;
                        req->cur_seg = req->comp_seg;
                }
+               qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
        }
        /* Re-process old requests.*/
        if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
@@ -3229,6 +3230,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
        struct rvt_swqe *prev;
        struct hfi1_qp_priv *priv = qp->priv;
        u32 s_prev;
+       struct tid_rdma_request *req;
 
        s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
        prev = rvt_get_swqe_ptr(qp, s_prev);
@@ -3240,14 +3242,28 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
        case IB_WR_ATOMIC_CMP_AND_SWP:
        case IB_WR_ATOMIC_FETCH_AND_ADD:
        case IB_WR_RDMA_WRITE:
+               switch (prev->wr.opcode) {
+               case IB_WR_TID_RDMA_WRITE:
+                       req = wqe_to_tid_req(prev);
+                       if (req->ack_seg != req->total_segs)
+                               goto interlock;
+               default:
+                       break;
+               }
        case IB_WR_RDMA_READ:
-               break;
+               if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
+                       break;
+               /* fall through */
        case IB_WR_TID_RDMA_READ:
                switch (prev->wr.opcode) {
                case IB_WR_RDMA_READ:
                        if (qp->s_acked != qp->s_cur)
                                goto interlock;
                        break;
+               case IB_WR_TID_RDMA_WRITE:
+                       req = wqe_to_tid_req(prev);
+                       if (req->ack_seg != req->total_segs)
+                               goto interlock;
                default:
                        break;
                }
@@ -5157,7 +5173,9 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
                e = &qp->s_ack_queue[qpriv->r_tid_ack];
                req = ack_to_tid_req(e);
                flow = req->acked_tail;
-       }
+       } else if (req->ack_seg == req->total_segs &&
+                  qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
+               qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
 
        hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
                                                &bth2);
@@ -5310,3 +5328,27 @@ bool hfi1_schedule_tid_send(struct rvt_qp *qp)
                                IOWAIT_PENDING_TID);
        return false;
 }
+
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
+{ /* Returns true (and sets HFI1_R_TID_WAIT_INTERLCK) when ack entry @e must be held back behind the preceding TID RDMA WRITE entry; false otherwise. */
+       struct rvt_ack_entry *prev;
+       struct tid_rdma_request *req;
+       struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+       struct hfi1_qp_priv *priv = qp->priv;
+       u32 s_prev;
+
+       s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) : /* index of the entry just before the tail; tail 0 wraps to rvt_size_atomic() */
+               (qp->s_tail_ack_queue - 1);
+       prev = &qp->s_ack_queue[s_prev]; /* NOTE(review): assumes s_ack_queue has rvt_size_atomic() + 1 slots so the wrapped index is valid - confirm */
+
+       if ((e->opcode == TID_OP(READ_REQ) || /* only a TID RDMA READ or a plain RDMA READ request ... */
+            e->opcode == OP(RDMA_READ_REQUEST)) &&
+           prev->opcode == TID_OP(WRITE_REQ)) { /* ... that directly follows a TID RDMA WRITE request is a candidate */
+               req = ack_to_tid_req(prev);
+               if (req->ack_seg != req->total_segs) { /* ack_seg has not reached total_segs; presumably the write is not fully acked yet */
+                       priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK; /* make_rc_ack() tests this flag and bails while it is set */
+                       return true;
+               }
+       }
+       return false; /* no interlock needed; s_flags left untouched */
+}
index 7f8f17b..4446818 100644 (file)
@@ -25,6 +25,7 @@
  * s_flags, there are no collisions.
  *
  * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
+ * HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
  */
 #define HFI1_S_TID_BUSY_SET       BIT(0)
 /* BIT(1) reserved for RVT_S_BUSY. */
 /* BIT(3) reserved for RVT_S_RESP_PENDING. */
 /* BIT(4) reserved for RVT_S_ACK_PENDING. */
 #define HFI1_S_TID_WAIT_INTERLCK  BIT(5)
+#define HFI1_R_TID_WAIT_INTERLCK  BIT(6)
 /* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
+/* BIT(16) reserved for RVT_S_SEND_ONE */
 #define HFI1_S_TID_RETRY_TIMER    BIT(17)
+/* BIT(18) reserved for RVT_S_ECN. */
 #define HFI1_R_TID_SW_PSN         BIT(19)
+/* BIT(26) reserved for HFI1_S_WAIT_HALT */
+/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
+/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
 
 /*
  * Unlike regular IB RDMA VERBS, which do not require an entry
@@ -309,4 +316,6 @@ void _hfi1_do_tid_send(struct work_struct *work);
 
 bool hfi1_schedule_tid_send(struct rvt_qp *qp);
 
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
+
 #endif /* HFI1_TID_RDMA_H */