OSDN Git Service

RDMA/hns: Add FRMR support for hip08
author: Yixian Liu <liuyixian@huawei.com>
Fri, 5 Oct 2018 09:53:24 +0000 (17:53 +0800)
committer: Jason Gunthorpe <jgg@mellanox.com>
Tue, 16 Oct 2018 06:17:08 +0000 (00:17 -0600)
This patch adds fast register physical memory region (FRMR) support for
hip08.

Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_mr.c

index de9b8e3..d39bdfd 100644 (file)
 #define BITMAP_RR                              1
 
 #define MR_TYPE_MR                             0x00
+#define MR_TYPE_FRMR                           0x01
 #define MR_TYPE_DMA                            0x03
 
+#define HNS_ROCE_FRMR_MAX_PA                   512
+
 #define PKEY_ID                                        0xffff
 #define GUID_LEN                               8
 #define NODE_DESC_SIZE                         64
@@ -194,6 +197,7 @@ enum {
        HNS_ROCE_CAP_FLAG_RECORD_DB             = BIT(3),
        HNS_ROCE_CAP_FLAG_SQ_RECORD_DB          = BIT(4),
        HNS_ROCE_CAP_FLAG_MW                    = BIT(7),
+       HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
        HNS_ROCE_CAP_FLAG_ATOMIC                = BIT(10),
 };
 
@@ -308,6 +312,7 @@ struct hns_roce_mr {
        u32                     key; /* Key of MR */
        u32                     pd;   /* PD num of MR */
        u32                     access;/* Access permission of MR */
+       u32                     npages;
        int                     enabled; /* MR's active status */
        int                     type;   /* MR's register type */
        u64                     *pbl_buf;/* MR's PBL space */
@@ -773,6 +778,7 @@ struct hns_roce_hw {
                                struct hns_roce_mr *mr, int flags, u32 pdn,
                                int mr_access_flags, u64 iova, u64 size,
                                void *mb_buf);
+       int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
        int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
        void (*write_cqc)(struct hns_roce_dev *hr_dev,
                          struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
@@ -983,6 +989,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
                           u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
                           struct ib_udata *udata);
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+                               u32 max_num_sg);
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+                      unsigned int *sg_offset);
 int hns_roce_dereg_mr(struct ib_mr *ibmr);
 int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
                       struct hns_roce_cmd_mailbox *mailbox,
index e3d9f1d..a4c62ae 100644 (file)
@@ -54,6 +54,47 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
        dseg->len  = cpu_to_le32(sg->length);
 }
 
+/*
+ * Build the fast-register (IB_WR_REG_MR) part of an RC send WQE.
+ *
+ * @rc_sq_wqe: RC send WQE whose common fields are reused for FRMR data
+ * @fseg:      FRMR segment receiving the PBL size and page-size fields
+ * @wr:        register work request supplying the MR, key and access flags
+ */
+static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+                        struct hns_roce_wqe_frmr_seg *fseg,
+                        const struct ib_reg_wr *wr)
+{
+       struct ib_mr *ibmr = wr->mr;
+       struct hns_roce_mr *hr_mr = to_hr_mr(ibmr);
+
+       /* Each ib_access_flags bit of the WR maps to one bit of byte_4. */
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
+                    !!(wr->access & IB_ACCESS_MW_BIND));
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
+                    !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S,
+                    !!(wr->access & IB_ACCESS_REMOTE_READ));
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S,
+                    !!(wr->access & IB_ACCESS_REMOTE_WRITE));
+       roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S,
+                    !!(wr->access & IB_ACCESS_LOCAL_WRITE));
+
+       /*
+        * The send WQE layout is reused here: msg_len/inv_key carry the
+        * low/high halves of the PBL base address, and byte_16/byte_20
+        * carry the low/high halves of the MR length.
+        */
+       rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(hr_mr->pbl_ba));
+       rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(hr_mr->pbl_ba));
+
+       rc_sq_wqe->byte_16 = cpu_to_le32(lower_32_bits(ibmr->length));
+       rc_sq_wqe->byte_20 = cpu_to_le32(upper_32_bits(ibmr->length));
+       rc_sq_wqe->rkey = cpu_to_le32(wr->key);
+       rc_sq_wqe->va = cpu_to_le64(ibmr->iova);
+
+       /* PBL description lives in the dedicated FRMR segment. */
+       fseg->pbl_size = cpu_to_le32(hr_mr->pbl_size);
+       roce_set_field(fseg->mode_buf_pg_sz,
+                      V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
+                      V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
+                      hr_mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+       roce_set_bit(fseg->mode_buf_pg_sz, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S,
+                    0);
+}
+
 static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
                           const struct ib_atomic_wr *wr)
 {
@@ -192,6 +233,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
        struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
        struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
        struct hns_roce_qp *qp = to_hr_qp(ibqp);
+       struct hns_roce_wqe_frmr_seg *fseg;
        struct device *dev = hr_dev->dev;
        struct hns_roce_v2_db sq_db;
        struct ib_qp_attr attr;
@@ -462,6 +504,11 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
                                rc_sq_wqe->inv_key =
                                            cpu_to_le32(wr->ex.invalidate_rkey);
                                break;
+                       case IB_WR_REG_MR:
+                               hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
+                               fseg = wqe;
+                               set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
+                               break;
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                                hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
                                rc_sq_wqe->rkey =
@@ -505,7 +552,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
                                               V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
                                               V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
                                               wr->num_sge);
-                       } else {
+                       } else if (wr->opcode != IB_WR_REG_MR) {
                                ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
                                                        wqe, &sge_ind, bad_wr);
                                if (ret)
@@ -1297,7 +1344,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
                                  HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
 
        if (hr_dev->pci_dev->revision == 0x21)
-               caps->flags |= HNS_ROCE_CAP_FLAG_MW;
+               caps->flags |= HNS_ROCE_CAP_FLAG_MW |
+                              HNS_ROCE_CAP_FLAG_FRMR;
 
        caps->pkey_table_len[0] = 1;
        caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
@@ -1865,6 +1913,48 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
        return 0;
 }
 
+/*
+ * Fill a mailbox buffer with the MPT entry for a fast-register MR.
+ *
+ * @mb_buf: mailbox buffer, overwritten with a struct hns_roce_v2_mpt_entry
+ * @mr:     MR supplying the PD, PBL address, PBL size and page-size fields
+ *
+ * Always returns 0.
+ */
+static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
+{
+       struct hns_roce_v2_mpt_entry *mpt = mb_buf;
+       /* The PBL base address is written right-shifted by 3 bits. */
+       u64 pbl_ba = mr->pbl_ba >> 3;
+
+       memset(mpt, 0, sizeof(*mpt));
+
+       /* byte 4: state (V2_MPT_ST_FREE), one PBL hop, BA page size, PD */
+       roce_set_field(mpt->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+                      V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+       roce_set_field(mpt->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+                      V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
+       roce_set_field(mpt->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+                      V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+                      mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+       roce_set_field(mpt->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+                      V2_MPT_BYTE_4_PD_S, mr->pd);
+
+       /* byte 8: RA_EN, R_INV_EN and L_INV_EN are all switched on */
+       roce_set_bit(mpt->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
+       roce_set_bit(mpt->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+       roce_set_bit(mpt->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+
+       /* byte 12: FRE and BPD set; PA and MR_MW cleared */
+       roce_set_bit(mpt->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
+       roce_set_bit(mpt->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+       roce_set_bit(mpt->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
+       roce_set_bit(mpt->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+
+       mpt->pbl_size = cpu_to_le32(mr->pbl_size);
+
+       /* Shifted PBL base address, split into low and high words. */
+       mpt->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba));
+       roce_set_field(mpt->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
+                      V2_MPT_BYTE_48_PBL_BA_H_S, upper_32_bits(pbl_ba));
+
+       roce_set_field(mpt->byte_64_buf_pa1, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+                      V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+                      mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+       return 0;
+}
+
 static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 {
        struct hns_roce_v2_mpt_entry *mpt_entry;
@@ -2834,6 +2924,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
        roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
                     0);
 
+       roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
+       roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
+
        roce_set_field(qpc_mask->byte_176_msg_pktn,
                       V2_QPC_BYTE_176_MSG_USE_PKTN_M,
                       V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
@@ -5259,6 +5352,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
        .set_mac = hns_roce_v2_set_mac,
        .write_mtpt = hns_roce_v2_write_mtpt,
        .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
+       .frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
        .mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
        .write_cqc = hns_roce_v2_write_cqc,
        .set_hem = hns_roce_v2_set_hem,
index f8abcce..8bc8206 100644 (file)
@@ -886,6 +886,8 @@ struct hns_roce_v2_mpt_entry {
 #define V2_MPT_BYTE_8_MW_CNT_S 8
 #define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
 
+#define V2_MPT_BYTE_12_FRE_S 0
+
 #define V2_MPT_BYTE_12_PA_S 1
 
 #define V2_MPT_BYTE_12_MR_MW_S 4
@@ -1058,6 +1060,16 @@ struct hns_roce_v2_rc_send_wqe {
 
 #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
 
+#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
+
+#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
+
+#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
+
+#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
+
+#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
+
 #define        V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
 
@@ -1067,6 +1079,16 @@ struct hns_roce_v2_rc_send_wqe {
 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
 
+struct hns_roce_wqe_frmr_seg { /* FRMR segment of an RC send WQE, filled by set_frmr_seg() */
+       __le32  pbl_size; /* little-endian copy of the MR's pbl_size */
+       __le32  mode_buf_pg_sz; /* PBL_BUF_PG_SZ in bits 7:4, BLK_MODE in bit 8 */
+};
+
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4)
+
+#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
+
 struct hns_roce_v2_wqe_data_seg {
        __le32    len;
        __le32    lkey;
index 7e693b1..1b3ee51 100644 (file)
@@ -535,6 +535,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
                                        (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
        }
 
+       /* FRMR */
+       if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
+               ib_dev->alloc_mr                = hns_roce_alloc_mr;
+               ib_dev->map_mr_sg               = hns_roce_map_mr_sg;
+       }
+
        /* OTHERS */
        ib_dev->get_port_immutable      = hns_roce_port_immutable;
        ib_dev->disassociate_ucontext   = hns_roce_disassociate_ucontext;
index 0613c11..521ad2a 100644 (file)
@@ -329,7 +329,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
        u64 bt_idx;
        u64 size;
 
-       mhop_num = hr_dev->caps.pbl_hop_num;
+       mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
        pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 
@@ -351,7 +351,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 
                mr->pbl_size = npages;
                mr->pbl_ba = mr->pbl_dma_addr;
-               mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+               mr->pbl_hop_num = mhop_num;
                mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
                mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
                return 0;
@@ -511,7 +511,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
        mr->key = hw_index_to_key(index);       /* MR key */
 
        if (size == ~0ull) {
-               mr->type = MR_TYPE_DMA;
                mr->pbl_buf = NULL;
                mr->pbl_dma_addr = 0;
                /* PBL multi-hop addressing parameters */
@@ -522,7 +521,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
                mr->pbl_l1_dma_addr = NULL;
                mr->pbl_l0_dma_addr = 0;
        } else {
-               mr->type = MR_TYPE_MR;
                if (!hr_dev->caps.pbl_hop_num) {
                        mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
                                                         &(mr->pbl_dma_addr),
@@ -548,9 +546,9 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
        u32 mhop_num;
        u64 bt_idx;
 
-       npages = ib_umem_page_count(mr->umem);
+       npages = mr->pbl_size;
        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-       mhop_num = hr_dev->caps.pbl_hop_num;
+       mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
 
        if (mhop_num == HNS_ROCE_HOP_NUM_0)
                return;
@@ -636,7 +634,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
        }
 
        if (mr->size != ~0ULL) {
-               npages = ib_umem_page_count(mr->umem);
+               if (mr->type == MR_TYPE_MR)
+                       npages = ib_umem_page_count(mr->umem);
 
                if (!hr_dev->caps.pbl_hop_num)
                        dma_free_coherent(dev, (unsigned int)(npages * 8),
@@ -674,7 +673,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
                goto err_table;
        }
 
-       ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+       if (mr->type != MR_TYPE_FRMR)
+               ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+       else
+               ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
        if (ret) {
                dev_err(dev, "Write mtpt fail!\n");
                goto err_page;
@@ -855,6 +857,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
        if (mr == NULL)
                return  ERR_PTR(-ENOMEM);
 
+       mr->type = MR_TYPE_DMA;
+
        /* Allocate memory region key */
        ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
                                ~0ULL, acc, 0, mr);
@@ -1031,6 +1035,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                }
        }
 
+       mr->type = MR_TYPE_MR;
+
        ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
                                access_flags, n, mr);
        if (ret)
@@ -1202,6 +1208,76 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr)
        return ret;
 }
 
+/*
+ * hns_roce_alloc_mr - allocate a fast-register memory region (FRMR)
+ * @pd:         protection domain the MR belongs to
+ * @mr_type:    requested MR type; only IB_MR_TYPE_MEM_REG is accepted
+ * @max_num_sg: number of page entries the MR must be able to hold, at most
+ *              HNS_ROCE_FRMR_MAX_PA
+ *
+ * Allocates the MR key and PBL via hns_roce_mr_alloc() and enables the MR
+ * via hns_roce_mr_enable().
+ *
+ * Returns the embedded ib_mr on success, ERR_PTR(-EINVAL) for an
+ * unsupported type or too many entries, ERR_PTR(-ENOMEM) if the MR
+ * structure cannot be allocated, or the error reported by the
+ * allocation/enable steps.
+ */
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+                               u32 max_num_sg)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+       struct device *dev = hr_dev->dev;
+       struct hns_roce_mr *mr;
+       u64 page_size;
+       u64 length;
+       int ret;
+
+       if (mr_type != IB_MR_TYPE_MEM_REG)
+               return ERR_PTR(-EINVAL);
+
+       if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
+               dev_err(dev, "max_num_sg larger than %d\n",
+                       HNS_ROCE_FRMR_MAX_PA);
+               return ERR_PTR(-EINVAL);
+       }
+
+       /*
+        * Size the region only after max_num_sg has been validated, and do
+        * the arithmetic in 64 bits so the product cannot wrap before it is
+        * stored in the 64-bit length.
+        */
+       page_size = 1ULL << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
+       length = max_num_sg * page_size;
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       /* The type must be set before hns_roce_mr_alloc() is called. */
+       mr->type = MR_TYPE_FRMR;
+
+       /* Allocate memory region key */
+       ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
+                               0, max_num_sg, mr);
+       if (ret)
+               goto err_free;
+
+       ret = hns_roce_mr_enable(hr_dev, mr);
+       if (ret)
+               goto err_mr;
+
+       mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+       mr->umem = NULL;
+
+       return &mr->ibmr;
+
+err_mr:
+       hns_roce_mr_free(hr_dev, mr);
+
+err_free:
+       kfree(mr);
+       return ERR_PTR(ret);
+}
+
+/*
+ * hns_roce_set_page - append one page address to the MR's PBL
+ * @ibmr: MR being mapped
+ * @addr: page address to store
+ *
+ * Passed to ib_sg_to_pages() as the per-page callback by
+ * hns_roce_map_mr_sg().
+ *
+ * Returns 0 on success, or -ENOMEM when the PBL already holds pbl_size
+ * entries.
+ */
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
+{
+       struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+       /*
+        * pbl_size is the page count the PBL was allocated for (set in
+        * hns_roce_mhop_alloc()); never write past that many entries.
+        */
+       if (mr->npages >= mr->pbl_size)
+               return -ENOMEM;
+
+       mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
+
+       return 0;
+}
+
+/*
+ * hns_roce_map_mr_sg - load a scatterlist into an MR's page list
+ * @ibmr:      MR to map into
+ * @sg:        scatterlist to map
+ * @sg_nents:  number of entries in @sg
+ * @sg_offset: passed through unchanged to ib_sg_to_pages()
+ *
+ * Returns whatever ib_sg_to_pages() returns.
+ */
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+                      unsigned int *sg_offset)
+{
+       struct hns_roce_mr *hr_mr = to_hr_mr(ibmr);
+       int mapped;
+
+       /* Start filling the page list from its first slot again. */
+       hr_mr->npages = 0;
+
+       mapped = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
+                               hns_roce_set_page);
+
+       return mapped;
+}
+
 static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
                             struct hns_roce_mw *mw)
 {