OSDN Git Service

scsi: lpfc: Improve PBDE checks during SGL processing
authorJames Smart <jsmart2021@gmail.com>
Fri, 10 Sep 2021 23:31:58 +0000 (16:31 -0700)
committerMartin K. Petersen <martin.petersen@oracle.com>
Wed, 15 Sep 2021 03:33:22 +0000 (23:33 -0400)
The PBDE feature, setting payload buffer address explicitly in the WQE so
it doesn't have to be fetched from the SGL, only makes sense when there is
a single buffer for the I/O. When there are multiple buffers it actually
hurts performance as the SGL subsequently has to be fetched.

Rework the SGL logic to only use PBDE when a single buffer.

Link: https://lore.kernel.org/r/20210910233159.115896-14-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_nvmet.c
drivers/scsi/lpfc/lpfc_scsi.c

index 33266e1..69d3758 100644 (file)
@@ -1299,7 +1299,6 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
        struct sli4_sge *first_data_sgl;
        struct ulp_bde64 *bde;
        dma_addr_t physaddr = 0;
-       uint32_t num_bde = 0;
        uint32_t dma_len = 0;
        uint32_t dma_offset = 0;
        int nseg, i, j;
@@ -1353,7 +1352,7 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
                        }
 
                        sgl->word2 = 0;
-                       if ((num_bde + 1) == nseg) {
+                       if (nseg == 1) {
                                bf_set(lpfc_sli4_sge_last, sgl, 1);
                                bf_set(lpfc_sli4_sge_type, sgl,
                                       LPFC_SGE_TYPE_DATA);
@@ -1422,8 +1421,9 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
 
                        j++;
                }
-               if (phba->cfg_enable_pbde) {
-                       /* Use PBDE support for first SGL only, offset == 0 */
+
+               /* PBDE support for first data SGE only */
+               if (nseg == 1 && phba->cfg_enable_pbde) {
                        /* Words 13-15 */
                        bde = (struct ulp_bde64 *)
                                &wqe->words[13];
@@ -1434,11 +1434,11 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
                        bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
                        bde->tus.w = cpu_to_le32(bde->tus.w);
 
-                       /* Word 11 */
+                       /* Word 11 - set PBDE bit */
                        bf_set(wqe_pbde, &wqe->generic.wqe_com, 1);
                } else {
                        memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3));
-                       bf_set(wqe_pbde, &wqe->generic.wqe_com, 0);
+                       /* Word 11 - PBDE bit disabled by default template */
                }
 
        } else {
index 6e3dd0b..7318025 100644 (file)
@@ -2708,7 +2708,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
        struct ulp_bde64 *bde;
        dma_addr_t physaddr;
        int i, cnt, nsegs;
-       int do_pbde;
+       bool use_pbde = false;
        int xc = 1;
 
        if (!lpfc_is_link_up(phba)) {
@@ -2816,9 +2816,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
                if (!xc)
                        bf_set(wqe_xc, &wqe->fcp_tsend.wqe_com, 0);
 
-               /* Word 11 - set sup, irsp, irsplen later */
-               do_pbde = 0;
-
                /* Word 12 */
                wqe->fcp_tsend.fcp_data_len = rsp->transfer_length;
 
@@ -2896,12 +2893,13 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
                if (!xc)
                        bf_set(wqe_xc, &wqe->fcp_treceive.wqe_com, 0);
 
-               /* Word 11 - set pbde later */
-               if (phba->cfg_enable_pbde) {
-                       do_pbde = 1;
+               /* Word 11 - check for pbde */
+               if (nsegs == 1 && phba->cfg_enable_pbde) {
+                       use_pbde = true;
+                       /* Word 11 - PBDE bit already preset by template */
                } else {
+                       /* Overwrite default template setting */
                        bf_set(wqe_pbde, &wqe->fcp_treceive.wqe_com, 0);
-                       do_pbde = 0;
                }
 
                /* Word 12 */
@@ -2972,7 +2970,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
                               ((rsp->rsplen >> 2) - 1));
                        memcpy(&wqe->words[16], rsp->rspaddr, rsp->rsplen);
                }
-               do_pbde = 0;
 
                /* Word 12 */
                wqe->fcp_trsp.rsvd_12_15[0] = 0;
@@ -3007,23 +3004,24 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
                        bf_set(lpfc_sli4_sge_last, sgl, 1);
                sgl->word2 = cpu_to_le32(sgl->word2);
                sgl->sge_len = cpu_to_le32(cnt);
-               if (i == 0) {
-                       bde = (struct ulp_bde64 *)&wqe->words[13];
-                       if (do_pbde) {
-                               /* Words 13-15  (PBDE) */
-                               bde->addrLow = sgl->addr_lo;
-                               bde->addrHigh = sgl->addr_hi;
-                               bde->tus.f.bdeSize =
-                                       le32_to_cpu(sgl->sge_len);
-                               bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
-                               bde->tus.w = cpu_to_le32(bde->tus.w);
-                       } else {
-                               memset(bde, 0, sizeof(struct ulp_bde64));
-                       }
-               }
                sgl++;
                ctxp->offset += cnt;
        }
+
+       bde = (struct ulp_bde64 *)&wqe->words[13];
+       if (use_pbde) {
+               /* decrement sgl ptr backwards once to first data sge */
+               sgl--;
+
+               /* Words 13-15 (PBDE) */
+               bde->addrLow = sgl->addr_lo;
+               bde->addrHigh = sgl->addr_hi;
+               bde->tus.f.bdeSize = le32_to_cpu(sgl->sge_len);
+               bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+               bde->tus.w = cpu_to_le32(bde->tus.w);
+       } else {
+               memset(bde, 0, sizeof(struct ulp_bde64));
+       }
        ctxp->state = LPFC_NVME_STE_DATA;
        ctxp->entry_cnt++;
        return nvmewqe;
index a2cd227..078fbea 100644 (file)
@@ -3235,7 +3235,6 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
        struct lpfc_vport *vport = phba->pport;
        union lpfc_wqe128 *wqe = &pwqeq->wqe;
        dma_addr_t physaddr;
-       uint32_t num_bde = 0;
        uint32_t dma_len;
        uint32_t dma_offset = 0;
        int nseg, i, j;
@@ -3297,7 +3296,7 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
                j = 2;
                for (i = 0; i < nseg; i++) {
                        sgl->word2 = 0;
-                       if ((num_bde + 1) == nseg) {
+                       if (nseg == 1) {
                                bf_set(lpfc_sli4_sge_last, sgl, 1);
                                bf_set(lpfc_sli4_sge_type, sgl,
                                       LPFC_SGE_TYPE_DATA);
@@ -3366,13 +3365,15 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 
                        j++;
                }
-               /*
-                * Setup the first Payload BDE. For FCoE we just key off
-                * Performance Hints, for FC we use lpfc_enable_pbde.
-                * We populate words 13-15 of IOCB/WQE.
+
+               /* PBDE support for first data SGE only.
+                * For FCoE, we key off Performance Hints.
+                * For FC, we key off lpfc_enable_pbde.
                 */
-               if ((phba->sli3_options & LPFC_SLI4_PERFH_ENABLED) ||
-                   phba->cfg_enable_pbde) {
+               if (nseg == 1 &&
+                   ((phba->sli3_options & LPFC_SLI4_PERFH_ENABLED) ||
+                    phba->cfg_enable_pbde)) {
+                       /* Words 13-15 */
                        bde = (struct ulp_bde64 *)
                                &wqe->words[13];
                        bde->addrLow = first_data_sgl->addr_lo;
@@ -3382,12 +3383,15 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
                        bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
                        bde->tus.w = cpu_to_le32(bde->tus.w);
 
+                       /* Word 11 - set PBDE bit */
+                       bf_set(wqe_pbde, &wqe->generic.wqe_com, 1);
                } else {
                        memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3));
+                       /* Word 11 - PBDE bit disabled by default template */
                }
        } else {
                sgl += 1;
-               /* clear the last flag in the fcp_rsp map entry */
+               /* set the last flag in the fcp_rsp map entry */
                sgl->word2 = le32_to_cpu(sgl->word2);
                bf_set(lpfc_sli4_sge_last, sgl, 1);
                sgl->word2 = cpu_to_le32(sgl->word2);
@@ -3400,10 +3404,6 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
                }
        }
 
-       /* Word 11 */
-       if (phba->cfg_enable_pbde)
-               bf_set(wqe_pbde, &wqe->generic.wqe_com, 1);
-
        /*
         * Finish initializing those IOCB fields that are dependent on the
         * scsi_cmnd request_buffer.  Note that for SLI-2 the bdeSize is