From b179f7438dc0287b64c5d3c7e86398c6954eb4d2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 14 Jun 2018 15:40:31 +0000
Subject: [PATCH] [X86] Add more vector instructions to the memory folding
 table, using the autogenerated table as a guide.

The test change is because we now fold a stack reload into RNDSCALE, and
RNDSCALE can then be turned into ROUND by the EVEX->VEX pass.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334728 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp      | 216 ++++++++++++++++++++++++++++++++++-
 test/CodeGen/X86/vec_ss_load_fold.ll |  61 +++-------
 2 files changed, 234 insertions(+), 43 deletions(-)

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 7e3f89c7c19..bd56e511a3e 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -563,7 +563,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::CMP32rr, X86::CMP32rm, 0 },
     { X86::CMP64rr, X86::CMP64rm, 0 },
     { X86::CMP8rr, X86::CMP8rm, 0 },
+    { X86::COMISDrr, X86::COMISDrm, 0 },
     { X86::COMISDrr_Int, X86::COMISDrm_Int, TB_NO_REVERSE },
+    { X86::COMISSrr, X86::COMISSrm, 0 },
     { X86::COMISSrr_Int, X86::COMISSrm_Int, TB_NO_REVERSE },
     { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE },
     { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
@@ -692,7 +694,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
 
     // AVX 128-bit versions of foldable instructions
+    { X86::VCOMISDrr, X86::VCOMISDrm, 0 },
     { X86::VCOMISDrr_Int, X86::VCOMISDrm_Int, TB_NO_REVERSE },
+    { X86::VCOMISSrr, X86::VCOMISSrm, 0 },
     { X86::VCOMISSrr_Int, X86::VCOMISSrm_Int, TB_NO_REVERSE },
     { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
     { X86::VCVTTSD2SI64rr_Int, X86::VCVTTSD2SI64rm_Int, TB_NO_REVERSE },
@@ -933,6 +937,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     // AVX-512 foldable instructions
     { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
     { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
+    { X86::VCOMISDZrr, X86::VCOMISDZrm, 0 },
+    { X86::VCOMISDZrr_Int, X86::VCOMISDZrm_Int, TB_NO_REVERSE },
+    { X86::VCOMISSZrr, X86::VCOMISSZrm, 0 },
+    { X86::VCOMISSZrr_Int, X86::VCOMISSZrm_Int, TB_NO_REVERSE },
     { X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrm, 0 },
     { X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrm, 0 },
     { X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrm, 0 },
@@ -942,12 +950,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
     { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
     { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
+    { X86::VMOVDDUPZrr, X86::VMOVDDUPZrm, 0 },
     { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
     { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
     { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
     { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
     { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
     { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
+    { X86::VMOVSHDUPZrr, X86::VMOVSHDUPZrm, 0 },
+    { X86::VMOVSLDUPZrr, X86::VMOVSLDUPZrm, 0 },
     { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
     { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
     { X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
@@ -955,6 +966,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPABSDZrr, X86::VPABSDZrm, 0 },
     { X86::VPABSQZrr, X86::VPABSQZrm, 0 },
     { X86::VPABSWZrr, X86::VPABSWZrm, 0 },
+    { X86::VPBROADCASTBZr, X86::VPBROADCASTBZm, TB_NO_REVERSE },
+    { X86::VPBROADCASTDZr, X86::VPBROADCASTDZm, TB_NO_REVERSE },
+    { X86::VPBROADCASTQZr, X86::VPBROADCASTQZm, TB_NO_REVERSE },
+    { X86::VPBROADCASTWZr, X86::VPBROADCASTWZm, TB_NO_REVERSE },
     { X86::VPCONFLICTDZrr, X86::VPCONFLICTDZrm, 0 },
     { X86::VPCONFLICTQZrr, X86::VPCONFLICTQZrm, 0 },
     { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
@@ -993,6 +1008,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSRLDZri, X86::VPSRLDZmi, 0 },
     { X86::VPSRLQZri, X86::VPSRLQZmi, 0 },
     { X86::VPSRLWZri, X86::VPSRLWZmi, 0 },
+    { X86::VRCP14PDZr, X86::VRCP14PDZm, 0 },
+    { X86::VRCP14PSZr, X86::VRCP14PSZm, 0 },
+    { X86::VRCP28PDr, X86::VRCP28PDm, 0 },
+    { X86::VRCP28PSr, X86::VRCP28PSm, 0 },
+    { X86::VRSQRT14PDZr, X86::VRSQRT14PDZm, 0 },
+    { X86::VRSQRT14PSZr, X86::VRSQRT14PSZm, 0 },
+    { X86::VRSQRT28PDr, X86::VRSQRT28PDm, 0 },
+    { X86::VRSQRT28PSr, X86::VRSQRT28PSm, 0 },
+    { X86::VSQRTPDZr, X86::VSQRTPDZm, 0 },
+    { X86::VSQRTPSZr, X86::VSQRTPSZm, 0 },
+    { X86::VUCOMISDZrr, X86::VUCOMISDZrm, 0 },
+    { X86::VUCOMISDZrr_Int, X86::VUCOMISDZrm_Int, TB_NO_REVERSE },
+    { X86::VUCOMISSZrr, X86::VUCOMISSZrm, 0 },
+    { X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE },
 
     // AVX-512 foldable instructions (256-bit versions)
     { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
@@ -1002,18 +1031,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rm, 0 },
     { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
     { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
+    { X86::VMOVDDUPZ256rr, X86::VMOVDDUPZ256rm, 0 },
     { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
     { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
     { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
     { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
     { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
     { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
+    { X86::VMOVSHDUPZ256rr, X86::VMOVSHDUPZ256rm, 0 },
+    { X86::VMOVSLDUPZ256rr, X86::VMOVSLDUPZ256rm, 0 },
     { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
     { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
     { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 },
     { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 },
     { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 },
     { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 },
+    { X86::VPBROADCASTBZ256r, X86::VPBROADCASTBZ256m, TB_NO_REVERSE },
+    { X86::VPBROADCASTDZ256r, X86::VPBROADCASTDZ256m, TB_NO_REVERSE },
+    { X86::VPBROADCASTQZ256r, X86::VPBROADCASTQZ256m, TB_NO_REVERSE },
+    { X86::VPBROADCASTWZ256r, X86::VPBROADCASTWZ256m, TB_NO_REVERSE },
     { X86::VPCONFLICTDZ256rr, X86::VPCONFLICTDZ256rm, 0 },
     { X86::VPCONFLICTQZ256rr, X86::VPCONFLICTQZ256rm, 0 },
     { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
@@ -1052,6 +1088,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 },
     { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 },
     { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 },
+    { X86::VRCP14PDZ256r, X86::VRCP14PDZ256m, 0 },
+    { X86::VRCP14PSZ256r, X86::VRCP14PSZ256m, 0 },
+    { X86::VRSQRT14PDZ256r, X86::VRSQRT14PDZ256m, 0 },
+    { X86::VRSQRT14PSZ256r, X86::VRSQRT14PSZ256m, 0 },
+    { X86::VSQRTPDZ256r, X86::VSQRTPDZ256m, 0 },
+    { X86::VSQRTPSZ256r, X86::VSQRTPSZ256m, 0 },
 
     // AVX-512 foldable instructions (128-bit versions)
     { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
@@ -1060,18 +1102,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rm, TB_NO_REVERSE },
     { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
     { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
+    { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rm, 0 },
     { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
     { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
     { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
     { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
     { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
     { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
+    { X86::VMOVSHDUPZ128rr, X86::VMOVSHDUPZ128rm, 0 },
+    { X86::VMOVSLDUPZ128rr, X86::VMOVSLDUPZ128rm, 0 },
     { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
     { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
     { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 },
     { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 },
     { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 },
     { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 },
+    { X86::VPBROADCASTBZ128r, X86::VPBROADCASTBZ128m, TB_NO_REVERSE },
+    { X86::VPBROADCASTDZ128r, X86::VPBROADCASTDZ128m, TB_NO_REVERSE },
+    { X86::VPBROADCASTQZ128r, X86::VPBROADCASTQZ128m, TB_NO_REVERSE },
+    { X86::VPBROADCASTWZ128r, X86::VPBROADCASTWZ128m, TB_NO_REVERSE },
     { X86::VPCONFLICTDZ128rr, X86::VPCONFLICTDZ128rm, 0 },
     { X86::VPCONFLICTQZ128rr, X86::VPCONFLICTQZ128rm, 0 },
     { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
@@ -1108,6 +1157,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 },
     { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 },
     { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 },
+    { X86::VRCP14PDZ128r, X86::VRCP14PDZ128m, 0 },
+    { X86::VRCP14PSZ128r, X86::VRCP14PSZ128m, 0 },
+    { X86::VRSQRT14PDZ128r, X86::VRSQRT14PDZ128m, 0 },
+    { X86::VRSQRT14PSZ128r, X86::VRSQRT14PSZ128m, 0 },
+    { X86::VSQRTPDZ128r, X86::VSQRTPDZ128m, 0 },
+    { X86::VSQRTPSZ128r, X86::VSQRTPSZ128m, 0 },
 
     // F16C foldable instructions
     { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, TB_NO_REVERSE },
@@ -2063,13 +2118,28 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
     { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 },
     { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
+    { X86::VPMULHRSWZrr, X86::VPMULHRSWZrm, 0 },
+    { X86::VPMULHUWZrr, X86::VPMULHUWZrm, 0 },
+    { X86::VPMULHWZrr, X86::VPMULHWZrm, 0 },
     { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 },
     { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 },
     { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 },
+    { X86::VPMULTISHIFTQBZrr, X86::VPMULTISHIFTQBZrm, 0 },
     { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
     { X86::VPORDZrr, X86::VPORDZrm, 0 },
     { X86::VPORQZrr, X86::VPORQZrm, 0 },
+    { X86::VPROLVDZrr, X86::VPROLVDZrm, 0 },
+    { X86::VPROLVQZrr, X86::VPROLVQZrm, 0 },
+    { X86::VPRORVDZrr, X86::VPRORVDZrm, 0 },
+    { X86::VPRORVQZrr, X86::VPRORVQZrm, 0 },
     { X86::VPSADBWZrr, X86::VPSADBWZrm, 0 },
+    { X86::VPSHLDDZrri, X86::VPSHLDDZrmi, 0 },
+    { X86::VPSHLDQZrri, X86::VPSHLDQZrmi, 0 },
+    { X86::VPSHLDWZrri, X86::VPSHLDWZrmi, 0 },
+    { X86::VPSHRDDZrri, X86::VPSHRDDZrmi, 0 },
+    { X86::VPSHRDQZrri, X86::VPSHRDQZrmi, 0 },
+    { X86::VPSHRDWZrri, X86::VPSHRDWZrmi, 0 },
+    { X86::VPSHUFBITQMBZrr, X86::VPSHUFBITQMBZrm, 0 },
     { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 },
     { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 },
     { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 },
@@ -2097,6 +2167,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 },
     { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 },
     { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 },
+    { X86::VPTESTMBZrr, X86::VPTESTMBZrm, 0 },
+    { X86::VPTESTMDZrr, X86::VPTESTMDZrm, 0 },
+    { X86::VPTESTMQZrr, X86::VPTESTMQZrm, 0 },
+    { X86::VPTESTMWZrr, X86::VPTESTMWZrm, 0 },
+    { X86::VPTESTNMBZrr, X86::VPTESTNMBZrm, 0 },
+    { X86::VPTESTNMDZrr, X86::VPTESTNMDZrm, 0 },
+    { X86::VPTESTNMQZrr, X86::VPTESTNMQZrm, 0 },
+    { X86::VPTESTNMWZrr, X86::VPTESTNMWZrm, 0 },
     { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 },
     { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 },
     { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 },
@@ -2107,12 +2185,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 },
     { X86::VPXORDZrr, X86::VPXORDZrm, 0 },
     { X86::VPXORQZrr, X86::VPXORQZrm, 0 },
+    { X86::VRANGEPDZrri, X86::VRANGEPDZrmi, 0 },
+    { X86::VRANGEPSZrri, X86::VRANGEPSZrmi, 0 },
+    { X86::VRANGESDZrri, X86::VRANGESDZrmi, TB_NO_REVERSE },
+    { X86::VRANGESSZrri, X86::VRANGESSZrmi, TB_NO_REVERSE },
+    { X86::VRCP14SDrr, X86::VRCP14SDrm, TB_NO_REVERSE },
+    { X86::VRCP14SSrr, X86::VRCP14SSrm, TB_NO_REVERSE },
+    { X86::VRCP28SDr, X86::VRCP28SDm, TB_NO_REVERSE },
+    { X86::VRCP28SSr, X86::VRCP28SSm, TB_NO_REVERSE },
+    { X86::VREDUCESDZrri, X86::VREDUCESDZrmi, TB_NO_REVERSE },
+    { X86::VREDUCESSZrri, X86::VREDUCESSZrmi, TB_NO_REVERSE },
+    { X86::VRNDSCALESDr, X86::VRNDSCALESDm, 0 },
+    { X86::VRNDSCALESDr_Int, X86::VRNDSCALESDm_Int, TB_NO_REVERSE },
+    { X86::VRNDSCALESSr, X86::VRNDSCALESSm, 0 },
+    { X86::VRNDSCALESSr_Int, X86::VRNDSCALESSm_Int, TB_NO_REVERSE },
+    { X86::VRSQRT14SDrr, X86::VRSQRT14SDrm, TB_NO_REVERSE },
+    { X86::VRSQRT14SSrr, X86::VRSQRT14SSrm, TB_NO_REVERSE },
+    { X86::VRSQRT28SDr, X86::VRSQRT28SDm, TB_NO_REVERSE },
+    { X86::VRSQRT28SSr, X86::VRSQRT28SSm, TB_NO_REVERSE },
+    { X86::VSCALEFPDZrr, X86::VSCALEFPDZrm, 0 },
+    { X86::VSCALEFPSZrr, X86::VSCALEFPSZrm, 0 },
+    { X86::VSCALEFSDZrr, X86::VSCALEFSDZrm, TB_NO_REVERSE },
+    { X86::VSCALEFSSZrr, X86::VSCALEFSSZrm, TB_NO_REVERSE },
     { X86::VSHUFF32X4Zrri, X86::VSHUFF32X4Zrmi, 0 },
     { X86::VSHUFF64X2Zrri, X86::VSHUFF64X2Zrmi, 0 },
     { X86::VSHUFI64X2Zrri, X86::VSHUFI64X2Zrmi, 0 },
     { X86::VSHUFI32X4Zrri, X86::VSHUFI32X4Zrmi, 0 },
     { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
     { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
+    { X86::VSQRTSDZr, X86::VSQRTSDZm, 0 },
+    { X86::VSQRTSDZr_Int, X86::VSQRTSDZm_Int, TB_NO_REVERSE },
+    { X86::VSQRTSSZr, X86::VSQRTSSZm, 0 },
+    { X86::VSQRTSSZr_Int, X86::VSQRTSSZm_Int, TB_NO_REVERSE },
     { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
     { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
     { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 },
@@ -2299,20 +2403,50 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 },
     { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 },
     { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 },
+    { X86::VPMULHRSWZ128rr, X86::VPMULHRSWZ128rm, 0 },
+    { X86::VPMULHRSWZ256rr, X86::VPMULHRSWZ256rm, 0 },
+    { X86::VPMULHUWZ128rr, X86::VPMULHUWZ128rm, 0 },
+    { X86::VPMULHUWZ256rr, X86::VPMULHUWZ256rm, 0 },
+    { X86::VPMULHWZ128rr, X86::VPMULHWZ128rm, 0 },
+    { X86::VPMULHWZ256rr, X86::VPMULHWZ256rm, 0 },
     { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 },
     { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 },
     { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 },
     { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 },
     { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 },
     { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 },
+    { X86::VPMULTISHIFTQBZ128rr, X86::VPMULTISHIFTQBZ128rm, 0 },
+    { X86::VPMULTISHIFTQBZ256rr, X86::VPMULTISHIFTQBZ256rm, 0 },
     { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 },
     { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 },
     { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 },
     { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 },
     { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 },
     { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 },
+    { X86::VPROLVDZ128rr, X86::VPROLVDZ128rm, 0 },
+    { X86::VPROLVDZ256rr, X86::VPROLVDZ256rm, 0 },
+    { X86::VPROLVQZ128rr, X86::VPROLVQZ128rm, 0 },
+    { X86::VPROLVQZ256rr, X86::VPROLVQZ256rm, 0 },
+    { X86::VPRORVDZ128rr, X86::VPRORVDZ128rm, 0 },
+    { X86::VPRORVDZ256rr, X86::VPRORVDZ256rm, 0 },
+    { X86::VPRORVQZ128rr, X86::VPRORVQZ128rm, 0 },
+    { X86::VPRORVQZ256rr, X86::VPRORVQZ256rm, 0 },
     { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 },
     { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 },
+    { X86::VPSHLDDZ128rri, X86::VPSHLDDZ128rmi, 0 },
+    { X86::VPSHLDDZ256rri, X86::VPSHLDDZ256rmi, 0 },
+    { X86::VPSHLDQZ128rri, X86::VPSHLDQZ128rmi, 0 },
+    { X86::VPSHLDQZ256rri, X86::VPSHLDQZ256rmi, 0 },
+    { X86::VPSHLDWZ128rri, X86::VPSHLDWZ128rmi, 0 },
+    { X86::VPSHLDWZ256rri, X86::VPSHLDWZ256rmi, 0 },
+    { X86::VPSHRDDZ128rri, X86::VPSHRDDZ128rmi, 0 },
+    { X86::VPSHRDDZ256rri, X86::VPSHRDDZ256rmi, 0 },
+    { X86::VPSHRDQZ128rri, X86::VPSHRDQZ128rmi, 0 },
+    { X86::VPSHRDQZ256rri, X86::VPSHRDQZ256rmi, 0 },
+    { X86::VPSHRDWZ128rri, X86::VPSHRDWZ128rmi, 0 },
+    { X86::VPSHRDWZ256rri, X86::VPSHRDWZ256rmi, 0 },
+    { X86::VPSHUFBITQMBZ128rr, X86::VPSHUFBITQMBZ128rm, 0 },
+    { X86::VPSHUFBITQMBZ256rr, X86::VPSHUFBITQMBZ256rm, 0 },
     { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 },
     { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 },
     { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 },
@@ -2367,6 +2501,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 },
     { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 },
     { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 },
+    { X86::VPTESTMBZ128rr, X86::VPTESTMBZ128rm, 0 },
+    { X86::VPTESTMBZ256rr, X86::VPTESTMBZ256rm, 0 },
+    { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rm, 0 },
+    { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rm, 0 },
+    { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rm, 0 },
+    { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rm, 0 },
+    { X86::VPTESTMWZ128rr, X86::VPTESTMWZ128rm, 0 },
+    { X86::VPTESTMWZ256rr, X86::VPTESTMWZ256rm, 0 },
+    { X86::VPTESTNMBZ128rr, X86::VPTESTNMBZ128rm, 0 },
+    { X86::VPTESTNMBZ256rr, X86::VPTESTNMBZ256rm, 0 },
+    { X86::VPTESTNMDZ128rr, X86::VPTESTNMDZ128rm, 0 },
+    { X86::VPTESTNMDZ256rr, X86::VPTESTNMDZ256rm, 0 },
+    { X86::VPTESTNMQZ128rr, X86::VPTESTNMQZ128rm, 0 },
+    { X86::VPTESTNMQZ256rr, X86::VPTESTNMQZ256rm, 0 },
+    { X86::VPTESTNMWZ128rr, X86::VPTESTNMWZ128rm, 0 },
+    { X86::VPTESTNMWZ256rr, X86::VPTESTNMWZ256rm, 0 },
     { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 },
     { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 },
     { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 },
@@ -2387,6 +2537,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 },
     { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 },
     { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 },
+    { X86::VRANGEPDZ128rri, X86::VRANGEPDZ128rmi, 0 },
+    { X86::VRANGEPDZ256rri, X86::VRANGEPDZ256rmi, 0 },
+    { X86::VRANGEPSZ128rri, X86::VRANGEPSZ128rmi, 0 },
+    { X86::VRANGEPSZ256rri, X86::VRANGEPSZ256rmi, 0 },
+    { X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rm, 0 },
+    { X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rm, 0 },
+    { X86::VSCALEFPSZ128rr, X86::VSCALEFPSZ128rm, 0 },
+    { X86::VSCALEFPSZ256rr, X86::VSCALEFPSZ256rm, 0 },
     { X86::VSHUFF32X4Z256rri, X86::VSHUFF32X4Z256rmi, 0 },
     { X86::VSHUFF64X2Z256rri, X86::VSHUFF64X2Z256rmi, 0 },
     { X86::VSHUFI32X4Z256rri, X86::VSHUFI32X4Z256rmi, 0 },
@@ -2547,9 +2705,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
     { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
     { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 },
+    { X86::VAESDECLASTYrr, X86::VAESDECLASTYrm, 0 },
+    { X86::VAESDECLASTZ128rr, X86::VAESDECLASTZ128rm, 0 },
+    { X86::VAESDECLASTZ256rr, X86::VAESDECLASTZ256rm, 0 },
+    { X86::VAESDECLASTZrr, X86::VAESDECLASTZrm, 0 },
     { X86::VAESDECrr, X86::VAESDECrm, 0 },
+    { X86::VAESDECYrr, X86::VAESDECYrm, 0 },
+    { X86::VAESDECZ128rr, X86::VAESDECZ128rm, 0 },
+    { X86::VAESDECZ256rr, X86::VAESDECZ256rm, 0 },
+    { X86::VAESDECZrr, X86::VAESDECZrm, 0 },
     { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 },
+    { X86::VAESENCLASTYrr, X86::VAESENCLASTYrm, 0 },
+    { X86::VAESENCLASTZ128rr, X86::VAESENCLASTZ128rm, 0 },
+    { X86::VAESENCLASTZ256rr, X86::VAESENCLASTZ256rm, 0 },
+    { X86::VAESENCLASTZrr, X86::VAESENCLASTZrm, 0 },
     { X86::VAESENCrr, X86::VAESENCrm, 0 },
+    { X86::VAESENCYrr, X86::VAESENCYrm, 0 },
+    { X86::VAESENCZ128rr, X86::VAESENCZ128rm, 0 },
+    { X86::VAESENCZ256rr, X86::VAESENCZ256rm, 0 },
+    { X86::VAESENCZrr, X86::VAESENCZrm, 0 },
 
     // SHA foldable instructions
     { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
@@ -2558,7 +2732,27 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
     { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
     { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
-    { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }
+    { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 },
+
+    // GFNI foldable instructions
+    { X86::GF2P8AFFINEINVQBrri, X86::GF2P8AFFINEINVQBrmi, TB_ALIGN_16 },
+    { X86::GF2P8AFFINEQBrri, X86::GF2P8AFFINEQBrmi, TB_ALIGN_16 },
+    { X86::GF2P8MULBrr, X86::GF2P8MULBrm, TB_ALIGN_16 },
+    { X86::VGF2P8AFFINEINVQBrri, X86::VGF2P8AFFINEINVQBrmi, 0 },
+    { X86::VGF2P8AFFINEINVQBYrri, X86::VGF2P8AFFINEINVQBYrmi, 0 },
+    { X86::VGF2P8AFFINEINVQBZ128rri, X86::VGF2P8AFFINEINVQBZ128rmi, 0 },
+    { X86::VGF2P8AFFINEINVQBZ256rri, X86::VGF2P8AFFINEINVQBZ256rmi, 0 },
+    { X86::VGF2P8AFFINEINVQBZrri, X86::VGF2P8AFFINEINVQBZrmi, 0 },
+    { X86::VGF2P8AFFINEQBrri, X86::VGF2P8AFFINEQBrmi, 0 },
+    { X86::VGF2P8AFFINEQBYrri, X86::VGF2P8AFFINEQBYrmi, 0 },
+    { X86::VGF2P8AFFINEQBZ128rri, X86::VGF2P8AFFINEQBZ128rmi, 0 },
+    { X86::VGF2P8AFFINEQBZ256rri, X86::VGF2P8AFFINEQBZ256rmi, 0 },
+    { X86::VGF2P8AFFINEQBZrri, X86::VGF2P8AFFINEQBZrmi, 0 },
+    { X86::VGF2P8MULBrr, X86::VGF2P8MULBrm, 0 },
+    { X86::VGF2P8MULBYrr, X86::VGF2P8MULBYrm, 0 },
+    { X86::VGF2P8MULBZ128rr, X86::VGF2P8MULBZ128rm, 0 },
+    { X86::VGF2P8MULBZ256rr, X86::VGF2P8MULBZ256rm, 0 },
+    { X86::VGF2P8MULBZrr, X86::VGF2P8MULBZrm, 0 },
   };
 
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
@@ -2621,6 +2815,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPPERMrrr, X86::VPPERMrrm, 0 },
 
     // AVX-512 instructions with 3 source operands.
+    { X86::VFIXUPIMMPDZrri, X86::VFIXUPIMMPDZrmi, 0 },
+    { X86::VFIXUPIMMPSZrri, X86::VFIXUPIMMPSZrmi, 0 },
+    { X86::VFIXUPIMMSDrri, X86::VFIXUPIMMSDrmi, TB_NO_REVERSE },
+    { X86::VFIXUPIMMSSrri, X86::VFIXUPIMMSSrmi, TB_NO_REVERSE },
+    { X86::VPDPBUSDSZr, X86::VPDPBUSDSZm, 0 },
+    { X86::VPDPBUSDZr, X86::VPDPBUSDZm, 0 },
+    { X86::VPDPWSSDSZr, X86::VPDPWSSDSZm, 0 },
+    { X86::VPDPWSSDZr, X86::VPDPWSSDZm, 0 },
     { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 },
     { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
     { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
@@ -2639,6 +2841,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 },
 
     // AVX-512VL 256-bit instructions with 3 source operands.
+    { X86::VFIXUPIMMPDZ256rri, X86::VFIXUPIMMPDZ256rmi, 0 },
+    { X86::VFIXUPIMMPSZ256rri, X86::VFIXUPIMMPSZ256rmi, 0 },
+    { X86::VPDPBUSDSZ256r, X86::VPDPBUSDSZ256m, 0 },
+    { X86::VPDPBUSDZ256r, X86::VPDPBUSDZ256m, 0 },
+    { X86::VPDPWSSDSZ256r, X86::VPDPWSSDSZ256m, 0 },
+    { X86::VPDPWSSDZ256r, X86::VPDPWSSDZ256m, 0 },
     { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 },
     { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 },
     { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 },
@@ -2657,6 +2865,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 },
 
     // AVX-512VL 128-bit instructions with 3 source operands.
+    { X86::VFIXUPIMMPDZ128rri, X86::VFIXUPIMMPDZ128rmi, 0 },
+    { X86::VFIXUPIMMPSZ128rri, X86::VFIXUPIMMPSZ128rmi, 0 },
+    { X86::VPDPBUSDSZ128r, X86::VPDPBUSDSZ128m, 0 },
+    { X86::VPDPBUSDZ128r, X86::VPDPBUSDZ128m, 0 },
+    { X86::VPDPWSSDSZ128r, X86::VPDPWSSDSZ128m, 0 },
+    { X86::VPDPWSSDZ128r, X86::VPDPWSSDZ128m, 0 },
     { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 },
     { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 },
     { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 },
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index b6373cf997e..b3882afe2d9 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -214,49 +214,26 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
 ;
-; X32_AVX1-LABEL: test4:
-; X32_AVX1: ## %bb.0:
-; X32_AVX1-NEXT: subl $28, %esp
-; X32_AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX1-NEXT: vmovaps %xmm0, (%esp) ## 16-byte Spill
-; X32_AVX1-NEXT: calll _f
-; X32_AVX1-NEXT: vroundss $4, (%esp), %xmm0, %xmm0 ## 16-byte Folded Reload
-; X32_AVX1-NEXT: addl $28, %esp
-; X32_AVX1-NEXT: retl
-;
-; X64_AVX1-LABEL: test4:
-; X64_AVX1: ## %bb.0:
-; X64_AVX1-NEXT: subq $24, %rsp
-; X64_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64_AVX1-NEXT: vmovaps %xmm0, (%rsp) ## 16-byte Spill
-; X64_AVX1-NEXT: callq _f
-; X64_AVX1-NEXT: vroundss $4, (%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload
-; X64_AVX1-NEXT: addq $24, %rsp
-; X64_AVX1-NEXT: retq
-;
-; X32_AVX512-LABEL: test4:
-; X32_AVX512: ## %bb.0:
-; X32_AVX512-NEXT: subl $28, %esp
-; X32_AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX512-NEXT: vmovaps %xmm0, (%esp) ## 16-byte Spill
-; X32_AVX512-NEXT: calll _f
-; X32_AVX512-NEXT: vmovaps (%esp), %xmm1 ## 16-byte Reload
-; X32_AVX512-NEXT: vroundss $4, %xmm1, %xmm0, %xmm0
-; X32_AVX512-NEXT: addl $28, %esp
-; X32_AVX512-NEXT: retl
+; X32_AVX-LABEL: test4:
+; X32_AVX: ## %bb.0:
+; X32_AVX-NEXT: subl $28, %esp
+; X32_AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32_AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32_AVX-NEXT: vmovaps %xmm0, (%esp) ## 16-byte Spill
+; X32_AVX-NEXT: calll _f
+; X32_AVX-NEXT: vroundss $4, (%esp), %xmm0, %xmm0 ## 16-byte Folded Reload
+; X32_AVX-NEXT: addl $28, %esp
+; X32_AVX-NEXT: retl
 ;
-; X64_AVX512-LABEL: test4:
-; X64_AVX512: ## %bb.0:
-; X64_AVX512-NEXT: subq $24, %rsp
-; X64_AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64_AVX512-NEXT: vmovaps %xmm0, (%rsp) ## 16-byte Spill
-; X64_AVX512-NEXT: callq _f
-; X64_AVX512-NEXT: vmovaps (%rsp), %xmm1 ## 16-byte Reload
-; X64_AVX512-NEXT: vroundss $4, %xmm1, %xmm0, %xmm0
-; X64_AVX512-NEXT: addq $24, %rsp
-; X64_AVX512-NEXT: retq
+; X64_AVX-LABEL: test4:
+; X64_AVX: ## %bb.0:
+; X64_AVX-NEXT: subq $24, %rsp
+; X64_AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64_AVX-NEXT: vmovaps %xmm0, (%rsp) ## 16-byte Spill
+; X64_AVX-NEXT: callq _f
+; X64_AVX-NEXT: vroundss $4, (%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload
+; X64_AVX-NEXT: addq $24, %rsp
+; X64_AVX-NEXT: retq
  %a = load float , float *%b
  %B = insertelement <4 x float> undef, float %a, i32 0
  %q = call <4 x float> @f()
-- 
2.11.0
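
Postscript (not part of the patch): for readers outside the backend, each row of
these tables pairs a register-form opcode with its memory-form counterpart, plus
flags constraining the fold (alignment requirements, or TB_NO_REVERSE to forbid
unfolding back to the register form). What follows is a minimal, hypothetical C++
sketch of that lookup; the names and the std::map layout are illustrative only
and do not match LLVM's actual fold-table implementation.

    #include <cstdint>
    #include <map>

    // Illustrative flags modeled on the patch's table flags; values are
    // hypothetical, not LLVM's.
    enum FoldFlags : uint16_t {
      TB_NONE = 0,
      TB_NO_REVERSE = 1 << 0, // fold only; never unfold to the reg form
      TB_ALIGN_16 = 1 << 1,   // memory operand must be 16-byte aligned
    };

    struct FoldEntry {
      unsigned MemOpcode; // the load-folding "...rm" form of the opcode
      uint16_t Flags;
    };

    // One row per "{ RegOpc, MemOpc, Flags }" initializer in the patch,
    // e.g. (commented out, since the X86:: enums are not defined here):
    static const std::map<unsigned, FoldEntry> FoldTable = {
        // { X86::VRNDSCALESSr, { X86::VRNDSCALESSm, TB_NONE } },
        // { X86::MOVAPSrr,     { X86::MOVAPSrm,     TB_ALIGN_16 } },
    };

    // If RegOpcode has a registered memory form whose constraints are met,
    // return it; otherwise return 0 and the caller keeps the separate
    // reload + register-register instruction pair.
    unsigned foldLoad(unsigned RegOpcode, unsigned AlignBytes) {
      auto It = FoldTable.find(RegOpcode);
      if (It == FoldTable.end())
        return 0; // no memory form registered for this opcode
      if ((It->second.Flags & TB_ALIGN_16) && AlignBytes < 16)
        return 0; // operand not sufficiently aligned for the folded form
      return It->second.MemOpcode;
    }

This is how the test change above comes about: once VRNDSCALESSr_Int has a table
entry, the spill-slot reload is folded into the instruction, and the EVEX->VEX
pass then rewrites the folded VRNDSCALESS into VROUNDSS.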