From bd477dfbbfe0e7c8eba1e1da775df0413cbb0fb9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 9 Feb 2015 04:04:50 +0000 Subject: [PATCH] [X86] Remove 'memop' uses from AVX512. Use 'load' instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228562 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 152 ++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 81 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 3bd1f74a2b0..ee63271632e 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -61,16 +61,6 @@ class X86VectorVTInfo("load" # EltVT); - // Load patterns used for memory operands. We only have this defined in - // case of i64 element types for sub-512 integer vectors. For now, keep - // MemOpFrag undefined in these cases. - PatFrag MemOpFrag = - !if (!eq (NumElts#EltTypeName, "1f32"), !cast("memopfsf32"), - !if (!eq (NumElts#EltTypeName, "1f64"), !cast("memopfsf64"), - !if (!eq (TypeVariantName, "f"), !cast("memop" # VTName), - !if (!eq (EltTypeName, "i64"), !cast("memop" # VTName), - !if (!eq (VTName, "v16i32"), !cast("memop" # VTName), ?))))); - // The corresponding float type, e.g. v16f32 for v16i32 // Note: For EltSize < 32, FloatVT is illegal and TableGen // fails to compile, so we choose FloatVT = VT @@ -893,7 +883,7 @@ multiclass avx512_perm_imm opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst, - (_.VT (OpNode (_.MemOpFrag addr:$src1), + (_.VT (OpNode (_.LdFrag addr:$src1), (i8 imm:$src2))))]>, EVEX, EVEX_CD8<_.EltSize, CD8VF>; } @@ -917,7 +907,7 @@ multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst, (_.VT (X86VPermilpv _.RC:$src1, - (Ctrl.VT (Ctrl.MemOpFrag addr:$src2)))))]>, + (Ctrl.VT (Ctrl.LdFrag addr:$src2)))))]>, EVEX_4V; } } @@ -957,15 +947,15 @@ multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, EVEX_4V; } -defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem, +defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, +defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; let ExeDomain = SSEPackedSingle in -defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem, +defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, +defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // -- VPERM2I - 3 source operands form -- @@ -1040,16 +1030,16 @@ let Constraints = "$src1 = $dst" in { EVEX_4V, EVEX_KZ; } } -defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, +defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, loadv16i32, i512mem, X86VPermiv3, v16i32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, +defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, loadv8i64, i512mem, X86VPermiv3, v8i64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, +defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, loadv16f32, i512mem, X86VPermiv3, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, +defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, loadv8f64, i512mem, X86VPermiv3, v8f64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; @@ -1069,16 +1059,16 @@ multiclass avx512_perm_table_3src opc, string Suffix, RegisterClass RC, (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>; } -defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem, +defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, loadv16i32, i512mem, X86VPermv3, v16i32, VK16WM, v16i1, GR16>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem, +defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, loadv8i64, i512mem, X86VPermv3, v8i64, VK8WM, v8i1, GR8>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem, +defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, loadv16f32, i512mem, X86VPermv3, v16f32, VK16WM, v16i1, GR16>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem, +defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, loadv8f64, i512mem, X86VPermv3, v8f64, VK8WM, v8i1, GR8>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; @@ -1544,7 +1534,7 @@ multiclass avx512_cmp_packed; + (X86cmpm (vt RC:$src1), (load addr:$src2), imm:$cc))], d>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { @@ -3063,12 +3053,12 @@ defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", + loadv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", + loadv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), @@ -3154,16 +3144,16 @@ multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, d>, EVEX_4V; } -defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, +defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64, VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, +defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64, VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, +defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64, VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, +defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64, VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; @@ -3183,16 +3173,16 @@ multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, IIC_SSE_UNPCK>, EVEX_4V; } defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, - VR512, memopv16i32, i512mem>, EVEX_V512, + VR512, loadv16i32, i512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, - VR512, memopv8i64, i512mem>, EVEX_V512, + VR512, loadv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, - VR512, memopv16i32, i512mem>, EVEX_V512, + VR512, loadv16i32, i512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, - VR512, memopv8i64, i512mem>, EVEX_V512, + VR512, loadv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - PSHUFD @@ -3217,7 +3207,7 @@ multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, (i8 imm:$src2))))]>, EVEX; } -defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32, +defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, loadv16i32, i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; //===----------------------------------------------------------------------===// @@ -3351,18 +3341,18 @@ multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, } defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem, - memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512, + loadv16i32, X86testm, v16i32>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, - memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W, + loadv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; let Predicates = [HasCDI] in { defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem, - memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512, + loadv16i32, X86testnm, v16i32>, T8XS, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem, - memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W, + loadv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } @@ -3387,7 +3377,7 @@ multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, defm mi : AVX512_maskable, AVX512BIi8Base, EVEX_4V; } @@ -3402,7 +3392,7 @@ multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable, AVX512BIBase, EVEX_4V; } @@ -3457,7 +3447,7 @@ multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable, AVX5128IBase, EVEX_4V; } @@ -3493,7 +3483,7 @@ def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX; } -defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>, +defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, loadv8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))), (VMOVDDUPZrm addr:$src)>; @@ -3514,17 +3504,17 @@ multiclass avx512_replicate_sfp op, SDNode OpNode, string OpcodeStr, } defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup", - v16f32, VR512, memopv16f32, f512mem>, EVEX_V512, + v16f32, VR512, loadv16f32, f512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup", - v16f32, VR512, memopv16f32, f512mem>, EVEX_V512, + v16f32, VR512, loadv16f32, f512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>; def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>; -def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))), +def : Pat<(v16i32 (X86Movshdup (loadv16i32 addr:$src))), (VMOVSHDUPZrm addr:$src)>; def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>; -def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))), +def : Pat<(v16i32 (X86Movsldup (loadv16i32 addr:$src))), (VMOVSLDUPZrm addr:$src)>; //===----------------------------------------------------------------------===// @@ -3650,7 +3640,7 @@ multiclass avx512_fma3p_m132 opc, string OpcodeStr, SDNode OpNode, def m: AVX512FMA3; def mb: AVX512FMA3, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, - memopv4f64, f256mem, v8f64, v8f32, + loadv4f64, f256mem, v8f64, v8f32, SSEPackedDouble>, EVEX_V512, PS, EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), @@ -4058,27 +4048,27 @@ def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), //===----------------------------------------------------------------------===// defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, - memopv8i64, i512mem, v16f32, v16i32, + loadv8i64, i512mem, v16f32, v16i32, SSEPackedSingle>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, - memopv4i64, i256mem, v8f64, v8i32, + loadv4i64, i256mem, v8f64, v8i32, SSEPackedDouble>, EVEX_V512, XS, EVEX_CD8<32, CD8VH>; defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, - memopv16f32, f512mem, v16i32, v16f32, + loadv16f32, f512mem, v16i32, v16f32, SSEPackedSingle>, EVEX_V512, XS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - memopv8f64, f512mem, v8i32, v8f64, + loadv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, - memopv16f32, f512mem, v16i32, v16f32, + loadv16f32, f512mem, v16i32, v16f32, SSEPackedSingle>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; @@ -4088,7 +4078,7 @@ def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src), (VCVTTPS2UDQZrr VR512:$src)>; defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, - memopv8f64, f512mem, v8i32, v8f64, + loadv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>, EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>; @@ -4098,12 +4088,12 @@ def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src), (VCVTTPD2UDQZrr VR512:$src)>; defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, - memopv4i64, f256mem, v8f64, v8i32, + loadv4i64, f256mem, v8f64, v8i32, SSEPackedDouble>, EVEX_V512, XS, EVEX_CD8<32, CD8VH>; defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, - memopv16i32, f512mem, v16f32, v16i32, + loadv16i32, f512mem, v16f32, v16i32, SSEPackedSingle>, EVEX_V512, XD, EVEX_CD8<32, CD8VF>; @@ -4158,10 +4148,10 @@ let hasSideEffects = 0 in { } defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, - memopv16f32, f512mem, SSEPackedSingle>, PD, + loadv16f32, f512mem, SSEPackedSingle>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, - memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, + loadv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src), @@ -4173,10 +4163,10 @@ def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src), (VCVTPD2DQZrrb VR512:$src, imm:$rc)>; defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512, - memopv16f32, f512mem, SSEPackedSingle>, + loadv16f32, f512mem, SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X, - memopv8f64, f512mem, SSEPackedDouble>, VEX_W, + loadv8f64, f512mem, SSEPackedDouble>, VEX_W, PS, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src), @@ -4629,7 +4619,7 @@ let ExeDomain = d in { defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, - memopv16f32, SSEPackedSingle>, EVEX_V512, + loadv16f32, SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), @@ -4639,7 +4629,7 @@ def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, - memopv8f64, SSEPackedDouble>, EVEX_V512, + loadv8f64, SSEPackedDouble>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), @@ -4839,35 +4829,35 @@ multiclass avx512_extend opc, string OpcodeStr, RegisterClass KRC, } defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext, - memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, + loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512, EVEX_CD8<8, CD8VQ>; defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext, - memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, + loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512, EVEX_CD8<8, CD8VO>; defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext, - memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, + loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512, EVEX_CD8<16, CD8VH>; defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext, - memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, + loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512, EVEX_CD8<16, CD8VQ>; defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext, - memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, + loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512, EVEX_CD8<32, CD8VH>; defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext, - memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, + loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512, EVEX_CD8<8, CD8VQ>; defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext, - memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, + loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512, EVEX_CD8<8, CD8VO>; defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext, - memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, + loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512, EVEX_CD8<16, CD8VH>; defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext, - memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, + loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512, EVEX_CD8<16, CD8VQ>; defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext, - memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, + loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512, EVEX_CD8<32, CD8VH>; //===----------------------------------------------------------------------===// @@ -5020,21 +5010,21 @@ multiclass avx512_shufp; } -defm VSHUFPSZ : avx512_shufp, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VSHUFPDZ : avx512_shufp, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>; def : Pat<(v16i32 (X86Shufp VR512:$src1, - (memopv16i32 addr:$src2), (i8 imm:$imm))), + (loadv16i32 addr:$src2), (i8 imm:$imm))), (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>; def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>; def : Pat<(v8i64 (X86Shufp VR512:$src1, - (memopv8i64 addr:$src2), (i8 imm:$imm))), + (loadv8i64 addr:$src2), (i8 imm:$imm))), (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; multiclass avx512_valign { @@ -5241,11 +5231,11 @@ def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1, (VPLZCNTQrrk VR512:$src1, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; -def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))), +def : Pat<(v16i32 (ctlz (loadv16i32 addr:$src))), (VPLZCNTDrm addr:$src)>; def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))), (VPLZCNTDrr VR512:$src)>; -def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))), +def : Pat<(v8i64 (ctlz (loadv8i64 addr:$src))), (VPLZCNTQrm addr:$src)>; def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))), (VPLZCNTQrr VR512:$src)>; -- 2.11.0