From bd2d90f5a53e66b4f515c5d2876afa8a97b78ca6 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Fri, 6 Aug 2010 20:03:27 +0000 Subject: [PATCH] Patterns to match AVX 256-bit permutation intrinsics git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110468 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 ++ lib/Target/X86/X86InstrSSE.td | 59 +++++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 8a1245c5994..a00b0803a3e 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -216,6 +216,8 @@ def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>; def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; +def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; +def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 594d7f615dd..135850fe126 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5359,29 +5359,44 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd">; // Permute Floating-Point Values multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, - RegisterClass RC, X86MemOperand x86memop> { + RegisterClass RC, X86MemOperand x86memop_f, + X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag, + Intrinsic IntVar, Intrinsic IntImm> { def rr : AVX8I, VEX_4V; + [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V; def rm : AVX8I, VEX_4V; + [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V; + def ri : AVXAIi8, VEX; + [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX; def mi : AVXAIi8, VEX; -} - -defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem>; -defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem>; -defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem>; -defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem>; + [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX; +} + +defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, + memopv4f32, memopv4i32, + int_x86_avx_vpermilvar_ps, + int_x86_avx_vpermil_ps>; +defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, + memopv8f32, memopv8i32, + int_x86_avx_vpermilvar_ps_256, + int_x86_avx_vpermil_ps_256>; +defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, + memopv2f64, memopv2i64, + int_x86_avx_vpermilvar_pd, + int_x86_avx_vpermil_pd>; +defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, + memopv4f64, memopv4i64, + int_x86_avx_vpermilvar_pd_256, + int_x86_avx_vpermil_pd_256>; def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), @@ -5401,3 +5416,21 @@ def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", []>, VEX, Requires<[HasAVX]>; } // isAsmParserOnly + +def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; + +def : Pat<(int_x86_avx_vperm2f128_ps_256 + VR256:$src1, (memopv8f32 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_pd_256 + VR256:$src1, (memopv4f64 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_si_256 + VR256:$src1, (memopv8i32 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; + -- 2.11.0