From c6fcdeb8f94c0b1a4b203e341d978b36df58b1e3 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Thu, 1 Jul 2010 17:35:02 +0000 Subject: [PATCH] Move SSE3 Move patterns to a more appropriate section Add AVX SSE3 packed horizontal add & sub instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107405 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 85 +++++++++++++++++++++------------ test/MC/AsmParser/X86/x86_32-encoding.s | 32 +++++++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 32 +++++++++++++ 3 files changed, 119 insertions(+), 30 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e58102e0f8a..780788fdd30 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3221,10 +3221,20 @@ let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; defm MOVDDUP : sse3_replicate_dfp<"movddup">; +// Move Unaligned Integer +let isAsmParserOnly = 1 in + def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vlddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; +def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "lddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; + def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; +// Several Move patterns let AddedComplexity = 5 in { def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; @@ -3236,6 +3246,22 @@ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; } +// vector_shuffle v1, <1, 1, 3, 3> +let AddedComplexity = 15 in +def : Pat<(v4i32 (movshdup VR128:$src, (undef))), + (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; +let AddedComplexity = 20 in +def : 
Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; + +// vector_shuffle v1, <0, 0, 2, 2> +let AddedComplexity = 15 in + def : Pat<(v4i32 (movsldup VR128:$src, (undef))), + (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; +let AddedComplexity = 20 in + def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; + //===---------------------------------------------------------------------===// // SSE3 - Arithmetic //===---------------------------------------------------------------------===// @@ -3275,28 +3301,43 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3], // SSE3 Instructions //===---------------------------------------------------------------------===// -def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "lddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; - // Horizontal ops -class S3D_Intrr o, string OpcodeStr, Intrinsic IntId> +class S3D_Intrr o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3DI; -class S3D_Intrm o, string OpcodeStr, Intrinsic IntId> +class S3D_Intrm o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3DI; -class S3_Intrr o, string OpcodeStr, Intrinsic IntId> +class S3_Intrr o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3I; -class S3_Intrm o, string OpcodeStr, Intrinsic IntId> +class S3_Intrm o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3I; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in { + def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; + def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; + def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; + def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; + def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; + def VHSUBPSrm : 
S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; + def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; + def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; +} + let Constraints = "$src1 = $dst" in { def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>; def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>; @@ -3308,32 +3349,16 @@ let Constraints = "$src1 = $dst" in { def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; } +//===---------------------------------------------------------------------===// +// SSSE3 Instructions +//===---------------------------------------------------------------------===// + // Thread synchronization def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; -// vector_shuffle v1, <1, 1, 3, 3> -let AddedComplexity = 15 in -def : Pat<(v4i32 (movshdup VR128:$src, (undef))), - (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in -def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; - -// vector_shuffle v1, <0, 0, 2, 2> -let AddedComplexity = 15 in - def : Pat<(v4i32 (movsldup VR128:$src, (undef))), - (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in - def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; - -//===---------------------------------------------------------------------===// -// SSSE3 Instructions -//===---------------------------------------------------------------------===// - /// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. 
multiclass SS3I_unop_rm_int_8 opc, string OpcodeStr, Intrinsic IntId64, Intrinsic IntId128> { diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 9a797a0d792..b2789cda3f1 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -11582,3 +11582,35 @@ // CHECK: encoding: [0xc5,0xf1,0xd0,0x10] vaddsubpd (%eax), %xmm1, %xmm2 +// CHECK: vhaddps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9] + vhaddps %xmm1, %xmm2, %xmm3 + +// CHECK: vhaddps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7c,0x18] + vhaddps (%eax), %xmm2, %xmm3 + +// CHECK: vhaddpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9] + vhaddpd %xmm1, %xmm2, %xmm3 + +// CHECK: vhaddpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7c,0x18] + vhaddpd (%eax), %xmm2, %xmm3 + +// CHECK: vhsubps %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9] + vhsubps %xmm1, %xmm2, %xmm3 + +// CHECK: vhsubps (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0x7d,0x18] + vhsubps (%eax), %xmm2, %xmm3 + +// CHECK: vhsubpd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9] + vhsubpd %xmm1, %xmm2, %xmm3 + +// CHECK: vhsubpd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x7d,0x18] + vhsubpd (%eax), %xmm2, %xmm3 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index e433b350013..8be9d67d992 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -1630,3 +1630,35 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc5,0x21,0xd0,0x20] vaddsubpd (%rax), %xmm11, %xmm12 +// CHECK: vhaddps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb] + vhaddps %xmm11, %xmm12, %xmm13 + +// CHECK: vhaddps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0x7c,0x28] + vhaddps (%rax), %xmm12, %xmm13 + +// CHECK: vhaddpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: 
[0xc4,0x41,0x19,0x7c,0xeb] + vhaddpd %xmm11, %xmm12, %xmm13 + +// CHECK: vhaddpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x7c,0x28] + vhaddpd (%rax), %xmm12, %xmm13 + +// CHECK: vhsubps %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb] + vhsubps %xmm11, %xmm12, %xmm13 + +// CHECK: vhsubps (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0x7d,0x28] + vhsubps (%rax), %xmm12, %xmm13 + +// CHECK: vhsubpd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb] + vhsubpd %xmm11, %xmm12, %xmm13 + +// CHECK: vhsubpd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x7d,0x28] + vhsubpd (%rax), %xmm12, %xmm13 + -- 2.11.0