From 8ae6d2810054e7edaad44ddb0cea1a309afec35b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 1 Aug 2017 17:51:20 +0000 Subject: [PATCH] [X86][SSE] Added missing vector logic intrinsic schedules Improves atom scheduler test coverage (to make it easier to upgrade them for PR32431). Merged SSE_VEC_BIT_ITINS_P + SSE_BIT_ITINS_P as we were interchanging between them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309715 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 16 ++++++---------- test/CodeGen/X86/sse-schedule.ll | 34 +++++++++------------------------- test/CodeGen/X86/sse2-schedule.ll | 22 +++++++++++----------- 3 files changed, 26 insertions(+), 46 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b3c5ae54181..3849f0a14a6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -120,10 +120,6 @@ def SSE_DIV_ITINS_P : SizeItins< >; let Sched = WriteVecLogic in -def SSE_VEC_BIT_ITINS_P : OpndItins< - IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM ->; - def SSE_BIT_ITINS_P : OpndItins< IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM >; @@ -312,13 +308,13 @@ multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, NoItinerary, d>, + pat_rr, IIC_SSE_BIT_P_RR, d>, Sched<[WriteVecLogic]>; def rm : PI, + pat_rm, IIC_SSE_BIT_P_RM, d>, Sched<[WriteVecLogicLd, ReadAfterLd]>; } @@ -2789,13 +2785,13 @@ let Predicates = [HasAVX2, prd] in // These are ordered here for pattern ordering requirements with the fp versions defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1, NoVLX>; + SSE_BIT_ITINS_P, 1, NoVLX>; defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1, NoVLX>; + SSE_BIT_ITINS_P, 1, NoVLX>; defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1, NoVLX>; + SSE_BIT_ITINS_P, 1, NoVLX>; defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 0, NoVLX>; + SSE_BIT_ITINS_P, 0, NoVLX>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions diff --git a/test/CodeGen/X86/sse-schedule.ll b/test/CodeGen/X86/sse-schedule.ll index 699451a02f6..83b1b532112 100644 --- a/test/CodeGen/X86/sse-schedule.ll +++ b/test/CodeGen/X86/sse-schedule.ll @@ -114,12 +114,8 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; ATOM-LABEL: test_andps: ; ATOM: # BB#0: -; ATOM-NEXT: andps %xmm1, %xmm0 -; ATOM-NEXT: andps (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: andps (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] @@ -174,12 +170,8 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; ATOM-LABEL: test_andnotps: ; ATOM: # BB#0: -; ATOM-NEXT: andnps %xmm1, %xmm0 -; ATOM-NEXT: andnps (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] @@ -239,7 +231,7 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ATOM: # BB#0: ; ATOM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [5:5.00] ; ATOM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: orps %xmm1, %xmm0 +; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_cmpps: @@ -1748,12 +1740,8 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; ; ATOM-LABEL: test_orps: ; ATOM: # BB#0: -; ATOM-NEXT: orps %xmm1, %xmm0 -; ATOM-NEXT: orps (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: orps (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] @@ -2686,12 +2674,8 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; ATOM-LABEL: test_xorps: ; ATOM: # BB#0: -; ATOM-NEXT: xorps %xmm1, %xmm0 -; ATOM-NEXT: xorps (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: xorps (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll index 1fa1b6e4f10..f7013bd4e3d 100644 --- a/test/CodeGen/X86/sse2-schedule.ll +++ b/test/CodeGen/X86/sse2-schedule.ll @@ -115,8 +115,8 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; ATOM-LABEL: test_andpd: ; ATOM: # BB#0: -; ATOM-NEXT: andpd %xmm1, %xmm0 -; ATOM-NEXT: andpd (%rdi), %xmm0 +; ATOM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: andpd (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; @@ -175,8 +175,8 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; ATOM-LABEL: test_andnotpd: ; ATOM: # BB#0: -; ATOM-NEXT: andnpd %xmm1, %xmm0 -; ATOM-NEXT: andnpd (%rdi), %xmm0 +; ATOM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: andnpd (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; @@ -239,7 +239,7 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ATOM: # BB#0: ; ATOM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [6:3.00] ; ATOM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: orpd %xmm1, %xmm0 +; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_cmppd: @@ -923,7 +923,7 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; ATOM: # BB#0: ; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] ; ATOM-NEXT: cvtsd2ss %xmm0, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: xorps %xmm0, %xmm0 +; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] ; ATOM-NEXT: cvtsd2ss %xmm1, %xmm0 # sched: [6:3.00] ; ATOM-NEXT: addss %xmm2, %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] @@ -1101,7 +1101,7 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; ATOM: # BB#0: ; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] ; ATOM-NEXT: cvtss2sd %xmm0, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: xorps %xmm0, %xmm0 +; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] ; ATOM-NEXT: cvtss2sd %xmm1, %xmm0 # sched: [6:3.00] ; ATOM-NEXT: addsd %xmm2, %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] @@ -2757,8 +2757,8 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; ATOM-LABEL: test_orpd: ; ATOM: # BB#0: -; ATOM-NEXT: orpd %xmm1, %xmm0 -; ATOM-NEXT: orpd (%rdi), %xmm0 +; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: orpd (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; @@ -6842,8 +6842,8 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; ATOM-LABEL: test_xorpd: ; ATOM: # BB#0: -; ATOM-NEXT: xorpd %xmm1, %xmm0 -; ATOM-NEXT: xorpd (%rdi), %xmm0 +; ATOM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: xorpd (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; -- 2.11.0