From 80c5989127f7734230ea7bfdf2d461b4e2c07548 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 8 Dec 2017 16:38:41 +0000 Subject: [PATCH] [X86][SHA] Tag SHA instructions scheduler classes Put these under VecIMul itinerary classes for now - seems to be a good average value git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320161 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 33 ++++++++++------ test/CodeGen/X86/sha-schedule.ll | 84 ++++++++++++++++++++-------------------- 2 files changed, 64 insertions(+), 53 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b15954eec1e..52d2a49d44a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7155,8 +7155,9 @@ let Constraints = "$src1 = $dst" in { // SHA-NI Instructions //===----------------------------------------------------------------------===// +// FIXME: Is there a better scheduler itinerary for SHA than WriteVecIMul? multiclass SHAI_binop Opc, string OpcodeStr, Intrinsic IntId, - bit UsesXMM0 = 0> { + OpndItins itins, bit UsesXMM0 = 0> { def rr : I Opc, string OpcodeStr, Intrinsic IntId, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), [!if(UsesXMM0, (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), - (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8; + (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))], itins.rr>, + T8, Sched<[itins.Sched]>; def rm : I Opc, string OpcodeStr, Intrinsic IntId, (set VR128:$dst, (IntId VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)), (set VR128:$dst, (IntId VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8; + (bc_v4i32 (memopv2i64 addr:$src2)))))], itins.rm>, T8, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { @@ -7184,24 +7187,32 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, - (i8 imm:$src3)))]>, TA; + (i8 imm:$src3)))], IIC_SSE_INTMUL_P_RR>, TA, + Sched<[WriteVecIMul]>; def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), - (i8 imm:$src3)))]>, TA; + (i8 imm:$src3)))], IIC_SSE_INTMUL_P_RM>, TA, + Sched<[WriteVecIMulLd, ReadAfterLd]>; - defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>; - defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>; - defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>; + defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte, + SSE_INTMUL_ITINS_P>; + defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1, + SSE_INTMUL_ITINS_P>; + defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2, + SSE_INTMUL_ITINS_P>; let Uses=[XMM0] in - defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>; + defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, + SSE_INTMUL_ITINS_P, 1>; - defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>; - defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>; + defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1, + SSE_INTMUL_ITINS_P>; + defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2, + SSE_INTMUL_ITINS_P>; } // Aliases with explicit %xmm0 diff --git a/test/CodeGen/X86/sha-schedule.ll b/test/CodeGen/X86/sha-schedule.ll index 7dd492d5584..c7022bad38d 100644 --- a/test/CodeGen/X86/sha-schedule.ll +++ b/test/CodeGen/X86/sha-schedule.ll @@ -11,20 +11,20 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha1msg1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: sha1msg1 %xmm1, %xmm0 -; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 +; GENERIC-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha1msg1: ; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha1msg1 %xmm1, %xmm0 -; GOLDMONT-NEXT: sha1msg1 (%rdi), %xmm0 +; GOLDMONT-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:1.00] +; GOLDMONT-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [7:1.00] ; GOLDMONT-NEXT: retq # sched: [4:1.00] ; ; CANNONLAKE-LABEL: test_sha1msg1: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 -; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 +; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00] +; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_sha1msg1: @@ -42,20 +42,20 @@ declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha1msg2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: sha1msg2 %xmm1, %xmm0 -; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 +; GENERIC-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha1msg2: ; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha1msg2 %xmm1, %xmm0 -; GOLDMONT-NEXT: sha1msg2 (%rdi), %xmm0 +; GOLDMONT-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:1.00] +; GOLDMONT-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [7:1.00] ; GOLDMONT-NEXT: retq # sched: [4:1.00] ; ; CANNONLAKE-LABEL: test_sha1msg2: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 -; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 +; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00] +; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_sha1msg2: @@ -73,20 +73,20 @@ declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha1nexte: ; GENERIC: # %bb.0: -; GENERIC-NEXT: sha1nexte %xmm1, %xmm0 -; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 +; GENERIC-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha1nexte: ; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha1nexte %xmm1, %xmm0 -; GOLDMONT-NEXT: sha1nexte (%rdi), %xmm0 +; GOLDMONT-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:1.00] +; GOLDMONT-NEXT: sha1nexte (%rdi), %xmm0 # sched: [7:1.00] ; GOLDMONT-NEXT: retq # sched: [4:1.00] ; ; CANNONLAKE-LABEL: test_sha1nexte: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 -; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 +; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00] +; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_sha1nexte: @@ -104,20 +104,20 @@ declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha1rnds4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: sha1rnds4 $3, %xmm1, %xmm0 -; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 +; GENERIC-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha1rnds4: ; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha1rnds4 $3, %xmm1, %xmm0 -; GOLDMONT-NEXT: sha1rnds4 $3, (%rdi), %xmm0 +; GOLDMONT-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:1.00] +; GOLDMONT-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [7:1.00] ; GOLDMONT-NEXT: retq # sched: [4:1.00] ; ; CANNONLAKE-LABEL: test_sha1rnds4: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 -; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 +; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00] +; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_sha1rnds4: @@ -139,20 +139,20 @@ declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha256msg1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: sha256msg1 %xmm1, %xmm0 -; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 +; GENERIC-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha256msg1: ; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha256msg1 %xmm1, %xmm0 -; GOLDMONT-NEXT: sha256msg1 (%rdi), %xmm0 +; GOLDMONT-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:1.00] +; GOLDMONT-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [7:1.00] ; GOLDMONT-NEXT: retq # sched: [4:1.00] ; ; CANNONLAKE-LABEL: test_sha256msg1: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 -; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 +; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00] +; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_sha256msg1: @@ -170,20 +170,20 @@ declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_sha256msg2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: sha256msg2 %xmm1, %xmm0 -; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 +; GENERIC-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; GOLDMONT-LABEL: test_sha256msg2: ; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha256msg2 %xmm1, %xmm0 -; GOLDMONT-NEXT: sha256msg2 (%rdi), %xmm0 +; GOLDMONT-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:1.00] +; GOLDMONT-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [7:1.00] ; GOLDMONT-NEXT: retq # sched: [4:1.00] ; ; CANNONLAKE-LABEL: test_sha256msg2: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 -; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 +; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00] +; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_sha256msg2: @@ -203,8 +203,8 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] ; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 -; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 +; GENERIC-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00] +; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [9:1.00] ; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -212,8 +212,8 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, ; GOLDMONT: # %bb.0: ; GOLDMONT-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] ; GOLDMONT-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; GOLDMONT-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 -; GOLDMONT-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 +; GOLDMONT-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00] +; GOLDMONT-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [7:1.00] ; GOLDMONT-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] ; GOLDMONT-NEXT: retq # sched: [4:1.00] ; @@ -221,8 +221,8 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, ; CANNONLAKE: # %bb.0: ; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:1.00] ; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 -; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 +; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00] +; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00] ; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; -- 2.11.0