From: Sanjay Patel Date: Mon, 19 Mar 2018 14:26:50 +0000 (+0000) Subject: [x86] put nops into the WriteNop class and customize for Jaguar X-Git-Tag: android-x86-7.1-r4~3608 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=3cf01d23dd13d4756d0fa18353d8d4b648198651;p=android-x86%2Fexternal-llvm.git [x86] put nops into the WriteNop class and customize for Jaguar 1. Given that we already have a classification bucket with 'nop' in the name, that's where 'nop' belongs. Right now, it's only used for prefix bytes and 'pause'. 2. Make the latency of this class '1' for Jaguar to tell the scheduler (and presumably llvm-mca) how to model the resource requirements better even though a nop has no dependencies. Differential Revision: https://reviews.llvm.org/D44608 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327853 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 619b399ef8d..18071c6baa1 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -667,10 +667,10 @@ def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", [], IIC_FNCLEX>; } // Defs = [FPSW] } // SchedRW -// Operandless floating-point instructions for the disassembler. -let SchedRW = [WriteMicrocoded] in { -def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", [], IIC_FNOP>; +// Operand-less floating-point instructions for the disassembler. +def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", [], IIC_FNOP>, Sched<[WriteNop]>; +let SchedRW = [WriteMicrocoded] in { let Defs = [FPSW] in { def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>; def FXAM : I<0xD9, MRM_E5, (outs), (ins), "fxam", [], IIC_FXAM>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 72709fd1d94..52912728431 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1133,7 +1133,7 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ // // Nop -let hasSideEffects = 0, SchedRW = [WriteZero] in { +let hasSideEffects = 0, SchedRW = [WriteNop] in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>; def NOOPW : I<0x1f, MRMXm, (outs), (ins i16mem:$zero), "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize16; diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 8811a5dfe5f..c092e60c602 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -286,7 +286,9 @@ defm : JWriteResIntPair; def : WriteRes { let Latency = 100; } def : WriteRes { let Latency = 100; } def : WriteRes; -def : WriteRes; +// Nops don't have dependencies, so there's no actual latency, but we set this +// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle. +def : WriteRes { let Latency = 1; } //////////////////////////////////////////////////////////////////////////////// // Floating point. This covers both scalar and vector operations. diff --git a/test/CodeGen/X86/schedule-x86_64.ll b/test/CodeGen/X86/schedule-x86_64.ll index 72a764e64fa..02c468503a4 100644 --- a/test/CodeGen/X86/schedule-x86_64.ll +++ b/test/CodeGen/X86/schedule-x86_64.ll @@ -8389,13 +8389,13 @@ define void @test_nop(i16 %a0, i32 %a1, i64 %a2, i16 *%p0, i32 *%p1, i64 *%p2) o ; BTVER2-LABEL: test_nop: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: nop # sched: [1:?] -; BTVER2-NEXT: nopw %di # sched: [1:?] -; BTVER2-NEXT: nopw (%rcx) # sched: [1:?] -; BTVER2-NEXT: nopl %esi # sched: [1:?] -; BTVER2-NEXT: nopl (%r8) # sched: [1:?] -; BTVER2-NEXT: nopq %rdx # sched: [1:?] -; BTVER2-NEXT: nopq (%r9) # sched: [1:?] +; BTVER2-NEXT: nop # sched: [1:0.50] +; BTVER2-NEXT: nopw %di # sched: [1:0.50] +; BTVER2-NEXT: nopw (%rcx) # sched: [1:0.50] +; BTVER2-NEXT: nopl %esi # sched: [1:0.50] +; BTVER2-NEXT: nopl (%r8) # sched: [1:0.50] +; BTVER2-NEXT: nopq %rdx # sched: [1:0.50] +; BTVER2-NEXT: nopq (%r9) # sched: [1:0.50] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -9500,7 +9500,7 @@ define void @test_pause() optsize { ; BTVER2-LABEL: test_pause: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: pause # sched: [1:?] +; BTVER2-NEXT: pause # sched: [1:0.50] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retq # sched: [4:1.00] ; diff --git a/test/CodeGen/X86/sse-schedule.ll b/test/CodeGen/X86/sse-schedule.ll index 3a123df699f..a58bdd1ad57 100644 --- a/test/CodeGen/X86/sse-schedule.ll +++ b/test/CodeGen/X86/sse-schedule.ll @@ -3763,7 +3763,7 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ret <4 x float> %7 } -; 'WriteZero' class instructions. +; 'WriteZero' and 'WriteNop' class instructions. define <4 x float> @test_fnop() nounwind { ; GENERIC-LABEL: test_fnop: @@ -3840,7 +3840,7 @@ define <4 x float> @test_fnop() nounwind { ; BTVER2: # %bb.0: ; BTVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: #APP -; BTVER2-NEXT: nop # sched: [1:?] +; BTVER2-NEXT: nop # sched: [1:0.50] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retq # sched: [4:1.00] ; diff --git a/test/CodeGen/X86/x87-schedule.ll b/test/CodeGen/X86/x87-schedule.ll index 2950eb586e3..09001fba7c8 100644 --- a/test/CodeGen/X86/x87-schedule.ll +++ b/test/CodeGen/X86/x87-schedule.ll @@ -3209,7 +3209,7 @@ define void @test_fnop() optsize { ; SLM-LABEL: test_fnop: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: fnop # sched: [100:1.00] +; SLM-NEXT: fnop # sched: [1:?] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; @@ -3251,7 +3251,7 @@ define void @test_fnop() optsize { ; BTVER2-LABEL: test_fnop: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fnop # sched: [100:0.50] +; BTVER2-NEXT: fnop # sched: [1:0.50] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ;