1. Given that we already have a classification bucket with 'nop' in the name (WriteNop),
that's where 'nop' belongs. Right now, that bucket is only used for prefix bytes and 'pause'.
2. Make the latency of this class '1' for Jaguar to tell the scheduler (and presumably
llvm-mca) how to model the resource requirements better even though a nop has no
dependencies.
Differential Revision: https://reviews.llvm.org/D44608
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327853 91177308-0d34-0410-b5e6-96231b3b80d8
} // Defs = [FPSW]
} // SchedRW
-// Operandless floating-point instructions for the disassembler.
-let SchedRW = [WriteMicrocoded] in {
-def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", [], IIC_FNOP>;
+// Operand-less floating-point instructions for the disassembler.
+def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", [], IIC_FNOP>, Sched<[WriteNop]>;
+let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>;
def FXAM : I<0xD9, MRM_E5, (outs), (ins), "fxam", [], IIC_FXAM>;
//
// Nop
-let hasSideEffects = 0, SchedRW = [WriteZero] in {
+let hasSideEffects = 0, SchedRW = [WriteNop] in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;
def NOOPW : I<0x1f, MRMXm, (outs), (ins i16mem:$zero),
"nop{w}\t$zero", [], IIC_NOP>, TB, OpSize16;
def : WriteRes<WriteSystem, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteFence, [JSAGU]>;
-def : WriteRes<WriteNop, []>;
+// Nops don't have dependencies, so there's no actual latency, but we set this
+// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
+def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
; BTVER2-LABEL: test_nop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: nop # sched: [1:?]
-; BTVER2-NEXT: nopw %di # sched: [1:?]
-; BTVER2-NEXT: nopw (%rcx) # sched: [1:?]
-; BTVER2-NEXT: nopl %esi # sched: [1:?]
-; BTVER2-NEXT: nopl (%r8) # sched: [1:?]
-; BTVER2-NEXT: nopq %rdx # sched: [1:?]
-; BTVER2-NEXT: nopq (%r9) # sched: [1:?]
+; BTVER2-NEXT: nop # sched: [1:0.50]
+; BTVER2-NEXT: nopw %di # sched: [1:0.50]
+; BTVER2-NEXT: nopw (%rcx) # sched: [1:0.50]
+; BTVER2-NEXT: nopl %esi # sched: [1:0.50]
+; BTVER2-NEXT: nopl (%r8) # sched: [1:0.50]
+; BTVER2-NEXT: nopq %rdx # sched: [1:0.50]
+; BTVER2-NEXT: nopq (%r9) # sched: [1:0.50]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pause:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: pause # sched: [1:?]
+; BTVER2-NEXT: pause # sched: [1:0.50]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retq # sched: [4:1.00]
;
ret <4 x float> %7
}
-; 'WriteZero' class instructions.
+; 'WriteZero' and 'WriteNop' class instructions.
define <4 x float> @test_fnop() nounwind {
; GENERIC-LABEL: test_fnop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: nop # sched: [1:?]
+; BTVER2-NEXT: nop # sched: [1:0.50]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SLM-LABEL: test_fnop:
; SLM: # %bb.0:
; SLM-NEXT: #APP
-; SLM-NEXT: fnop # sched: [100:1.00]
+; SLM-NEXT: fnop # sched: [1:?]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; BTVER2-LABEL: test_fnop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fnop # sched: [100:0.50]
+; BTVER2-NEXT: fnop # sched: [1:0.50]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retl # sched: [4:1.00]
;