return isZeroIdiom(MI, Mask);
}
+ /// Returns true if MI is a candidate for move elimination.
+ ///
+ /// A candidate for move elimination may be optimized out at the register
+ /// renaming stage. Subtargets can specify the set of optimizable moves by
+ /// instantiating tablegen class `IsOptimizableRegisterMove` (see
+ /// llvm/Target/TargetInstrPredicate.td). SubtargetEmitter processes all the
+ /// definitions of that class and auto-generates an override for this method.
+ ///
+ /// This default implementation always returns false.
+ virtual bool isOptimizableRegisterMove(const MachineInstr *MI) const {
+ return false;
+ }
+
/// True if the subtarget should run MachineScheduler after aggressive
/// coalescing.
///
return isZeroIdiom(MI, Mask, CPUID);
}
+ /// Returns true if MI is a candidate for move elimination on the processor
+ /// identified by CPUID. This default implementation always returns false.
+ ///
+ /// Subtargets may apply different constraints to optimizable register moves.
+ /// For example, on most X86 subtargets, a candidate for move elimination
+ /// cannot specify the same register for both source and destination.
+ virtual bool isOptimizableRegisterMove(const MCInst &MI,
+ unsigned CPUID) const {
+ return false;
+ }
+
/// Given a branch instruction try to get the address the branch
/// targets. Return true on success, and the address in Target.
virtual bool
struct MCRegisterCostEntry {
unsigned RegisterClassID;
unsigned Cost;
+ bool AllowMoveElimination; // True if this class allows move elimination.
};
/// A register file descriptor.
uint16_t NumRegisterCostEntries;
// Index of the first cost entry in MCExtraProcessorInfo::RegisterCostTable.
uint16_t RegisterCostEntryIdx;
+ // A value of zero means: there is no limit in the number of moves that can be
+ // eliminated every cycle.
+ uint16_t MaxMovesEliminatedPerCycle;
+ // True if this register file only knows how to optimize register moves from
+ // known zero registers.
+ bool AllowZeroMoveEliminationOnly;
};
/// Provide extra details about the machine processor.
}
// Convenience classes and definitions used by processor scheduling models to
-// describe dependency breaking instructions.
+// describe dependency breaking instructions and move elimination candidates.
let UpdatesOpcodeMask = 1 in {
def IsZeroIdiomDecl : STIPredicateDecl<"isZeroIdiom">;
} // UpdatesOpcodeMask
+def IsOptimizableRegisterMoveDecl
+ : STIPredicateDecl<"isOptimizableRegisterMove">;
+
class IsZeroIdiomFunction<list<DepBreakingClass> classes>
: STIPredicate<IsZeroIdiomDecl, classes>;
class IsDepBreakingFunction<list<DepBreakingClass> classes>
: STIPredicate<IsDepBreakingDecl, classes>;
+
+class IsOptimizableRegisterMove<list<InstructionEquivalenceClass> classes>
+ : STIPredicate<IsOptimizableRegisterMoveDecl, classes>;
// - The number of physical registers which can be used for register renaming
// purpose.
// - The cost of a register rename.
+// - The set of registers that allow move elimination.
+// - The maximum number of moves that can be eliminated every cycle.
+// - Whether move elimination is limited to register moves whose input
+// is known to be zero.
//
// The cost of a rename is the number of physical registers allocated by the
// register alias table to map the new definition. By default, register can be
// partial write is combined with the previous super-register definition. We
// should add support for these cases, and correctly model merge problems with
// partial register accesses.
+//
+// Field MaxMovesEliminatedPerCycle specifies how many moves can be eliminated
+// every cycle. A default value of zero for that field means: there is no limit
+// to the number of moves that can be eliminated by this register file.
+//
+// An instruction MI is a candidate for move elimination if a call to
+// method TargetSubtargetInfo::isOptimizableRegisterMove(MI) returns true (see
+// llvm/CodeGen/TargetSubtargetInfo.h, and llvm/MC/MCInstrAnalysis.h).
+//
+// Subtargets can instantiate tablegen class IsOptimizableRegisterMove (see
+// llvm/Target/TargetInstrPredicate.td) to customize the set of move elimination
+// candidates. By default, no instruction is a valid move elimination candidate.
+//
+// A register move MI is eliminated only if:
+// - MI is a move elimination candidate.
+// - The destination register is from a register class that allows move
+// elimination (see field `AllowMoveElimination` below).
+// - Constraints on the move kind, and the maximum number of moves that can be
+// eliminated per cycle are all met.
+
class RegisterFile<int numPhysRegs, list<RegisterClass> Classes = [],
- list<int> Costs = []> {
+ list<int> Costs = [], list<bit> AllowMoveElim = [],
+ int MaxMoveElimPerCy = 0, bit AllowZeroMoveElimOnly = 0> {
list<RegisterClass> RegClasses = Classes;
list<int> RegCosts = Costs;
+ list<bit> AllowMoveElimination = AllowMoveElim; // Indexed like RegClasses.
int NumPhysRegs = numPhysRegs;
+ int MaxMovesEliminatedPerCycle = MaxMoveElimPerCy; // 0 means no limit.
+ bit AllowZeroMoveEliminationOnly = AllowZeroMoveElimOnly;
SchedMachineModel SchedModel = ?;
}
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
-def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
+def JIntegerPRF : RegisterFile<64, [GR64, CCR], [1, 1], [1, 0],
+ 0, // Max moves that can be eliminated per cycle.
+ 1>; // Restrict move elimination to zero regs.
// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
-def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
+
+// The PRF in the floating point unit can eliminate a move from a MMX or SSE
+// register that is known to be zero (i.e. it has been zeroed using a zero-idiom
+// dependency breaking instruction, or via VZEROALL).
+// Reference: Section 21.8 "AMD Bobcat and Jaguar pipeline: Dependency-breaking
+// instructions" - Agner Fog's "microarchitecture.pdf"
+def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2], [1, 1, 0],
+ 0, // Max moves that can be eliminated per cycle.
+ 1>; // Restrict move elimination to zero regs.
// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
], ZeroIdiomPredicate>
]>;
+def : IsOptimizableRegisterMove<[
+ InstructionEquivalenceClass<[
+ // GPR variants.
+ MOV32rr, MOV64rr,
+
+ // MMX variants.
+ MMX_MOVQ64rr,
+
+ // SSE variants.
+ MOVAPSrr, MOVUPSrr,
+ MOVAPDrr, MOVUPDrr,
+ MOVDQArr, MOVDQUrr,
+
+ // AVX variants.
+ VMOVAPSrr, VMOVUPSrr,
+ VMOVAPDrr, VMOVUPDrr,
+ VMOVDQArr, VMOVDQUrr
+ ], TruePred >
+]>;
+
} // SchedModel
# CHECK-NEXT: 1 3 1.00 vaddps %xmm1, %xmm1, %xmm2
# CHECK: Register File statistics:
-# CHECK-NEXT: Total number of mappings created: 6
-# CHECK-NEXT: Max number of mappings used: 5
+# CHECK-NEXT: Total number of mappings created: 3
+# CHECK-NEXT: Max number of mappings used: 3
# CHECK: * Register File #1 -- JFpuPRF:
# CHECK-NEXT: Number of physical registers: 72
-# CHECK-NEXT: Total number of mappings created: 6
-# CHECK-NEXT: Max number of mappings used: 5
+# CHECK-NEXT: Total number of mappings created: 3
+# CHECK-NEXT: Max number of mappings used: 3
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - -
+# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmovaps %xmm0, %xmm1
+# CHECK-NEXT: - - - - - - - - - - - - - - vmovaps %xmm0, %xmm1
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm1, %xmm1, %xmm2
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DR . . vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [0,1] DeER . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [0,1] DR . . vmovaps %xmm0, %xmm1
# CHECK-NEXT: [0,2] .DeeeER . vaddps %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [1,0] .D----R . vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [1,1] . DeE--R . vmovaps %xmm0, %xmm1
-# CHECK-NEXT: [1,2] . D=eeeER. vaddps %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [1,1] . D----R . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [1,2] . DeeeER . vaddps %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [2,0] . D----R. vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [2,1] . DeE---R vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [2,1] . D----R. vmovaps %xmm0, %xmm1
# CHECK-NEXT: [2,2] . DeeeER vaddps %xmm1, %xmm1, %xmm2
# CHECK: Average Wait times (based on the timeline view):
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 0.0 0.0 2.7 vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: 1. 3 1.0 1.0 1.7 vmovaps %xmm0, %xmm1
-# CHECK-NEXT: 2. 3 1.3 0.0 0.0 vaddps %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 1. 3 0.0 0.0 2.7 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 2. 3 1.0 1.0 0.0 vaddps %xmm1, %xmm1, %xmm2
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 27
-# CHECK-NEXT: Total Cycles: 19
+# CHECK-NEXT: Total Cycles: 15
# CHECK-NEXT: Total uOps: 27
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.42
-# CHECK-NEXT: IPC: 1.42
+# CHECK-NEXT: uOps Per Cycle: 1.80
+# CHECK-NEXT: IPC: 1.80
# CHECK-NEXT: Block RThroughput: 4.5
# CHECK: Instruction Info:
# CHECK-NEXT: 1 1 0.50 movdqu %xmm5, %xmm0
# CHECK: Register File statistics:
-# CHECK-NEXT: Total number of mappings created: 21
-# CHECK-NEXT: Max number of mappings used: 8
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
# CHECK: * Register File #1 -- JFpuPRF:
# CHECK-NEXT: Number of physical registers: 72
-# CHECK-NEXT: Total number of mappings created: 21
-# CHECK-NEXT: Max number of mappings used: 8
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - 2.00 2.00 3.33 3.67 - - - - 1.33 1.67 -
+# CHECK-NEXT: - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - - - pxor %mm0, %mm0
-# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - movq %mm0, %mm1
+# CHECK-NEXT: - - - - - - - - - - - - - - movq %mm0, %mm1
# CHECK-NEXT: - - - - - - - - - - - - - - xorps %xmm0, %xmm0
-# CHECK-NEXT: - - - - 1.00 0.33 0.67 - - - - - - - movaps %xmm0, %xmm1
-# CHECK-NEXT: - - - 1.00 - 0.33 0.67 - - - - - - - movups %xmm1, %xmm2
-# CHECK-NEXT: - - - - 1.00 0.67 0.33 - - - - - - - movapd %xmm2, %xmm3
-# CHECK-NEXT: - - - 1.00 - 0.33 0.67 - - - - - - - movupd %xmm3, %xmm4
-# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - movdqa %xmm4, %xmm5
-# CHECK-NEXT: - - - - - 0.67 0.33 - - - - 0.33 0.67 - movdqu %xmm5, %xmm0
+# CHECK-NEXT: - - - - - - - - - - - - - - movaps %xmm0, %xmm1
+# CHECK-NEXT: - - - - - - - - - - - - - - movups %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - - - - - movapd %xmm2, %xmm3
+# CHECK-NEXT: - - - - - - - - - - - - - - movupd %xmm3, %xmm4
+# CHECK-NEXT: - - - - - - - - - - - - - - movdqa %xmm4, %xmm5
+# CHECK-NEXT: - - - - - - - - - - - - - - movdqu %xmm5, %xmm0
# CHECK: Timeline view:
-# CHECK-NEXT: 012345678
+# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DR . . . . pxor %mm0, %mm0
-# CHECK-NEXT: [0,1] DeER . . . . movq %mm0, %mm1
-# CHECK-NEXT: [0,2] .D-R . . . . xorps %xmm0, %xmm0
-# CHECK-NEXT: [0,3] .DeER. . . . movaps %xmm0, %xmm1
-# CHECK-NEXT: [0,4] . DeER . . . movups %xmm1, %xmm2
-# CHECK-NEXT: [0,5] . D=eER . . . movapd %xmm2, %xmm3
-# CHECK-NEXT: [0,6] . D=eER . . . movupd %xmm3, %xmm4
-# CHECK-NEXT: [0,7] . D==eER . . . movdqa %xmm4, %xmm5
-# CHECK-NEXT: [0,8] . D==eER. . . movdqu %xmm5, %xmm0
-# CHECK-NEXT: [1,0] . D----R. . . pxor %mm0, %mm0
-# CHECK-NEXT: [1,1] . DeE--R . . movq %mm0, %mm1
-# CHECK-NEXT: [1,2] . D----R . . xorps %xmm0, %xmm0
-# CHECK-NEXT: [1,3] . .DeE--R . . movaps %xmm0, %xmm1
-# CHECK-NEXT: [1,4] . .D=eE-R . . movups %xmm1, %xmm2
-# CHECK-NEXT: [1,5] . . D=eE-R . . movapd %xmm2, %xmm3
-# CHECK-NEXT: [1,6] . . D==eER . . movupd %xmm3, %xmm4
-# CHECK-NEXT: [1,7] . . D==eER . . movdqa %xmm4, %xmm5
-# CHECK-NEXT: [1,8] . . D===eER. . movdqu %xmm5, %xmm0
-# CHECK-NEXT: [2,0] . . D----R. . pxor %mm0, %mm0
-# CHECK-NEXT: [2,1] . . DeE---R . movq %mm0, %mm1
-# CHECK-NEXT: [2,2] . . D----R . xorps %xmm0, %xmm0
-# CHECK-NEXT: [2,3] . . DeE---R . movaps %xmm0, %xmm1
-# CHECK-NEXT: [2,4] . . .DeE--R . movups %xmm1, %xmm2
-# CHECK-NEXT: [2,5] . . .D=eE--R. movapd %xmm2, %xmm3
-# CHECK-NEXT: [2,6] . . . D=eE-R. movupd %xmm3, %xmm4
-# CHECK-NEXT: [2,7] . . . D==eE-R movdqa %xmm4, %xmm5
-# CHECK-NEXT: [2,8] . . . D==eER movdqu %xmm5, %xmm0
+# CHECK: [0,0] DR . . . pxor %mm0, %mm0
+# CHECK-NEXT: [0,1] DR . . . movq %mm0, %mm1
+# CHECK-NEXT: [0,2] .DR . . . xorps %xmm0, %xmm0
+# CHECK-NEXT: [0,3] .DR . . . movaps %xmm0, %xmm1
+# CHECK-NEXT: [0,4] . DR . . . movups %xmm1, %xmm2
+# CHECK-NEXT: [0,5] . DR . . . movapd %xmm2, %xmm3
+# CHECK-NEXT: [0,6] . DR. . . movupd %xmm3, %xmm4
+# CHECK-NEXT: [0,7] . DR. . . movdqa %xmm4, %xmm5
+# CHECK-NEXT: [0,8] . DR . . movdqu %xmm5, %xmm0
+# CHECK-NEXT: [1,0] . DR . . pxor %mm0, %mm0
+# CHECK-NEXT: [1,1] . DR . . movq %mm0, %mm1
+# CHECK-NEXT: [1,2] . DR . . xorps %xmm0, %xmm0
+# CHECK-NEXT: [1,3] . .DR . . movaps %xmm0, %xmm1
+# CHECK-NEXT: [1,4] . .DR . . movups %xmm1, %xmm2
+# CHECK-NEXT: [1,5] . . DR . . movapd %xmm2, %xmm3
+# CHECK-NEXT: [1,6] . . DR . . movupd %xmm3, %xmm4
+# CHECK-NEXT: [1,7] . . DR. . movdqa %xmm4, %xmm5
+# CHECK-NEXT: [1,8] . . DR. . movdqu %xmm5, %xmm0
+# CHECK-NEXT: [2,0] . . DR . pxor %mm0, %mm0
+# CHECK-NEXT: [2,1] . . DR . movq %mm0, %mm1
+# CHECK-NEXT: [2,2] . . DR . xorps %xmm0, %xmm0
+# CHECK-NEXT: [2,3] . . DR . movaps %xmm0, %xmm1
+# CHECK-NEXT: [2,4] . . .DR . movups %xmm1, %xmm2
+# CHECK-NEXT: [2,5] . . .DR . movapd %xmm2, %xmm3
+# CHECK-NEXT: [2,6] . . . DR. movupd %xmm3, %xmm4
+# CHECK-NEXT: [2,7] . . . DR. movdqa %xmm4, %xmm5
+# CHECK-NEXT: [2,8] . . . DR movdqu %xmm5, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 0.0 0.0 2.7 pxor %mm0, %mm0
-# CHECK-NEXT: 1. 3 1.0 1.0 1.7 movq %mm0, %mm1
-# CHECK-NEXT: 2. 3 0.0 0.0 3.0 xorps %xmm0, %xmm0
-# CHECK-NEXT: 3. 3 1.0 1.0 1.7 movaps %xmm0, %xmm1
-# CHECK-NEXT: 4. 3 1.3 0.0 1.0 movups %xmm1, %xmm2
-# CHECK-NEXT: 5. 3 2.0 0.0 1.0 movapd %xmm2, %xmm3
-# CHECK-NEXT: 6. 3 2.3 0.0 0.3 movupd %xmm3, %xmm4
-# CHECK-NEXT: 7. 3 3.0 0.0 0.3 movdqa %xmm4, %xmm5
-# CHECK-NEXT: 8. 3 3.3 0.0 0.0 movdqu %xmm5, %xmm0
+# CHECK-NEXT: 0. 3 0.0 0.0 0.0 pxor %mm0, %mm0
+# CHECK-NEXT: 1. 3 0.0 0.0 0.0 movq %mm0, %mm1
+# CHECK-NEXT: 2. 3 0.0 0.0 0.0 xorps %xmm0, %xmm0
+# CHECK-NEXT: 3. 3 0.0 0.0 0.0 movaps %xmm0, %xmm1
+# CHECK-NEXT: 4. 3 0.0 0.0 0.0 movups %xmm1, %xmm2
+# CHECK-NEXT: 5. 3 0.0 0.0 0.0 movapd %xmm2, %xmm3
+# CHECK-NEXT: 6. 3 0.0 0.0 0.0 movupd %xmm3, %xmm4
+# CHECK-NEXT: 7. 3 0.0 0.0 0.0 movdqa %xmm4, %xmm5
+# CHECK-NEXT: 8. 3 0.0 0.0 0.0 movdqu %xmm5, %xmm0
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 21
-# CHECK-NEXT: Total Cycles: 16
+# CHECK-NEXT: Total Cycles: 12
# CHECK-NEXT: Total uOps: 21
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.31
-# CHECK-NEXT: IPC: 1.31
+# CHECK-NEXT: uOps Per Cycle: 1.75
+# CHECK-NEXT: IPC: 1.75
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Instruction Info:
# CHECK-NEXT: 1 1 0.50 vmovdqu %xmm5, %xmm0
# CHECK: Register File statistics:
-# CHECK-NEXT: Total number of mappings created: 18
-# CHECK-NEXT: Max number of mappings used: 9
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
# CHECK: * Register File #1 -- JFpuPRF:
# CHECK-NEXT: Number of physical registers: 72
-# CHECK-NEXT: Total number of mappings created: 18
-# CHECK-NEXT: Max number of mappings used: 9
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - 2.00 2.00 3.00 3.00 - - - - 1.00 1.00 -
+# CHECK-NEXT: - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: - - - - 1.00 0.33 0.67 - - - - - - - vmovaps %xmm0, %xmm1
-# CHECK-NEXT: - - - 1.00 - 0.67 0.33 - - - - - - - vmovups %xmm1, %xmm2
-# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmovapd %xmm2, %xmm3
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmovupd %xmm3, %xmm4
-# CHECK-NEXT: - - - - - 0.33 0.67 - - - - - 1.00 - vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: - - - - - 0.67 0.33 - - - - 1.00 - - vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: - - - - - - - - - - - - - - vmovaps %xmm0, %xmm1
+# CHECK-NEXT: - - - - - - - - - - - - - - vmovups %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - - - - - vmovapd %xmm2, %xmm3
+# CHECK-NEXT: - - - - - - - - - - - - - - vmovupd %xmm3, %xmm4
+# CHECK-NEXT: - - - - - - - - - - - - - - vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: - - - - - - - - - - - - - - vmovdqu %xmm5, %xmm0
# CHECK: Timeline view:
-# CHECK-NEXT: 012345
+# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DR . . . vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [0,1] DeER . . . vmovaps %xmm0, %xmm1
-# CHECK-NEXT: [0,2] .DeER. . . vmovups %xmm1, %xmm2
-# CHECK-NEXT: [0,3] .D=eER . . vmovapd %xmm2, %xmm3
-# CHECK-NEXT: [0,4] . D=eER . . vmovupd %xmm3, %xmm4
-# CHECK-NEXT: [0,5] . D==eER . . vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: [0,6] . D==eER . . vmovdqu %xmm5, %xmm0
-# CHECK-NEXT: [1,0] . D----R . . vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [1,1] . DeE--R. . vmovaps %xmm0, %xmm1
-# CHECK-NEXT: [1,2] . D=eE-R. . vmovups %xmm1, %xmm2
-# CHECK-NEXT: [1,3] . D=eE-R . vmovapd %xmm2, %xmm3
-# CHECK-NEXT: [1,4] . D==eER . vmovupd %xmm3, %xmm4
-# CHECK-NEXT: [1,5] . .D==eER . vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: [1,6] . .D===eER . vmovdqu %xmm5, %xmm0
-# CHECK-NEXT: [2,0] . . D----R . vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: [2,1] . . DeE---R . vmovaps %xmm0, %xmm1
-# CHECK-NEXT: [2,2] . . DeE--R . vmovups %xmm1, %xmm2
-# CHECK-NEXT: [2,3] . . D=eE--R. vmovapd %xmm2, %xmm3
-# CHECK-NEXT: [2,4] . . D=eE-R. vmovupd %xmm3, %xmm4
-# CHECK-NEXT: [2,5] . . D==eE-R vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: [2,6] . . D==eER vmovdqu %xmm5, %xmm0
+# CHECK: [0,0] DR . .. vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] DR . .. vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [0,2] .DR . .. vmovups %xmm1, %xmm2
+# CHECK-NEXT: [0,3] .DR . .. vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [0,4] . DR . .. vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [0,5] . DR . .. vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [0,6] . DR. .. vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: [1,0] . DR. .. vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] . DR .. vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [1,2] . DR .. vmovups %xmm1, %xmm2
+# CHECK-NEXT: [1,3] . DR .. vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [1,4] . DR .. vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [1,5] . .DR .. vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [1,6] . .DR .. vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: [2,0] . . DR .. vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] . . DR .. vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [2,2] . . DR.. vmovups %xmm1, %xmm2
+# CHECK-NEXT: [2,3] . . DR.. vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [2,4] . . DR. vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [2,5] . . DR. vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [2,6] . . DR vmovdqu %xmm5, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 0.0 0.0 2.7 vxorps %xmm0, %xmm0, %xmm0
-# CHECK-NEXT: 1. 3 1.0 1.0 1.7 vmovaps %xmm0, %xmm1
-# CHECK-NEXT: 2. 3 1.3 0.0 1.0 vmovups %xmm1, %xmm2
-# CHECK-NEXT: 3. 3 2.0 0.0 1.0 vmovapd %xmm2, %xmm3
-# CHECK-NEXT: 4. 3 2.3 0.0 0.3 vmovupd %xmm3, %xmm4
-# CHECK-NEXT: 5. 3 3.0 0.0 0.3 vmovdqa %xmm4, %xmm5
-# CHECK-NEXT: 6. 3 3.3 0.0 0.0 vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: 0. 3 0.0 0.0 0.0 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 3 0.0 0.0 0.0 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 2. 3 0.0 0.0 0.0 vmovups %xmm1, %xmm2
+# CHECK-NEXT: 3. 3 0.0 0.0 0.0 vmovapd %xmm2, %xmm3
+# CHECK-NEXT: 4. 3 0.0 0.0 0.0 vmovupd %xmm3, %xmm4
+# CHECK-NEXT: 5. 3 0.0 0.0 0.0 vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: 6. 3 0.0 0.0 0.0 vmovdqu %xmm5, %xmm0
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 15
-# CHECK-NEXT: Total Cycles: 12
+# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Total uOps: 15
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.25
-# CHECK-NEXT: IPC: 1.25
+# CHECK-NEXT: uOps Per Cycle: 1.67
+# CHECK-NEXT: IPC: 1.67
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Instruction Info:
# CHECK-NEXT: 1 1 0.50 movl %edx, %eax
# CHECK: Register File statistics:
-# CHECK-NEXT: Total number of mappings created: 12
-# CHECK-NEXT: Max number of mappings used: 7
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
# CHECK: * Register File #1 -- JFpuPRF:
# CHECK-NEXT: Number of physical registers: 72
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
-# CHECK-NEXT: Total number of mappings created: 12
-# CHECK-NEXT: Max number of mappings used: 7
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - -
+# CHECK-NEXT: - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - - - xorl %eax, %eax
-# CHECK-NEXT: 0.33 0.67 - - - - - - - - - - - - movl %eax, %ebx
-# CHECK-NEXT: 1.00 - - - - - - - - - - - - - movl %ebx, %ecx
-# CHECK-NEXT: - 1.00 - - - - - - - - - - - - movl %ecx, %edx
-# CHECK-NEXT: 0.67 0.33 - - - - - - - - - - - - movl %edx, %eax
+# CHECK-NEXT: - - - - - - - - - - - - - - movl %eax, %ebx
+# CHECK-NEXT: - - - - - - - - - - - - - - movl %ebx, %ecx
+# CHECK-NEXT: - - - - - - - - - - - - - - movl %ecx, %edx
+# CHECK-NEXT: - - - - - - - - - - - - - - movl %edx, %eax
# CHECK: Timeline view:
-# CHECK-NEXT: 01
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 012345678
-# CHECK: [0,0] DR . .. xorl %eax, %eax
-# CHECK-NEXT: [0,1] DeER . .. movl %eax, %ebx
-# CHECK-NEXT: [0,2] .DeER. .. movl %ebx, %ecx
-# CHECK-NEXT: [0,3] .D=eER .. movl %ecx, %edx
-# CHECK-NEXT: [0,4] . D=eER .. movl %edx, %eax
-# CHECK-NEXT: [1,0] . D---R .. xorl %eax, %eax
-# CHECK-NEXT: [1,1] . DeE-R .. movl %eax, %ebx
-# CHECK-NEXT: [1,2] . D=eER .. movl %ebx, %ecx
-# CHECK-NEXT: [1,3] . D=eER .. movl %ecx, %edx
-# CHECK-NEXT: [1,4] . D==eER.. movl %edx, %eax
-# CHECK-NEXT: [2,0] . D---R.. xorl %eax, %eax
-# CHECK-NEXT: [2,1] . DeE--R. movl %eax, %ebx
-# CHECK-NEXT: [2,2] . .DeE-R. movl %ebx, %ecx
-# CHECK-NEXT: [2,3] . .D=eE-R movl %ecx, %edx
-# CHECK-NEXT: [2,4] . . D=eER movl %edx, %eax
+# CHECK: [0,0] DR . . xorl %eax, %eax
+# CHECK-NEXT: [0,1] DR . . movl %eax, %ebx
+# CHECK-NEXT: [0,2] .DR . . movl %ebx, %ecx
+# CHECK-NEXT: [0,3] .DR . . movl %ecx, %edx
+# CHECK-NEXT: [0,4] . DR . . movl %edx, %eax
+# CHECK-NEXT: [1,0] . DR . . xorl %eax, %eax
+# CHECK-NEXT: [1,1] . DR. . movl %eax, %ebx
+# CHECK-NEXT: [1,2] . DR. . movl %ebx, %ecx
+# CHECK-NEXT: [1,3] . DR . movl %ecx, %edx
+# CHECK-NEXT: [1,4] . DR . movl %edx, %eax
+# CHECK-NEXT: [2,0] . DR . xorl %eax, %eax
+# CHECK-NEXT: [2,1] . DR . movl %eax, %ebx
+# CHECK-NEXT: [2,2] . .DR. movl %ebx, %ecx
+# CHECK-NEXT: [2,3] . .DR. movl %ecx, %edx
+# CHECK-NEXT: [2,4] . . DR movl %edx, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 0.0 0.0 2.0 xorl %eax, %eax
-# CHECK-NEXT: 1. 3 1.0 1.0 1.0 movl %eax, %ebx
-# CHECK-NEXT: 2. 3 1.3 0.0 0.3 movl %ebx, %ecx
-# CHECK-NEXT: 3. 3 2.0 0.0 0.3 movl %ecx, %edx
-# CHECK-NEXT: 4. 3 2.3 0.0 0.0 movl %edx, %eax
+# CHECK-NEXT: 0. 3 0.0 0.0 0.0 xorl %eax, %eax
+# CHECK-NEXT: 1. 3 0.0 0.0 0.0 movl %eax, %ebx
+# CHECK-NEXT: 2. 3 0.0 0.0 0.0 movl %ebx, %ecx
+# CHECK-NEXT: 3. 3 0.0 0.0 0.0 movl %ecx, %edx
+# CHECK-NEXT: 4. 3 0.0 0.0 0.0 movl %edx, %eax
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 15
-# CHECK-NEXT: Total Cycles: 12
+# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Total uOps: 15
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.25
-# CHECK-NEXT: IPC: 1.25
+# CHECK-NEXT: uOps Per Cycle: 1.67
+# CHECK-NEXT: IPC: 1.67
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Instruction Info:
# CHECK-NEXT: 1 1 0.50 movq %rdx, %rax
# CHECK: Register File statistics:
-# CHECK-NEXT: Total number of mappings created: 12
-# CHECK-NEXT: Max number of mappings used: 7
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
# CHECK: * Register File #1 -- JFpuPRF:
# CHECK-NEXT: Number of physical registers: 72
# CHECK: * Register File #2 -- JIntegerPRF:
# CHECK-NEXT: Number of physical registers: 64
-# CHECK-NEXT: Total number of mappings created: 12
-# CHECK-NEXT: Max number of mappings used: 7
+# CHECK-NEXT: Total number of mappings created: 0
+# CHECK-NEXT: Max number of mappings used: 0
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - -
+# CHECK-NEXT: - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - - - xorq %rax, %rax
-# CHECK-NEXT: 0.33 0.67 - - - - - - - - - - - - movq %rax, %rbx
-# CHECK-NEXT: 1.00 - - - - - - - - - - - - - movq %rbx, %rcx
-# CHECK-NEXT: - 1.00 - - - - - - - - - - - - movq %rcx, %rdx
-# CHECK-NEXT: 0.67 0.33 - - - - - - - - - - - - movq %rdx, %rax
+# CHECK-NEXT: - - - - - - - - - - - - - - movq %rax, %rbx
+# CHECK-NEXT: - - - - - - - - - - - - - - movq %rbx, %rcx
+# CHECK-NEXT: - - - - - - - - - - - - - - movq %rcx, %rdx
+# CHECK-NEXT: - - - - - - - - - - - - - - movq %rdx, %rax
# CHECK: Timeline view:
-# CHECK-NEXT: 01
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 012345678
-# CHECK: [0,0] DR . .. xorq %rax, %rax
-# CHECK-NEXT: [0,1] DeER . .. movq %rax, %rbx
-# CHECK-NEXT: [0,2] .DeER. .. movq %rbx, %rcx
-# CHECK-NEXT: [0,3] .D=eER .. movq %rcx, %rdx
-# CHECK-NEXT: [0,4] . D=eER .. movq %rdx, %rax
-# CHECK-NEXT: [1,0] . D---R .. xorq %rax, %rax
-# CHECK-NEXT: [1,1] . DeE-R .. movq %rax, %rbx
-# CHECK-NEXT: [1,2] . D=eER .. movq %rbx, %rcx
-# CHECK-NEXT: [1,3] . D=eER .. movq %rcx, %rdx
-# CHECK-NEXT: [1,4] . D==eER.. movq %rdx, %rax
-# CHECK-NEXT: [2,0] . D---R.. xorq %rax, %rax
-# CHECK-NEXT: [2,1] . DeE--R. movq %rax, %rbx
-# CHECK-NEXT: [2,2] . .DeE-R. movq %rbx, %rcx
-# CHECK-NEXT: [2,3] . .D=eE-R movq %rcx, %rdx
-# CHECK-NEXT: [2,4] . . D=eER movq %rdx, %rax
+# CHECK: [0,0] DR . . xorq %rax, %rax
+# CHECK-NEXT: [0,1] DR . . movq %rax, %rbx
+# CHECK-NEXT: [0,2] .DR . . movq %rbx, %rcx
+# CHECK-NEXT: [0,3] .DR . . movq %rcx, %rdx
+# CHECK-NEXT: [0,4] . DR . . movq %rdx, %rax
+# CHECK-NEXT: [1,0] . DR . . xorq %rax, %rax
+# CHECK-NEXT: [1,1] . DR. . movq %rax, %rbx
+# CHECK-NEXT: [1,2] . DR. . movq %rbx, %rcx
+# CHECK-NEXT: [1,3] . DR . movq %rcx, %rdx
+# CHECK-NEXT: [1,4] . DR . movq %rdx, %rax
+# CHECK-NEXT: [2,0] . DR . xorq %rax, %rax
+# CHECK-NEXT: [2,1] . DR . movq %rax, %rbx
+# CHECK-NEXT: [2,2] . .DR. movq %rbx, %rcx
+# CHECK-NEXT: [2,3] . .DR. movq %rcx, %rdx
+# CHECK-NEXT: [2,4] . . DR movq %rdx, %rax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 0.0 0.0 2.0 xorq %rax, %rax
-# CHECK-NEXT: 1. 3 1.0 1.0 1.0 movq %rax, %rbx
-# CHECK-NEXT: 2. 3 1.3 0.0 0.3 movq %rbx, %rcx
-# CHECK-NEXT: 3. 3 2.0 0.0 0.3 movq %rcx, %rdx
-# CHECK-NEXT: 4. 3 2.3 0.0 0.0 movq %rdx, %rax
+# CHECK-NEXT: 0. 3 0.0 0.0 0.0 xorq %rax, %rax
+# CHECK-NEXT: 1. 3 0.0 0.0 0.0 movq %rax, %rbx
+# CHECK-NEXT: 2. 3 0.0 0.0 0.0 movq %rbx, %rcx
+# CHECK-NEXT: 3. 3 0.0 0.0 0.0 movq %rcx, %rdx
+# CHECK-NEXT: 4. 3 0.0 0.0 0.0 movq %rdx, %rax
// registers in register file #0 through the command line flag
// `-register-file-size`.
unsigned RegisterFileIndex = RegisterFiles.size();
- RegisterFiles.emplace_back(RF.NumPhysRegs);
+ RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle,
+ RF.AllowZeroMoveEliminationOnly);
// Special case where there is no register class identifier in the set.
// An empty set of register classes means: this register file contains all
}
IPC = std::make_pair(RegisterFileIndex, RCE.Cost);
Entry.RenameAs = Reg;
+ Entry.AllowMoveElimination = RCE.AllowMoveElimination;
// Assume the same cost for each sub-register.
for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
- // Early exit if the PRF doesn't support move elimination for this register.
- if (!RMTo.second.AllowMoveElimination)
- return false;
-
// From and To must be owned by the same PRF.
const RegisterRenamingInfo &RRIFrom = RMFrom.second;
const RegisterRenamingInfo &RRITo = RMTo.second;
// For now, we assume that there is a strong correlation between registers
// that allow move elimination, and how those same registers are renamed in
// hardware.
- if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID())
+ if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) {
+ // Early exit if the PRF doesn't support move elimination for this register.
+ if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination)
+ return false;
if (!WS.clearsSuperRegisters())
return false;
+ }
RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
if (RMT.MaxMoveEliminatedPerCycle &&
bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID);
bool IsDepBreaking =
IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID);
+ if (MCIA.isOptimizableRegisterMove(MCI, ProcID))
+ NewIS->setOptimizableMove();
// Initialize Reads first.
for (const ReadDescriptor &RD : D.Reads) {
CodeGenProcModel &PM = getProcModel(RF->getValueAsDef("SchedModel"));
PM.RegisterFiles.emplace_back(CodeGenRegisterFile(RF->getName(),RF));
CodeGenRegisterFile &CGRF = PM.RegisterFiles.back();
+ CGRF.MaxMovesEliminatedPerCycle =
+ RF->getValueAsInt("MaxMovesEliminatedPerCycle");
+ CGRF.AllowZeroMoveEliminationOnly =
+ RF->getValueAsBit("AllowZeroMoveEliminationOnly");
// Now set the number of physical registers as well as the cost of registers
// in each register class.
RecVec RegisterClasses = RF->getValueAsListOfDefs("RegClasses");
std::vector<int64_t> RegisterCosts = RF->getValueAsListOfInts("RegCosts");
+ ListInit *MoveElimInfo = RF->getValueAsListInit("AllowMoveElimination");
for (unsigned I = 0, E = RegisterClasses.size(); I < E; ++I) {
int Cost = RegisterCosts.size() > I ? RegisterCosts[I] : 1;
- CGRF.Costs.emplace_back(RegisterClasses[I], Cost);
+
+ bool AllowMoveElim = false;
+ if (MoveElimInfo->size() > I) {
+ BitInit *Val = cast<BitInit>(MoveElimInfo->getElement(I));
+ AllowMoveElim = Val->getValue();
+ }
+
+ CGRF.Costs.emplace_back(RegisterClasses[I], Cost, AllowMoveElim);
}
}
}
struct CodeGenRegisterCost {
Record *RCDef;
unsigned Cost;
- CodeGenRegisterCost(Record *RC, unsigned RegisterCost)
- : RCDef(RC), Cost(RegisterCost) {}
+ // True if registers of this class are allowed to take part in move
+ // elimination. Defaults to false when the constructor argument is omitted.
+ bool AllowMoveElimination;
+ CodeGenRegisterCost(Record *RC, unsigned RegisterCost,
+ bool AllowMoveElim = false)
+ : RCDef(RC), Cost(RegisterCost), AllowMoveElimination(AllowMoveElim) {}
CodeGenRegisterCost(const CodeGenRegisterCost &) = default;
CodeGenRegisterCost &operator=(const CodeGenRegisterCost &) = delete;
};
struct CodeGenRegisterFile {
std::string Name;
Record *RegisterFileDef;
+ // Maximum number of moves eliminated per cycle; zero means no limit.
+ unsigned MaxMovesEliminatedPerCycle;
+ // True if only moves from known-zero registers can be eliminated.
+ bool AllowZeroMoveEliminationOnly;
unsigned NumPhysRegs;
std::vector<CodeGenRegisterCost> Costs;
- CodeGenRegisterFile(StringRef name, Record *def)
- : Name(name), RegisterFileDef(def), NumPhysRegs(0) {}
+ CodeGenRegisterFile(StringRef name, Record *def,
+ unsigned MaxMoveElimPerCy = 0,
+ bool AllowZeroMoveElimOnly = false)
+ : Name(name), RegisterFileDef(def),
+ MaxMovesEliminatedPerCycle(MaxMoveElimPerCy),
+ AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly),
+ NumPhysRegs(0) {}
bool hasDefaultCosts() const { return Costs.empty(); }
};
return 0;
// Print the RegisterCost table first.
- OS << "\n// {RegisterClassID, Register Cost}\n";
+ OS << "\n// {RegisterClassID, Register Cost, AllowMoveElimination }\n";
OS << "static const llvm::MCRegisterCostEntry " << ProcModel.ModelName
<< "RegisterCosts"
<< "[] = {\n";
Record *Rec = RC.RCDef;
if (Rec->getValue("Namespace"))
OS << Rec->getValueAsString("Namespace") << "::";
- OS << Rec->getName() << "RegClassID, " << RC.Cost << "},\n";
+ OS << Rec->getName() << "RegClassID, " << RC.Cost << ", "
+ << RC.AllowMoveElimination << "},\n";
}
}
OS << "};\n";
// Now generate a table with register file info.
- OS << "\n // {Name, #PhysRegs, #CostEntries, IndexToCostTbl}\n";
+ OS << "\n // {Name, #PhysRegs, #CostEntries, IndexToCostTbl, "
+ << "MaxMovesEliminatedPerCycle, AllowZeroMoveEliminationOnly }\n";
OS << "static const llvm::MCRegisterFileDesc " << ProcModel.ModelName
<< "RegisterFiles"
<< "[] = {\n"
- << " { \"InvalidRegisterFile\", 0, 0, 0 },\n";
+ << " { \"InvalidRegisterFile\", 0, 0, 0, 0, 0 },\n";
unsigned CostTblIndex = 0;
for (const CodeGenRegisterFile &RD : ProcModel.RegisterFiles) {
OS << " { ";
OS << '"' << RD.Name << '"' << ", " << RD.NumPhysRegs << ", ";
unsigned NumCostEntries = RD.Costs.size();
- OS << NumCostEntries << ", " << CostTblIndex << "},\n";
+ OS << NumCostEntries << ", " << CostTblIndex << ", "
+ << RD.MaxMovesEliminatedPerCycle << ", "
+ << RD.AllowZeroMoveEliminationOnly << "},\n";
CostTblIndex += NumCostEntries;
}
OS << "};\n";