#ifndef LLVM_CODEGEN_TARGETSUBTARGETINFO_H
#define LLVM_CODEGEN_TARGETSUBTARGETINFO_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
return 0;
}
+ /// Returns true if \p MI is a dependency breaking zero-idiom instruction
+ /// for the subtarget.
+ ///
+ /// This function also sets bits in \p Mask related to input operands that
+ /// are not in a data dependency relationship. There is one bit for each
+ /// machine operand; implicit operands follow explicit operands in the bit
+ /// representation used for \p Mask. An empty \p Mask (i.e. a mask
+ /// with all bits cleared) means: data dependencies are "broken" for all the
+ /// explicit input machine operands of \p MI.
+ ///
+ /// The default implementation returns false and leaves \p Mask untouched.
+ virtual bool isZeroIdiom(const MachineInstr *MI, APInt &Mask) const {
+ return false;
+ }
+
+ /// Returns true if \p MI is a dependency breaking instruction for the
+ /// subtarget.
+ ///
+ /// Similar in behavior to `isZeroIdiom`. However, it knows how to identify
+ /// all dependency breaking instructions (i.e. not just zero-idioms).
+ ///
+ /// As for `isZeroIdiom`, this method returns a mask of "broken" dependencies
+ /// through \p Mask. (See method `isZeroIdiom` for a detailed description of
+ /// \p Mask).
+ ///
+ /// The default implementation simply forwards to `isZeroIdiom`.
+ virtual bool isDependencyBreaking(const MachineInstr *MI, APInt &Mask) const {
+ return isZeroIdiom(MI, Mask);
+ }
+
/// True if the subtarget should run MachineScheduler after aggressive
/// coalescing.
///
const MCInst &Inst,
APInt &Writes) const;
- /// Returns true if \param Inst is a dependency breaking instruction for the
+ /// Returns true if \p MI is a dependency breaking zero-idiom for the
/// given subtarget.
///
+ /// \p Mask is used to identify input operands that have their dependency
+ /// broken. Each bit of the mask is associated with a specific input operand.
+ /// Bits associated with explicit input operands are laid out first in the
+ /// mask; implicit operands come after explicit operands.
+ ///
+ /// Dependencies are broken only for operands that have their corresponding bit
+ /// set. Operands that have their bit cleared, or that don't have a
+ /// corresponding bit in the mask don't have their dependency broken.
+ /// Note that \p Mask may not be big enough to describe all operands.
+ /// The assumption for operands that don't have a corresponding bit in the
+ /// mask is that those are still data dependent.
+ ///
+ /// The only exception to the rule is for when \p Mask has all zeroes.
+ /// A zero mask means: dependencies are broken for all explicit register
+ /// operands.
+ ///
+ /// The default implementation returns false and leaves \p Mask untouched.
+ virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask,
+ unsigned CPUID) const {
+ return false;
+ }
+
+ /// Returns true if \param MI is a dependency breaking instruction for the
+ /// subtarget associated with \param CPUID.
+ ///
/// The value computed by a dependency breaking instruction is not dependent
/// on the inputs. An example of dependency breaking instruction on X86 is
/// `XOR %eax, %eax`.
- /// TODO: In future, we could implement an alternative approach where this
- /// method returns `true` if the input instruction is not dependent on
- /// some/all of its input operands. An APInt mask could then be used to
- /// identify independent operands.
- virtual bool isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const;
+ ///
+ /// If \param MI is a dependency breaking instruction for subtarget \param
+ /// CPUID, then \param Mask can be inspected to identify independent operands.
+ ///
+ /// Essentially, each bit of the mask corresponds to an input operand.
+ /// Explicit operands are laid out first in the mask; implicit operands follow
+ /// explicit operands. Bits are set for operands that are independent.
+ ///
+ /// Note that the number of bits in Mask may not be equivalent to the sum of
+ /// explicit and implicit operands in \p MI. Operands that don't have a
+ /// corresponding bit in Mask are assumed "not independent".
+ ///
+ /// The only exception is for when \param Mask is all zeroes. That means:
+ /// explicit input operands of \param MI are independent.
+ virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask,
+ unsigned CPUID) const {
+ return isZeroIdiom(MI, Mask, CPUID);
+ }
/// Given a branch instruction try to get the address the branch
/// targets. Return true on success, and the address in Target.
// Forward declarations.
class Instruction;
+class SchedMachineModel;
// A generic machine instruction predicate.
class MCInstPredicate;
string MCInstFnName = MCInstFn;
string MachineInstrFnName = MachineInstrFn;
}
+
+// Used to classify machine instructions based on a machine instruction
+// predicate.
+//
+// Let IC be an InstructionEquivalenceClass definition, and MI a machine
+// instruction. We say that MI belongs to the equivalence class described by IC
+// if and only if the following two conditions are met:
+// a) MI's opcode is in the `opcodes` set, and
+// b) `Predicate` evaluates to true when applied to MI.
+//
+// Instances of this class can be used by processor scheduling models to
+// describe instructions that have a property in common. For example,
+// InstructionEquivalenceClass definitions can be used to identify the set of
+// dependency breaking instructions for a processor model.
+//
+// An (optional) list of operand indices can be used to further describe
+// properties that apply to instruction operands. For example, it can be used to
+// identify the register uses of a dependency breaking instruction that are not
+// in a RAW dependency.
+class InstructionEquivalenceClass<list<Instruction> opcodes,
+ MCInstPredicate pred,
+ list<int> operands = []> {
+ // Opcodes that are members of this class (condition a).
+ list<Instruction> Opcodes = opcodes;
+ // Predicate that must hold for a matching opcode (condition b).
+ MCInstPredicate Predicate = pred;
+ // Optional operand indices; empty by default.
+ list<int> OperandIndices = operands;
+}
+
+// Used by processor models to describe dependency breaking instructions.
+//
+// This is mainly an alias for InstructionEquivalenceClass. Input operand
+// `BrokenDeps` (forwarded as the operand index list of
+// InstructionEquivalenceClass) identifies the set of "broken dependencies".
+// There is one bit for each implicit and explicit input operand. An empty set
+// of broken dependencies means: "explicit input register operands are
+// independent."
+class DepBreakingClass<list<Instruction> opcodes, MCInstPredicate pred,
+ list<int> BrokenDeps = []>
+ : InstructionEquivalenceClass<opcodes, pred, BrokenDeps>;
+
+// A function descriptor used to describe the signature of a predicate method
+// which will be expanded by the STIPredicateExpander into a tablegen'd
+// XXXGenSubtargetInfo class member definition (here, XXX is a target name).
+//
+// It describes the signature of a TargetSubtarget hook, as well as a few extra
+// properties. Examples of extra properties are:
+// - The default return value for the auto-generated function hook.
+// - A list of subtarget hooks (Delegates) that are called from this function.
+//
+class STIPredicateDecl<string name, MCInstPredicate default = FalsePred,
+ bit overrides = 1, bit expandForMC = 1,
+ bit updatesOpcodeMask = 0,
+ list<STIPredicateDecl> delegates = []> {
+ // Name of the predicate method.
+ string Name = name;
+
+ // Predicate used as the default return value of the generated method.
+ MCInstPredicate DefaultReturnValue = default;
+
+ // True if this method is declared as virtual in class TargetSubtargetInfo.
+ bit OverridesBaseClassMember = overrides;
+
+ // True if we need an equivalent predicate function in the MC layer.
+ bit ExpandForMC = expandForMC;
+
+ // True if the autogenerated method has an extra in/out APInt param used as a
+ // mask of operands.
+ bit UpdatesOpcodeMask = updatesOpcodeMask;
+
+ // A list of STIPredicates used by this definition to delegate part of the
+ // computation. For example, STIPredicateFunction `isDependencyBreaking()`
+ // delegates to `isZeroIdiom()` part of its computation.
+ list<STIPredicateDecl> Delegates = delegates;
+}
+
+// A predicate function definition member of class `XXXGenSubtargetInfo`.
+//
+// If `Declaration.ExpandForMC` is true, then SubtargetEmitter
+// will also expand another definition of this method that accepts a MCInst.
+class STIPredicate<STIPredicateDecl declaration,
+ list<InstructionEquivalenceClass> classes> {
+ // The declaration (signature and properties) this definition implements.
+ STIPredicateDecl Declaration = declaration;
+ // Equivalence classes contributed by this definition.
+ list<InstructionEquivalenceClass> Classes = classes;
+ // Left unset here. NOTE(review): presumably supplied by an enclosing
+ // `let SchedModel = ... in` scope of a processor model - confirm.
+ SchedMachineModel SchedModel = ?;
+}
+
+// Convenience classes and definitions used by processor scheduling models to
+// describe dependency breaking instructions.
+//
+// Both declarations below set `UpdatesOpcodeMask`, and otherwise rely on the
+// default template arguments of STIPredicateDecl.
+let UpdatesOpcodeMask = 1 in {
+
+def IsZeroIdiomDecl : STIPredicateDecl<"isZeroIdiom">;
+
+// `isDependencyBreaking` lists `isZeroIdiom` as a delegate.
+let Delegates = [IsZeroIdiomDecl] in
+def IsDepBreakingDecl : STIPredicateDecl<"isDependencyBreaking">;
+
+} // UpdatesOpcodeMask
+
+// STIPredicate definition bound to the `isZeroIdiom` declaration.
+class IsZeroIdiomFunction<list<DepBreakingClass> classes>
+ : STIPredicate<IsZeroIdiomDecl, classes>;
+
+// STIPredicate definition bound to the `isDependencyBreaking` declaration.
+class IsDepBreakingFunction<list<DepBreakingClass> classes>
+ : STIPredicate<IsDepBreakingDecl, classes>;
return false;
}
-bool MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const {
- return false;
-}
-
bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target) const {
if (Inst.getNumOperands() == 0 ||
public:
X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {}
- bool isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const override;
+#define GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS
+#include "X86GenSubtargetInfo.inc"
+
bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
APInt &Mask) const override;
std::vector<std::pair<uint64_t, uint64_t>>
const Triple &TargetTriple) const override;
};
-bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const {
- if (STI.getCPU() == "btver2") {
- // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and
- // Jaguar pipeline", subsection 8 "Dependency-breaking instructions".
- switch (Inst.getOpcode()) {
- default:
- return false;
- case X86::SUB32rr:
- case X86::SUB64rr:
- case X86::SBB32rr:
- case X86::SBB64rr:
- case X86::XOR32rr:
- case X86::XOR64rr:
- case X86::XORPSrr:
- case X86::XORPDrr:
- case X86::VXORPSrr:
- case X86::VXORPDrr:
- case X86::ANDNPSrr:
- case X86::VANDNPSrr:
- case X86::ANDNPDrr:
- case X86::VANDNPDrr:
- case X86::PXORrr:
- case X86::VPXORrr:
- case X86::PANDNrr:
- case X86::VPANDNrr:
- case X86::PSUBBrr:
- case X86::PSUBWrr:
- case X86::PSUBDrr:
- case X86::PSUBQrr:
- case X86::VPSUBBrr:
- case X86::VPSUBWrr:
- case X86::VPSUBDrr:
- case X86::VPSUBQrr:
- case X86::PCMPEQBrr:
- case X86::PCMPEQWrr:
- case X86::PCMPEQDrr:
- case X86::PCMPEQQrr:
- case X86::VPCMPEQBrr:
- case X86::VPCMPEQWrr:
- case X86::VPCMPEQDrr:
- case X86::VPCMPEQQrr:
- case X86::PCMPGTBrr:
- case X86::PCMPGTWrr:
- case X86::PCMPGTDrr:
- case X86::PCMPGTQrr:
- case X86::VPCMPGTBrr:
- case X86::VPCMPGTWrr:
- case X86::VPCMPGTDrr:
- case X86::VPCMPGTQrr:
- case X86::MMX_PXORirr:
- case X86::MMX_PANDNirr:
- case X86::MMX_PSUBBirr:
- case X86::MMX_PSUBDirr:
- case X86::MMX_PSUBQirr:
- case X86::MMX_PSUBWirr:
- case X86::MMX_PCMPGTBirr:
- case X86::MMX_PCMPGTDirr:
- case X86::MMX_PCMPGTWirr:
- case X86::MMX_PCMPEQBirr:
- case X86::MMX_PCMPEQDirr:
- case X86::MMX_PCMPEQWirr:
- return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
- case X86::CMP32rr:
- case X86::CMP64rr:
- return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
- }
- }
-
- return false;
-}
+#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
+#include "X86GenSubtargetInfo.inc"
bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
const MCInst &Inst,
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
+ MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // xmm int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
+ ], ZeroIdiomPredicate>
+]>;
+
} // SchedModel
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# TODO: Fix the processor resource usage for zero-idiom YMM XOR instructions.
+# Those vector XOR instructions should only consume 1cy of JFPU1 (instead
+# of 2cy).
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-1
+
+vaddps %ymm0, %ymm0, %ymm1
+vxorps %ymm1, %ymm1, %ymm1
+vblendps $2, %ymm1, %ymm2, %ymm3
+
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-2
+
+vaddpd %ymm0, %ymm0, %ymm1
+vxorpd %ymm1, %ymm1, %ymm1
+vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-3
+vaddps %xmm0, %xmm1, %xmm2
+vandnps %xmm2, %xmm2, %xmm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ZERO-IDIOM-4
+vaddps %xmm0, %xmm1, %xmm2
+vandnps %xmm2, %xmm2, %xmm3
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - ZERO-IDIOM-1
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 300
+# CHECK-NEXT: Total Cycles: 306
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.96
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 2 1 1.00 vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 2 1 1.00 vblendps $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendps $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] .DeE-R . . vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,2] . DeE-R . . vblendps $2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [1,0] . D=eeeER. . vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [1,1] . DeE--R. . vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [1,2] . D=eE-R . vblendps $2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [2,0] . .DeeeER. vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [2,1] . . D=eER. vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [2,2] . . D=eER vblendps $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1. 3 1.3 1.3 1.0 vxorps %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 2. 3 1.7 0.3 0.7 vblendps $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: [1] Code Region - ZERO-IDIOM-2
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 300
+# CHECK-NEXT: Total Cycles: 306
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.96
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 3 2.00 vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 2 1 1.00 vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 2 1 1.00 vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] .DeE-R . . vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,2] . DeE-R . . vblendpd $2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [1,0] . D=eeeER. . vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [1,1] . DeE--R. . vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [1,2] . D=eE-R . vblendpd $2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT: [2,0] . .DeeeER. vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [2,1] . . D=eER. vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [2,2] . . D=eER vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddpd %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1. 3 1.3 1.3 1.0 vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 2. 3 1.7 0.3 0.7 vblendpd $2, %ymm1, %ymm2, %ymm3
+
+# CHECK: [2] Code Region - ZERO-IDIOM-3
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 105
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.90
+# CHECK-NEXT: IPC: 1.90
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 0 0.50 vandnps %xmm2, %xmm2, %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1] D----R . vandnps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,0] .DeeeER. vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1] .D----R. vandnps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,0] . DeeeER vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [2,1] . D----R vandnps %xmm2, %xmm2, %xmm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1. 3 0.0 0.0 4.0 vandnps %xmm2, %xmm2, %xmm3
+
+# CHECK: [3] Code Region - ZERO-IDIOM-4
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 105
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.90
+# CHECK-NEXT: IPC: 1.90
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 0 0.50 vandnps %xmm2, %xmm2, %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1] D----R . vandnps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,0] .DeeeER. vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1] .D----R. vandnps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,0] . DeeeER vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [2,1] . D----R vandnps %xmm2, %xmm2, %xmm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1. 3 0.0 0.0 4.0 vandnps %xmm2, %xmm2, %xmm3
std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
// Check if this is a dependency breaking instruction.
- bool IsDepBreaking = MCIA.isDependencyBreaking(STI, MCI);
- // FIXME: this is a temporary hack to identify zero-idioms.
- bool IsZeroIdiom = D.isZeroLatency() && IsDepBreaking;
+ APInt Mask;
+
+ unsigned ProcID = STI.getSchedModel().getProcessorID();
+ bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID);
+ bool IsDepBreaking =
+ IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID);
// Initialize Reads first.
for (const ReadDescriptor &RD : D.Reads) {
assert(RegID > 0 && "Invalid register ID found!");
auto RS = llvm::make_unique<ReadState>(RD, RegID);
- if (IsDepBreaking && !RD.isImplicitRead())
- RS->setIndependentFromDef();
+ if (IsDepBreaking) {
+ // A mask of all zeroes means: all explicit input operands are
+ // independent.
+ if (Mask.isNullValue()) {
+ if (!RD.isImplicitRead())
+ RS->setIndependentFromDef();
+ } else {
+ // Check if this register operand is independent according to `Mask`.
+ // Note that Mask may not have enough bits to describe all explicit and
+ // implicit input operands. If this register operand doesn't have a
+ // corresponding bit in Mask, then conservatively assume that it is
+ // dependent.
+ if (Mask.getBitWidth() > RD.UseIndex) {
+ // Okay. This mask describes register use `RD.UseIndex`.
+ if (Mask[RD.UseIndex])
+ RS->setIndependentFromDef();
+ }
+ }
+ }
NewIS->getUses().emplace_back(std::move(RS));
}
// Check MCInstPredicate definitions.
checkMCInstPredicates();
+ // Check STIPredicate definitions.
+ checkSTIPredicates();
+
+ // Find STIPredicate definitions for each processor model, and construct
+ // STIPredicateFunction objects.
+ collectSTIPredicates();
+
checkCompleteness();
}
+void CodeGenSchedModels::checkSTIPredicates() const {
+  // Maps each declared predicate name to the first record that declared it.
+  DenseMap<StringRef, const Record *> SeenDecls;
+
+  // Every STIPredicateDecl must have a unique name. On a clash, report both
+  // locations and abort.
+  const RecVec AllDecls = Records.getAllDerivedDefinitions("STIPredicateDecl");
+  for (const Record *Decl : AllDecls) {
+    StringRef Name = Decl->getValueAsString("Name");
+    const auto Previous = SeenDecls.find(Name);
+    if (Previous == SeenDecls.end()) {
+      SeenDecls[Name] = Decl;
+    } else {
+      PrintError(Decl->getLoc(), "STIPredicate " + Name + " multiply declared.");
+      PrintNote(Previous->second->getLoc(), "Previous declaration was here.");
+      PrintFatalError(Decl->getLoc(), "Invalid STIPredicateDecl found.");
+    }
+  }
+
+  // An InstructionEquivalenceClass without any opcode is rejected.
+  const RecVec AllClasses =
+      Records.getAllDerivedDefinitions("InstructionEquivalenceClass");
+  for (const Record *EC : AllClasses) {
+    RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+    if (!Opcodes.empty())
+      continue;
+    PrintFatalError(EC->getLoc(), "Invalid InstructionEquivalenceClass "
+                                  "defined with an empty opcode list.");
+  }
+}
+
+// Used by function `processSTIPredicate` to construct a mask of machine
+// instruction operands.
+//
+// Returns a default-constructed APInt when `Indices` is empty. Otherwise the
+// returned mask is `max(Indices) + 1` bits wide, with one bit set for each
+// index in `Indices`. Indices must be non-negative.
+static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
+  if (Indices.empty())
+    return APInt();
+
+  int64_t MaxIndex = *std::max_element(Indices.begin(), Indices.end());
+  assert(MaxIndex >= 0 && "Invalid negative indices in input!");
+
+  // Build the mask at its final width directly, rather than zero-extending a
+  // default-constructed APInt. NOTE(review): a default APInt is presumably one
+  // bit wide, so `MaxIndex == 0` would otherwise request a same-width zext,
+  // which some APInt versions assert against.
+  APInt OperandMask(static_cast<unsigned>(MaxIndex) + 1, 0);
+  for (const int64_t Index : Indices) {
+    assert(Index >= 0 && "Invalid negative indices!");
+    OperandMask.setBit(Index);
+  }
+
+  return OperandMask;
+}
+
+// Populates `Fn` with one OpcodeInfo per unique opcode referenced by the
+// InstructionEquivalenceClass lists of its STIPredicate definitions.
+//
+// `ProcModelMap` maps scheduling model records to processor indices; it is
+// used to build the per-processor bit masks. Opcodes are added to `Fn` sorted
+// by processor mask, then predicate mask, then first-seen order.
+//
+// Issues a fatal error if the same opcode is listed by more than one
+// equivalence class for the same processor model.
+static void
+processSTIPredicate(STIPredicateFunction &Fn,
+                    const DenseMap<Record *, unsigned> &ProcModelMap) {
+  DenseMap<const Record *, unsigned> Opcode2Index;
+  using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
+  std::vector<OpcodeMapPair> OpcodeMappings;
+  std::vector<std::pair<APInt, APInt>> OpcodeMasks;
+
+  DenseMap<const Record *, unsigned> Predicate2Index;
+  unsigned NumUniquePredicates = 0;
+
+  // Number unique predicates and opcodes used by InstructionEquivalenceClass
+  // definitions. Each unique opcode will be associated with an OpcodeInfo
+  // object.
+  for (const Record *Def : Fn.getDefinitions()) {
+    RecVec Classes = Def->getValueAsListOfDefs("Classes");
+    for (const Record *EC : Classes) {
+      const Record *Pred = EC->getValueAsDef("Predicate");
+      if (Predicate2Index.find(Pred) == Predicate2Index.end())
+        Predicate2Index[Pred] = NumUniquePredicates++;
+
+      RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+      for (const Record *Opcode : Opcodes) {
+        if (Opcode2Index.find(Opcode) == Opcode2Index.end()) {
+          Opcode2Index[Opcode] = OpcodeMappings.size();
+          OpcodeMappings.emplace_back(Opcode, OpcodeInfo());
+        }
+      }
+    }
+  }
+
+  // Initialize vector `OpcodeMasks` with default values. We want to keep track
+  // of which processors "use" which opcodes. We also want to be able to
+  // identify predicates that are used by different processors for a same
+  // opcode.
+  // This information is used later on by this algorithm to sort OpcodeMapping
+  // elements based on their processor and predicate sets.
+  OpcodeMasks.resize(OpcodeMappings.size());
+  APInt DefaultProcMask(ProcModelMap.size(), 0);
+  APInt DefaultPredMask(NumUniquePredicates, 0);
+  for (std::pair<APInt, APInt> &MaskPair : OpcodeMasks)
+    MaskPair = std::make_pair(DefaultProcMask, DefaultPredMask);
+
+  // Construct a OpcodeInfo object for every unique opcode declared by an
+  // InstructionEquivalenceClass definition.
+  for (const Record *Def : Fn.getDefinitions()) {
+    RecVec Classes = Def->getValueAsListOfDefs("Classes");
+    const Record *SchedModel = Def->getValueAsDef("SchedModel");
+    unsigned ProcIndex = ProcModelMap.find(SchedModel)->second;
+    APInt ProcMask(ProcModelMap.size(), 0);
+    ProcMask.setBit(ProcIndex);
+
+    for (const Record *EC : Classes) {
+      RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+
+      std::vector<int64_t> OpIndices =
+          EC->getValueAsListOfInts("OperandIndices");
+      APInt OperandMask = constructOperandMask(OpIndices);
+
+      const Record *Pred = EC->getValueAsDef("Predicate");
+      APInt PredMask(NumUniquePredicates, 0);
+      PredMask.setBit(Predicate2Index[Pred]);
+
+      for (const Record *Opcode : Opcodes) {
+        unsigned OpcodeIdx = Opcode2Index[Opcode];
+        // The processor bit is already set if a previous class of this
+        // processor model listed the same opcode.
+        if (OpcodeMasks[OpcodeIdx].first[ProcIndex]) {
+          std::string Message =
+              "Opcode " + Opcode->getName().str() +
+              " used by multiple InstructionEquivalenceClass definitions.";
+          PrintFatalError(EC->getLoc(), Message);
+        }
+        OpcodeMasks[OpcodeIdx].first |= ProcMask;
+        OpcodeMasks[OpcodeIdx].second |= PredMask;
+        OpcodeInfo &OI = OpcodeMappings[OpcodeIdx].second;
+
+        OI.addPredicateForProcModel(ProcMask, OperandMask, Pred);
+      }
+    }
+  }
+
+  // Three-way comparison of two masks: fewer set bits order first; on a tie,
+  // more leading zeros order first. Returns a negative value, zero, or a
+  // positive value. Comparing the two keys lexicographically keeps the sort
+  // predicate below a strict weak ordering. Answering "less" on the leading
+  // zero count alone, whenever the population counts merely differ, would
+  // let two elements each compare less than the other.
+  auto CompareMasks = [](const APInt &Lhs, const APInt &Rhs) -> int {
+    unsigned LhsPop = Lhs.countPopulation();
+    unsigned RhsPop = Rhs.countPopulation();
+    if (LhsPop != RhsPop)
+      return LhsPop < RhsPop ? -1 : 1;
+
+    unsigned LhsLZ = Lhs.countLeadingZeros();
+    unsigned RhsLZ = Rhs.countLeadingZeros();
+    if (LhsLZ != RhsLZ)
+      return LhsLZ > RhsLZ ? -1 : 1;
+
+    return 0;
+  };
+
+  // Sort OpcodeMappings elements based on their CPU and predicate masks.
+  // As a last resort, order elements by opcode identifier.
+  llvm::sort(OpcodeMappings.begin(), OpcodeMappings.end(),
+             [&](const OpcodeMapPair &Lhs, const OpcodeMapPair &Rhs) {
+               unsigned LhsIdx = Opcode2Index[Lhs.first];
+               unsigned RhsIdx = Opcode2Index[Rhs.first];
+               const std::pair<APInt, APInt> &LhsMasks = OpcodeMasks[LhsIdx];
+               const std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];
+
+               if (int Order = CompareMasks(LhsMasks.first, RhsMasks.first))
+                 return Order < 0;
+
+               if (int Order = CompareMasks(LhsMasks.second, RhsMasks.second))
+                 return Order < 0;
+
+               return LhsIdx < RhsIdx;
+             });
+
+  // Now construct opcode groups. Groups are used by the SubtargetEmitter when
+  // expanding the body of a STIPredicate function. In particular, each opcode
+  // group is expanded into a sequence of labels in a switch statement.
+  // It identifies opcodes for which different processors define same predicates
+  // and same opcode masks.
+  for (OpcodeMapPair &Info : OpcodeMappings)
+    Fn.addOpcode(Info.first, std::move(Info.second));
+}
+
+void CodeGenSchedModels::collectSTIPredicates() {
+  // For each STIPredicateDecl record, the position of its
+  // STIPredicateFunction inside CodeGenSchedModels::STIPredicates.
+  DenseMap<const Record *, unsigned> Decl2Index;
+
+  // Group STIPredicate definitions by the declaration they refer to.
+  RecVec Definitions = Records.getAllDerivedDefinitions("STIPredicate");
+  for (const Record *Def : Definitions) {
+    const Record *Decl = Def->getValueAsDef("Declaration");
+
+    const auto Known = Decl2Index.find(Decl);
+    if (Known != Decl2Index.end()) {
+      // This declaration already has a function object: extend it.
+      STIPredicates[Known->second].addDefinition(Def);
+      continue;
+    }
+
+    // First definition seen for this declaration: create its function object.
+    Decl2Index[Decl] = STIPredicates.size();
+    STIPredicateFunction NewFn(Decl);
+    NewFn.addDefinition(Def);
+    STIPredicates.emplace_back(std::move(NewFn));
+  }
+
+  // Once all definitions are grouped, compute the opcode info of each function.
+  for (STIPredicateFunction &Fn : STIPredicates)
+    processSTIPredicate(Fn, ProcModelMap);
+}
+
+// Records that the processors in `CpuMask` use `Predicate`, constrained by
+// `OperandMask`, for this opcode. If an entry with the same predicate and
+// operand mask already exists, its processor mask is widened; otherwise a new
+// entry is appended.
+void OpcodeInfo::addPredicateForProcModel(const llvm::APInt &CpuMask,
+                                          const llvm::APInt &OperandMask,
+                                          const Record *Predicate) {
+  for (PredicateInfo &Existing : Predicates) {
+    if (Existing.Predicate != Predicate || Existing.OperandMask != OperandMask)
+      continue;
+
+    // Same constraints already recorded: just add the new processors.
+    Existing.ProcModelMask |= CpuMask;
+    return;
+  }
+
+  Predicates.emplace_back(CpuMask, OperandMask, Predicate);
+}
+
void CodeGenSchedModels::checkMCInstPredicates() const {
RecVec MCPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
if (MCPredicates.empty())
#ifndef LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H
#define LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#endif
};
+/// Used to correlate instructions to MCInstPredicates specified by
+/// InstructionEquivalenceClass tablegen definitions.
+///
+/// Example: a XOR of a register with itself is a known zero-idiom for most
+/// X86 processors.
+///
+/// Each processor can use a (potentially different) InstructionEquivalenceClass
+/// definition to classify zero-idioms. That means XORrr is likely to appear
+/// in more than one equivalence class (where each class definition is
+/// contributed by a different processor).
+///
+/// There is no guarantee that the same MCInstPredicate will be used to describe
+/// equivalence classes that identify XORrr as a zero-idiom.
+///
+/// To be more specific, the requirements for being a zero-idiom XORrr may be
+/// different for different processors.
+///
+/// Class PredicateInfo identifies a subset of processors that specify the same
+/// requirements (i.e. same MCInstPredicate and OperandMask) for an instruction
+/// opcode.
+///
+/// Back to the example. Field `ProcModelMask` will have one bit set for every
+/// processor model that sees XORrr as a zero-idiom, and that specifies the same
+/// set of constraints.
+///
+/// By construction, there can be multiple instances of PredicateInfo associated
+/// with the same instruction opcode. For example, different processors may
+/// define different constraints on the same opcode.
+///
+/// Field OperandMask can be used as an extra constraint.
+/// It may be used to describe conditions that apply only to a subset of the
+/// operands of a machine instruction, and the operands subset may not be the
+/// same for all processor models.
+struct PredicateInfo {
+  llvm::APInt ProcModelMask; // A set of processor model indices.
+  llvm::APInt OperandMask;   // An operand mask.
+  const Record *Predicate;   // MCInstrPredicate definition.
+
+  /// Takes both masks by value and moves them into place, so a caller that
+  /// passes temporaries pays for no extra APInt copy.
+  PredicateInfo(llvm::APInt CpuMask, llvm::APInt Operands, const Record *Pred)
+      : ProcModelMask(std::move(CpuMask)), OperandMask(std::move(Operands)),
+        Predicate(Pred) {}
+
+  /// Two entries are equal when the processor set, the operand mask and the
+  /// predicate record all match.
+  bool operator==(const PredicateInfo &Other) const {
+    return ProcModelMask == Other.ProcModelMask &&
+           OperandMask == Other.OperandMask && Predicate == Other.Predicate;
+  }
+};
+
+/// The list of PredicateInfo entries attached to an opcode.
+///
+/// There is at least one OpcodeInfo object for every opcode specified by a
+/// TIPredicate definition.
+///
+/// The type is move-only: copying is explicitly disabled.
+class OpcodeInfo {
+  llvm::SmallVector<PredicateInfo, 8> Predicates;
+
+public:
+  OpcodeInfo() = default;
+
+  OpcodeInfo(OpcodeInfo &&Other) = default;
+  OpcodeInfo &operator=(OpcodeInfo &&Other) = default;
+
+  OpcodeInfo(const OpcodeInfo &Other) = delete;
+  OpcodeInfo &operator=(const OpcodeInfo &Other) = delete;
+
+  /// Associates \p Predicate and \p OperandMask with the processor models
+  /// whose bits are set in \p CpuMask.
+  void addPredicateForProcModel(const llvm::APInt &CpuMask,
+                                const llvm::APInt &OperandMask,
+                                const Record *Predicate);
+
+  /// Read-only view of the entries collected so far.
+  ArrayRef<PredicateInfo> getPredicates() const { return Predicates; }
+};
+
+/// A set of tablegen instruction definitions that all share one OpcodeInfo,
+/// i.e. that are subject to the same set of constraints.
+///
+/// Groups can be moved but not copied.
+class OpcodeGroup {
+  OpcodeInfo Info;
+  std::vector<const Record *> Opcodes;
+
+public:
+  /// Creates an empty group that takes ownership of \p OpInfo.
+  OpcodeGroup(OpcodeInfo &&OpInfo) : Info(std::move(OpInfo)) {}
+  OpcodeGroup(OpcodeGroup &&Other) = default;
+
+  OpcodeGroup(const OpcodeGroup &Other) = delete;
+  OpcodeGroup &operator=(const OpcodeGroup &Other) = delete;
+
+  const OpcodeInfo &getOpcodeInfo() const { return Info; }
+  ArrayRef<const Record *> getOpcodes() const { return Opcodes; }
+
+  /// Appends \p Opcode to the group. Adding the same record twice is a
+  /// programming error and is caught by the assertion.
+  void addOpcode(const Record *Opcode) {
+    assert(std::find(Opcodes.begin(), Opcodes.end(), Opcode) ==
+               Opcodes.end() &&
+           "Opcode already in set!");
+    Opcodes.push_back(Opcode);
+  }
+};
+
+/// Describes one STIPredicate function. Tablegen backends use this descriptor
+/// to auto-generate the body of a predicate function as a member of the
+/// tablegen'd class XXXGenSubtargetInfo.
+///
+/// It ties together the declaration record, every definition record that
+/// refers to that declaration, and the opcode groups built from them.
+/// Instances can be moved but not copied.
+class STIPredicateFunction {
+  const Record *FunctionDeclaration;
+
+  std::vector<const Record *> Definitions;
+  std::vector<OpcodeGroup> Groups;
+
+public:
+  STIPredicateFunction(const Record *Rec) : FunctionDeclaration(Rec) {}
+  STIPredicateFunction(STIPredicateFunction &&Other) = default;
+
+  STIPredicateFunction(const STIPredicateFunction &Other) = delete;
+  STIPredicateFunction &operator=(const STIPredicateFunction &Other) = delete;
+
+  const Record *getDeclaration() const { return FunctionDeclaration; }
+  ArrayRef<const Record *> getDefinitions() const { return Definitions; }
+  ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
+
+  /// Value of the "Name" field of the declaration record.
+  StringRef getName() const {
+    return FunctionDeclaration->getValueAsString("Name");
+  }
+
+  /// Record stored in the "DefaultReturnValue" field of the declaration.
+  const Record *getDefaultReturnPredicate() const {
+    return FunctionDeclaration->getValueAsDef("DefaultReturnValue");
+  }
+
+  /// Two descriptors are compatible when they refer to the very same
+  /// declaration record.
+  bool isCompatibleWith(const STIPredicateFunction &Other) const {
+    return FunctionDeclaration == Other.FunctionDeclaration;
+  }
+
+  void addDefinition(const Record *Def) { Definitions.push_back(Def); }
+
+  /// Adds \p OpcodeRec to the most recent group when that group carries the
+  /// same predicate list as \p Info; otherwise a new group is opened (consuming
+  /// \p Info) and the opcode goes there. Only the last group is compared, so
+  /// equal lists merge only if they arrive consecutively.
+  void addOpcode(const Record *OpcodeRec, OpcodeInfo &&Info) {
+    const bool StartsNewGroup =
+        Groups.empty() ||
+        Groups.back().getOpcodeInfo().getPredicates() != Info.getPredicates();
+    if (StartsNewGroup)
+      Groups.emplace_back(std::move(Info));
+    Groups.back().addOpcode(OpcodeRec);
+  }
+};
+
/// Top level container for machine model data.
class CodeGenSchedModels {
RecordKeeper &Records;
using InstClassMapTy = DenseMap<Record*, unsigned>;
InstClassMapTy InstrClassMap;
+ std::vector<STIPredicateFunction> STIPredicates;
+
public:
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);
Record *findProcResUnits(Record *ProcResKind, const CodeGenProcModel &PM,
ArrayRef<SMLoc> Loc) const;
+ ArrayRef<STIPredicateFunction> getSTIPredicates() const {
+ return STIPredicates;
+ }
private:
void collectProcModels();
void checkMCInstPredicates() const;
+ void checkSTIPredicates() const;
+
+ void collectSTIPredicates();
+
void checkCompleteness();
void inferFromRW(ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads,
//===----------------------------------------------------------------------===//
#include "PredicateExpander.h"
+#include "CodeGenSchedule.h" // Definition of STIPredicateFunction.
namespace llvm {
llvm_unreachable("No known rules to expand this MCInstPredicate");
}
+/// Prints the signature of the predicate function described by \p Fn.
+///
+/// When expanding a definition the name is qualified with the class prefix and
+/// the line ends with an opening brace; otherwise a declaration is printed,
+/// followed by "override" if the record sets "OverridesBaseClassMember".
+void STIPredicateExpander::expandHeader(raw_ostream &OS,
+                                        const STIPredicateFunction &Fn) {
+  const Record *Decl = Fn.getDeclaration();
+  StringRef Name = Decl->getValueAsString("Name");
+  const bool ForMC = shouldExpandForMC();
+  const bool IsDefinition = shouldExpandDefinition();
+
+  OS.indent(getIndentLevel() * 2) << "bool ";
+  if (IsDefinition)
+    OS << getClassPrefix() << "::";
+  OS << Name << "(";
+
+  // The instruction argument is an MCInst or a MachineInstr, passed either by
+  // reference or by pointer.
+  const char *PassingStyle = isByRef() ? "&" : "*";
+  if (ForMC)
+    OS << "const MCInst " << PassingStyle << "MI";
+  else
+    OS << "const MachineInstr " << PassingStyle << "MI";
+
+  if (Decl->getValueAsBit("UpdatesOpcodeMask"))
+    OS << ", APInt &Mask";
+
+  // Only the MC flavour receives the processor ID as an explicit argument.
+  if (ForMC)
+    OS << ", unsigned ProcessorID) const ";
+  else
+    OS << ") const ";
+
+  if (IsDefinition) {
+    OS << "{\n";
+    return;
+  }
+
+  if (Decl->getValueAsBit("OverridesBaseClassMember"))
+    OS << "override";
+  OS << ";\n";
+}
+
+/// Prints the first statements of the function body: one early-return check
+/// per record listed in "Delegates" and, unless expanding for MC, the local
+/// declaration of ProcessorID.
+///
+/// Increases the indentation level by one; it is left increased on return.
+void STIPredicateExpander::expandPrologue(raw_ostream &OS,
+                                          const STIPredicateFunction &Fn) {
+  const Record *Decl = Fn.getDeclaration();
+  RecVec Delegates = Decl->getValueAsListOfDefs("Delegates");
+  const bool ForwardMask = Decl->getValueAsBit("UpdatesOpcodeMask");
+
+  increaseIndentLevel();
+  const unsigned StmtIndent = getIndentLevel() * 2;
+  const unsigned NestedIndent = (1 + getIndentLevel()) * 2;
+
+  for (const Record *Delegate : Delegates) {
+    OS.indent(StmtIndent) << "if (" << Delegate->getValueAsString("Name")
+                          << "(MI";
+    if (ForwardMask)
+      OS << ", Mask";
+    if (shouldExpandForMC())
+      OS << ", ProcessorID";
+    OS << "))\n";
+    OS.indent(NestedIndent) << "return true;\n\n";
+  }
+
+  // In MC mode ProcessorID is already a function parameter.
+  if (!shouldExpandForMC())
+    OS.indent(StmtIndent)
+        << "unsigned ProcessorID = getSchedModel().getProcessorID();\n";
+}
+
+/// Prints, for every PredicateInfo of \p Group, an
+/// `if (ProcessorID == A || ProcessorID == B ...) { ... }` block that returns
+/// the expanded predicate.
+///
+/// \param ShouldUpdateOpcodeMask when true, each block first assigns the
+/// operand mask of the entry to `Mask` (or clears it when the mask is zero).
+///
+/// The indentation level is the same on exit as on entry.
+void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS,
+                                             const OpcodeGroup &Group,
+                                             bool ShouldUpdateOpcodeMask) {
+  const OpcodeInfo &OI = Group.getOpcodeInfo();
+  for (const PredicateInfo &PI : OI.getPredicates()) {
+    const APInt &ProcModelMask = PI.ProcModelMask;
+
+    // An entry that names no processor can never match. Emitting it would
+    // produce a dangling ") {" with no opening "if (", so skip it entirely.
+    if (ProcModelMask.isNullValue())
+      continue;
+
+    bool FirstProcID = true;
+    for (unsigned I = 0, E = ProcModelMask.getActiveBits(); I < E; ++I) {
+      if (!ProcModelMask[I])
+        continue;
+
+      if (FirstProcID) {
+        OS.indent(getIndentLevel() * 2);
+        OS << "if (ProcessorID == " << I;
+      } else {
+        OS << " || ProcessorID == " << I;
+      }
+      FirstProcID = false;
+    }
+
+    OS << ") {\n";
+
+    increaseIndentLevel();
+    OS.indent(getIndentLevel() * 2);
+    if (ShouldUpdateOpcodeMask) {
+      if (PI.OperandMask.isNullValue()) {
+        OS << "Mask.clearAllBits();\n";
+      } else {
+        // Print the mask as an unsigned value. Streaming an APInt with
+        // operator<< prints it as signed, so a mask whose top bit is set
+        // would be emitted as a negative literal and the generated
+        // assignment would set unrelated high bits of `Mask`.
+        OS << "Mask = ";
+        PI.OperandMask.print(OS, /*isSigned=*/false);
+        OS << ";\n";
+      }
+      OS.indent(getIndentLevel() * 2);
+    }
+    OS << "return ";
+    expandPredicate(OS, PI.Predicate);
+    OS << ";\n";
+    decreaseIndentLevel();
+    OS.indent(getIndentLevel() * 2);
+    OS << "}\n";
+  }
+}
+
+/// Prints the `switch` over the instruction opcode: a `default: break;` label
+/// followed, for each opcode group of \p Fn, by the group's `case` labels, the
+/// expanded per-processor checks and a closing `break;`.
+void STIPredicateExpander::expandBody(raw_ostream &OS,
+                                      const STIPredicateFunction &Fn) {
+  const bool MaskIsUpdated =
+      Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask");
+
+  // The switch keyword, its labels and its closing brace share one column.
+  const unsigned SwitchIndent = getIndentLevel() * 2;
+
+  OS.indent(SwitchIndent) << "switch(MI" << (isByRef() ? "." : "->")
+                          << "getOpcode()) {\n";
+  OS.indent(SwitchIndent) << "default:\n";
+  OS.indent(SwitchIndent) << "  break;";
+
+  for (const OpcodeGroup &Group : Fn.getGroups()) {
+    // Each label starts with the newline that terminates the previous line.
+    for (const Record *Opcode : Group.getOpcodes()) {
+      OS << '\n';
+      OS.indent(SwitchIndent)
+          << "case " << getTargetName() << "::" << Opcode->getName() << ":";
+    }
+    OS << '\n';
+
+    increaseIndentLevel();
+    expandOpcodeGroup(OS, Group, MaskIsUpdated);
+    OS.indent(getIndentLevel() * 2) << "break;\n";
+    decreaseIndentLevel();
+  }
+
+  OS.indent(SwitchIndent) << "}\n";
+}
+
+/// Prints the final `return` of the default predicate and the closing brace of
+/// the function, annotated with the qualified function name.
+///
+/// Decreases the indentation level by one before printing the brace.
+void STIPredicateExpander::expandEpilogue(raw_ostream &OS,
+                                          const STIPredicateFunction &Fn) {
+  OS << '\n';
+  OS.indent(getIndentLevel() * 2) << "return ";
+  expandPredicate(OS, Fn.getDefaultReturnPredicate());
+  OS << ";\n";
+
+  decreaseIndentLevel();
+  OS.indent(getIndentLevel() * 2);
+  StringRef Name = Fn.getDeclaration()->getValueAsString("Name");
+  OS << "} // " << getClassPrefix() << "::" << Name << "\n\n";
+}
+
+/// Entry point: prints either the declaration or the full definition of the
+/// predicate function described by \p Fn.
+///
+/// Nothing is printed when expanding for MC and the declaration record does
+/// not set "ExpandForMC".
+void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS,
+                                              const STIPredicateFunction &Fn) {
+  if (shouldExpandForMC() && !Fn.getDeclaration()->getValueAsBit("ExpandForMC"))
+    return;
+
+  expandHeader(OS, Fn);
+
+  // A declaration consists of the header only.
+  if (!shouldExpandDefinition())
+    return;
+
+  expandPrologue(OS, Fn);
+  expandBody(OS, Fn);
+  expandEpilogue(OS, Fn);
+}
+
} // namespace llvm
bool shouldNegate() const { return NegatePredicate; }
bool shouldExpandForMC() const { return ExpandForMC; }
unsigned getIndentLevel() const { return IndentLevel; }
+ StringRef getTargetName() const { return TargetName; }
void setByRef(bool Value) { EmitCallsByRef = Value; }
void flipNegatePredicate() { NegatePredicate = !NegatePredicate; }
void setNegatePredicate(bool Value) { NegatePredicate = Value; }
void setExpandForMC(bool Value) { ExpandForMC = Value; }
+ void setIndentLevel(unsigned Level) { IndentLevel = Level; }
void increaseIndentLevel() { ++IndentLevel; }
void decreaseIndentLevel() { --IndentLevel; }
- void setIndentLevel(unsigned Level) { IndentLevel = Level; }
using RecVec = std::vector<Record *>;
void expandTrue(raw_ostream &OS);
void expandStatement(raw_ostream &OS, const Record *Rec);
};
+// Forward declarations.
+class STIPredicateFunction;
+class OpcodeGroup;
+
+/// A PredicateExpander that prints whole STIPredicate functions, either as
+/// in-class declarations or as out-of-line definitions.
+///
+/// `ExpandDefinition` selects between the two forms (it starts as false, i.e.
+/// declarations). `ClassPrefix` is the class name used to qualify out-of-line
+/// definitions; it is held as a StringRef, so the caller must keep the
+/// underlying string alive while the expander is in use.
+class STIPredicateExpander : public PredicateExpander {
+  StringRef ClassPrefix;
+  bool ExpandDefinition;
+
+  // Non-copyable. The deleted operations take a STIPredicateExpander so that
+  // they really are the copy constructor and copy assignment operator; an
+  // overload taking the base class would leave the implicit ones in place.
+  STIPredicateExpander(const STIPredicateExpander &) = delete;
+  STIPredicateExpander &operator=(const STIPredicateExpander &) = delete;
+
+  void expandHeader(raw_ostream &OS, const STIPredicateFunction &Fn);
+  void expandPrologue(raw_ostream &OS, const STIPredicateFunction &Fn);
+  void expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group,
+                         bool ShouldUpdateOpcodeMask);
+  void expandBody(raw_ostream &OS, const STIPredicateFunction &Fn);
+  void expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn);
+
+public:
+  /// Creates an expander for \p Target with an empty class prefix that prints
+  /// declarations.
+  STIPredicateExpander(StringRef Target)
+      : PredicateExpander(Target), ClassPrefix(), ExpandDefinition(false) {}
+
+  bool shouldExpandDefinition() const { return ExpandDefinition; }
+  StringRef getClassPrefix() const { return ClassPrefix; }
+  void setClassPrefix(StringRef S) { ClassPrefix = S; }
+  void setExpandDefinition(bool Value) { ExpandDefinition = Value; }
+
+  /// Prints the declaration or definition of \p Fn to \p OS, depending on the
+  /// current settings.
+  void expandSTIPredicate(raw_ostream &OS, const STIPredicateFunction &Fn);
+};
+
} // namespace llvm
#endif
void emitSchedModelHelpersImpl(raw_ostream &OS,
bool OnlyExpandMCInstPredicates = false);
void emitGenMCSubtargetInfo(raw_ostream &OS);
+ void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS);
void EmitSchedModel(raw_ostream &OS);
void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
<< " unsigned CPUID) const {\n"
<< " return " << Target << "_MC"
<< "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID);\n"
- << "} // " << ClassName << "::resolveVariantSchedClass\n";
+ << "} // " << ClassName << "::resolveVariantSchedClass\n\n";
+
+ STIPredicateExpander PE(Target);
+ PE.setClassPrefix(ClassName);
+ PE.setExpandDefinition(true);
+ PE.setByRef(false);
+ PE.setIndentLevel(0);
+
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
}
void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
OS << "};\n";
}
+/// Emits the MC flavour of every STIPredicate function in two
+/// preprocessor-guarded sections: declarations under
+/// GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS, then definitions (qualified with
+/// "<Target>MCInstrAnalysis") under GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS.
+void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) {
+  STIPredicateExpander PE(Target);
+  PE.setExpandForMC(true);
+  PE.setByRef(true);
+
+  // Runs the expander, in its current configuration, over all predicates.
+  auto ExpandAll = [&]() {
+    for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+      PE.expandSTIPredicate(OS, Fn);
+  };
+
+  // Section 1: declarations.
+  OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n"
+     << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
+  ExpandAll();
+  OS << "#endif // GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
+
+  // Section 2: definitions. The prefix string must outlive the expansion
+  // because the expander only keeps a StringRef to it.
+  OS << "\n#ifdef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n"
+     << "#undef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
+  std::string ClassPrefix = Target + "MCInstrAnalysis";
+  PE.setExpandDefinition(true);
+  PE.setClassPrefix(ClassPrefix);
+  PE.setIndentLevel(0);
+  ExpandAll();
+  OS << "#endif // GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
+}
+
//
// SubtargetEmitter::run - Main subtarget enumeration emitter.
//
<< " const;\n";
if (TGT.getHwModes().getNumModeIds() > 1)
OS << " unsigned getHwMode() const override;\n";
+
+ STIPredicateExpander PE(Target);
+ PE.setByRef(false);
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
+
OS << "};\n"
<< "} // end namespace llvm\n\n";
OS << "} // end namespace llvm\n\n";
OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n";
+
+ EmitMCInstrAnalysisPredicateFunctions(OS);
}
namespace llvm {