OSDN Git Service

Revert r338365: [X86] Improved sched models for X86 BT*rr instructions.
author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 31 Jul 2018 13:00:51 +0000 (13:00 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 31 Jul 2018 13:00:51 +0000 (13:00 +0000)
https://reviews.llvm.org/D49243

Contains WIP code that should not have been included.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338369 91177308-0d34-0410-b5e6-96231b3b80d8

13 files changed:
lib/Target/X86/X86InstrInfo.td
lib/Target/X86/X86SchedBroadwell.td
lib/Target/X86/X86SchedHaswell.td
lib/Target/X86/X86SchedSandyBridge.td
lib/Target/X86/X86SchedSkylakeClient.td
lib/Target/X86/X86SchedSkylakeServer.td
lib/Target/X86/X86Schedule.td
lib/Target/X86/X86ScheduleAtom.td
lib/Target/X86/X86ScheduleBtVer2.td
lib/Target/X86/X86ScheduleSLM.td
lib/Target/X86/X86ScheduleZnver1.td
utils/TableGen/CodeGenSchedule.cpp
utils/TableGen/CodeGenSchedule.h

index bc7afd3..7509b31 100644 (file)
@@ -1750,7 +1750,7 @@ def LAHF     : I<0x9F, RawFrm, (outs),  (ins), "lahf", []>,  // AH = flags
 // Bit tests instructions: BT, BTS, BTR, BTC.
 
 let Defs = [EFLAGS] in {
-let SchedRW = [WriteBitTest] in {
+let SchedRW = [WriteALU] in {
 def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "bt{w}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
@@ -1783,7 +1783,7 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
                   []>, TB, NotMemoryFoldable;
 }
 
-let SchedRW = [WriteBitTest] in {
+let SchedRW = [WriteALU] in {
 def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
@@ -1818,7 +1818,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
 } // SchedRW
 
 let hasSideEffects = 0 in {
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
 def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>,
                 OpSize16, TB, NotMemoryFoldable;
@@ -1842,7 +1842,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  NotMemoryFoldable;
 }
 
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
 def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
 def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1861,7 +1861,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
                     Requires<[In64BitMode]>;
 }
 
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
 def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
                 OpSize16, TB, NotMemoryFoldable;
@@ -1885,7 +1885,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  NotMemoryFoldable;
 }
 
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
 def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
                     OpSize16, TB;
@@ -1908,7 +1908,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
                     Requires<[In64BitMode]>;
 }
 
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
 def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>,
                 OpSize16, TB, NotMemoryFoldable;
@@ -1932,7 +1932,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  NotMemoryFoldable;
 }
 
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
 def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
 def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
index a1945c6..3aeb2da 100755 (executable)
@@ -137,7 +137,6 @@ def  : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
   let NumMicroOps = 3;
 }
 def  : WriteRes<WriteLAHFSAHF, [BWPort06]>;
-def  : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
 
 // Bit counts.
 defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
@@ -604,6 +603,14 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>;
+def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8",
+                                           "BT(16|32|64)rr",
+                                           "BTC(16|32|64)ri8",
+                                           "BTC(16|32|64)rr",
+                                           "BTR(16|32|64)ri8",
+                                           "BTR(16|32|64)rr",
+                                           "BTS(16|32|64)ri8",
+                                           "BTS(16|32|64)rr")>;
 
 def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
   let Latency = 1;
index 9abff9c..824e9e2 100644 (file)
@@ -150,7 +150,6 @@ def  : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
   let NumMicroOps = 3;
 }
 def  : WriteRes<WriteLAHFSAHF, [HWPort06]>;
-def  : WriteRes<WriteBitTest,[HWPort06]>;
 
 // This is for simple LEAs with one or two input operands.
 // The complex ones can only execute on port 1, and they require two cycles on
@@ -896,6 +895,14 @@ def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>;
+def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8",
+                                           "BT(16|32|64)rr",
+                                           "BTC(16|32|64)ri8",
+                                           "BTC(16|32|64)rr",
+                                           "BTR(16|32|64)ri8",
+                                           "BTR(16|32|64)rr",
+                                           "BTS(16|32|64)ri8",
+                                           "BTS(16|32|64)rr")>;
 
 def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
   let Latency = 1;
index 060da44..d43c4e3 100644 (file)
@@ -145,7 +145,6 @@ def  : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
   let NumMicroOps = 3;
 }
 def  : WriteRes<WriteLAHFSAHF, [SBPort05]>;
-def  : WriteRes<WriteBitTest,[SBPort05]>;
 
 // This is for simple LEAs with one or two input operands.
 // The complex ones can only execute on port 1, and they require two cycles on
@@ -571,6 +570,14 @@ def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
+def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8",
+                                           "BT(16|32|64)rr",
+                                           "BTC(16|32|64)ri8",
+                                           "BTC(16|32|64)rr",
+                                           "BTR(16|32|64)ri8",
+                                           "BTR(16|32|64)rr",
+                                           "BTS(16|32|64)ri8",
+                                           "BTS(16|32|64)rr")>;
 
 def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
   let Latency = 1;
index f64f381..53d8e63 100644 (file)
@@ -136,7 +136,6 @@ def  : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
   let NumMicroOps = 3;
 }
 def  : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
-def  : WriteRes<WriteBitTest,[SKLPort06]>; //
 
 // Bit counts.
 defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
@@ -606,6 +605,14 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
+def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8",
+                                            "BT(16|32|64)rr",
+                                            "BTC(16|32|64)ri8",
+                                            "BTC(16|32|64)rr",
+                                            "BTR(16|32|64)ri8",
+                                            "BTR(16|32|64)rr",
+                                            "BTS(16|32|64)ri8",
+                                            "BTS(16|32|64)rr")>;
 
 def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
   let Latency = 1;
index 0e2615e..129fc2e 100755 (executable)
@@ -136,7 +136,6 @@ def  : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
   let NumMicroOps = 3;
 }
 def  : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
-def  : WriteRes<WriteBitTest,[SKXPort06]>; //
 
 // Integer shifts and rotates.
 defm : SKXWriteResPair<WriteShift, [SKXPort06],  1>;
@@ -619,6 +618,14 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
+def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8",
+                                            "BT(16|32|64)rr",
+                                            "BTC(16|32|64)ri8",
+                                            "BTC(16|32|64)rr",
+                                            "BTR(16|32|64)ri8",
+                                            "BTR(16|32|64)rr",
+                                            "BTS(16|32|64)ri8",
+                                            "BTS(16|32|64)rr")>;
 
 def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
   let Latency = 1;
index d1ec8c6..44079bf 100644 (file)
@@ -142,7 +142,6 @@ def  WriteFCMOV : SchedWrite; // X87 conditional move.
 def  WriteSETCC : SchedWrite; // Set register based on condition code.
 def  WriteSETCCStore : SchedWrite;
 def  WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
-def  WriteBitTest  : SchedWrite; // Bit Test - TODO add memory folding support
 
 // Integer shifts and rotates.
 defm WriteShift : X86SchedWritePair;
index 86cb0aa..2880d47 100644 (file)
@@ -108,7 +108,6 @@ def  : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
   let Latency = 2;
   let ResourceCycles = [2];
 }
-def : WriteRes<WriteBitTest,[AtomPort01]>;
 
 defm : X86WriteResUnsupported<WriteIMulH>;
 
index 499e345..44687e3 100644 (file)
@@ -188,7 +188,6 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
 def  : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
 def  : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
 def  : WriteRes<WriteLAHFSAHF, [JALU01]>;
-def  : WriteRes<WriteBitTest,[JALU01]>;
 
 // This is for simple LEAs with one or two input operands.
 def : WriteRes<WriteLEA, [JALU01]>;
index d45136c..9107e9f 100644 (file)
@@ -120,7 +120,6 @@ def  : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
   let ResourceCycles = [2,1];
 }
 def  : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
-def  : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
 
 // This is for simple LEAs with one or two input operands.
 // The complex ones can only execute on port 1, and they require two cycles on
index 8c13dc1..5c8ed43 100644 (file)
@@ -198,7 +198,6 @@ defm : ZnWriteResPair<WriteCMOV2,  [ZnALU], 1>;
 def  : WriteRes<WriteSETCC,  [ZnALU]>;
 def  : WriteRes<WriteSETCCStore,  [ZnALU, ZnAGU]>;
 defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
-def  : WriteRes<WriteBitTest,[ZnALU]>;
 
 // Bit counts.
 defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
index ebfdd36..9331fad 100644 (file)
@@ -21,7 +21,6 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/raw_ostream.h"
@@ -34,16 +33,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "subtarget-emitter"
 
-#ifdef EXPENSIVE_CHECKS
-// FIXME: TableGen is failed iff EXPENSIVE_CHECKS defined
-static constexpr bool OptCheckSchedClasses = true;
-#else
-// FIXME: the default value should be false
-static cl::opt<bool> OptCheckSchedClasses(
-    "check-sched-class-table", cl::init(true), cl::Hidden,
-    cl::desc("Check sched class table on different types of inconsistencies"));
-#endif
-
 #ifndef NDEBUG
 static void dumpIdxVec(ArrayRef<unsigned> V) {
   for (unsigned Idx : V)
@@ -234,7 +223,6 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
   collectOptionalProcessorInfo();
 
   checkCompleteness();
-  checkSchedClasses();
 }
 
 void CodeGenSchedModels::collectRetireControlUnits() {
@@ -711,86 +699,6 @@ void CodeGenSchedModels::collectSchedClasses() {
   }
 }
 
-void CodeGenSchedModels::checkSchedClasses() {
-  if (!OptCheckSchedClasses)
-    return;
-
-  std::string str;
-  raw_string_ostream OS(str);
-
-  // Check each instruction for each model to see if its overridden too often.
-  // Iff YES it's a candidate for more fine-grained Sched Class.
-  for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
-    StringRef InstName = Inst->TheDef->getName();
-    unsigned SCIdx = getSchedClassIdx(*Inst);
-    if (!SCIdx)
-      continue;
-    CodeGenSchedClass &SC = getSchedClass(SCIdx);
-    if (SC.Writes.empty())
-      continue;
-    const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs;
-    if (RWDefs.empty())
-      continue;
-    // FIXME: what should be threshold here?
-    if (RWDefs.size() > (ProcModels.size() / 2)) {
-      // FIXME: this dump hangs the execution !!!
-      // SC.dump(&Target.getSchedModels());
-      OS << "SchedRW machine model for inst '" << InstName << "' (";
-      for (auto I : SC.Writes)
-        OS << " " << SchedWrites[I].Name;
-      for (auto I : SC.Reads)
-        OS << " " << SchedReads[I].Name;
-      OS << " ) should be updated /improvedbecause it's overriden " << RWDefs.size()
-         << " times out of " << ProcModels.size() << " models:\n\t";
-      for (Record *RWDef : RWDefs)
-        OS << " " << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName;
-      PrintWarning(OS.str());
-      str.clear();
-    }
-
-    // TODO: here we should check latency/uop in SC vs. RWDef. Maybe we
-    // should do it iff RWDefs.size() == 1 only.
-    // Iff latency/uop are the same then warn about unnecessary redefine.
-    if (RWDefs.size()) {
-      for (Record *RWDef : RWDefs) {
-        IdxVec Writes;
-        IdxVec Reads;
-        findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes,
-                Reads);
-
-        if ((Writes.size() == SC.Writes.size()) &&
-            (Reads.size() == SC.Reads.size())) {
-          // TODO: do we need sorting Write & Reads?
-          for (unsigned I = 0, S = SC.Writes.size(); I < S; I++) {
-            auto SCSchedW = SchedWrites[SC.Writes[I]];
-            auto SchedW = SchedWrites[Writes[I]];
-            if (!SCSchedW.TheDef || !SchedW.TheDef)
-              continue;
-            const RecordVal *R = SCSchedW.TheDef->getValue("Latency");
-            // FIXME: We should deal with default Latency here
-            if (!R || !R->getValue())
-              continue;
-            auto SCLat = SCSchedW.TheDef->getValueAsInt("Latency");
-            auto SCuOp = SCSchedW.TheDef->getValueAsInt("NumMicroOps");
-            auto Lat = SchedW.TheDef->getValueAsInt("Latency");
-            auto uOp = SchedW.TheDef->getValueAsInt("NumMicroOps");
-            if ((SCLat == Lat) && (SCuOp == uOp))
-              OS << "Overridden verion of inst '" << InstName
-                 << "' has the same latency & uOp values as the original one "
-                    "for model '"
-                 << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName
-                 << "'\n";
-          }
-          if (!str.empty()) {
-            PrintWarning(OS.str());
-            str.clear();
-          }
-        }
-      }
-    }
-  }
-}
-
 // Get the SchedClass index for an instruction.
 unsigned
 CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const {
index ce53d67..07c1159 100644 (file)
@@ -443,8 +443,6 @@ private:
 
   void collectSchedClasses();
 
-  void checkSchedClasses();
-
   void collectRetireControlUnits();
 
   void collectRegisterFiles();