From 546952fd600ddba3f1eb6d4f93ff4eb42821a962 Mon Sep 17 00:00:00 2001
From: David Goodwin
Date: Tue, 11 Aug 2009 22:38:43 +0000
Subject: [PATCH] Allow a zero cycle stage to reserve/require a FU without advancing the cycle counter.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78736 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/ExactHazardRecognizer.cpp | 21 ++++++++---
 lib/Target/ARM/ARM.td                 |  2 +-
 lib/Target/ARM/ARMSchedule.td         | 15 +++++---
 lib/Target/ARM/ARMScheduleV6.td       | 16 ++++-----
 lib/Target/ARM/ARMScheduleV7.td       | 65 ++++++++++++++++++++++-------------
 5 files changed, 75 insertions(+), 44 deletions(-)

diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index 5a89d223598..48043f286cc 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -39,7 +39,7 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData
 
       unsigned ItinDepth = 0;
       for (; IS != E; ++IS)
-        ItinDepth += IS->Cycles;
+        ItinDepth += std::max(1U, IS->Cycles);
 
       ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
     }
@@ -89,9 +89,13 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
   unsigned idx = SU->getInstr()->getDesc().getSchedClass();
   for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
        IS != E; ++IS) {
+    // If the stage's cycles are 0, then we must have the FU free in
+    // the current cycle, but we don't advance the cycle time.
+    unsigned StageCycles = std::max(1U, IS->Cycles);
+
     // We must find one of the stage's units free for every cycle the
     // stage is occupied.
-    for (unsigned int i = 0; i < IS->Cycles; ++i) {
+    for (unsigned int i = 0; i < StageCycles; ++i) {
       assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
 
       unsigned index = getFutureIndex(cycle);
@@ -103,7 +107,8 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
         return Hazard;
       }
 
-      ++cycle;
+      if (IS->Cycles > 0)
+        ++cycle;
     }
   }
 
@@ -118,9 +123,13 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
   unsigned idx = SU->getInstr()->getDesc().getSchedClass();
   for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
        IS != E; ++IS) {
+    // If the stage's cycles are 0, then we must reserve the FU in the
+    // current cycle, but we don't advance the cycle time.
+    unsigned StageCycles = std::max(1U, IS->Cycles);
+
     // We must reserve one of the stage's units for every cycle the
     // stage is occupied.
-    for (unsigned int i = 0; i < IS->Cycles; ++i) {
+    for (unsigned int i = 0; i < StageCycles; ++i) {
       assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
 
       unsigned index = getFutureIndex(cycle);
@@ -135,7 +144,9 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
       assert(freeUnit && "No function unit available!");
 
       Scoreboard[index] |= freeUnit;
-      ++cycle;
+
+      if (IS->Cycles > 0)
+        ++cycle;
     }
   }
 
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index ce28149b54f..eb6304c448e 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -114,7 +114,7 @@ def : Processor<"arm1156t2f-s", V6Itineraries,
 
 // V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureNEONFP]>; -def : Processor<"cortex-a9", V7Itineraries, +def : Processor<"cortex-a9", CortexA9Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index a5ca773ef1d..11a7b2a717a 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -10,8 +10,9 @@ //===----------------------------------------------------------------------===// // Functional units across ARM processors // -def FU_Pipe0 : FuncUnit; // pipeline 0 issue -def FU_Pipe1 : FuncUnit; // pipeline 1 issue +def FU_Issue : FuncUnit; // issue +def FU_Pipe0 : FuncUnit; // pipeline 0 +def FU_Pipe1 : FuncUnit; // pipeline 1 def FU_LdSt0 : FuncUnit; // pipeline 0 load/store def FU_LdSt1 : FuncUnit; // pipeline 1 load/store @@ -19,9 +20,11 @@ def FU_LdSt1 : FuncUnit; // pipeline 1 load/store // Instruction Itinerary classes used for ARM // def IIC_iALU : InstrItinClass; +def IIC_iMPY : InstrItinClass; def IIC_iLoad : InstrItinClass; def IIC_iStore : InstrItinClass; def IIC_fpALU : InstrItinClass; +def IIC_fpMPY : InstrItinClass; def IIC_fpLoad : InstrItinClass; def IIC_fpStore : InstrItinClass; def IIC_Br : InstrItinClass; @@ -31,12 +34,14 @@ def IIC_Br : InstrItinClass; def GenericItineraries : ProcessorItineraries<[ InstrItinData]>, + InstrItinData]>, InstrItinData, InstrStage<1, [FU_LdSt0]>]>, - InstrItinData, InstrStage<1, [FU_LdSt0]>]>, InstrItinData]>, - InstrItinData]>, + InstrItinData]>, InstrItinData]>, - InstrItinData]> + InstrItinData]>, + InstrItinData, InstrStage<1, [FU_LdSt0]>]>, + InstrItinData]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index f0b8116a038..755547a678a 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -11,18 +11,16 @@ // //===----------------------------------------------------------------------===// +// TODO: this should model an ARM11 // Single issue pipeline so every itinerary starts with FU_pipe0 def V6Itineraries : ProcessorItineraries<[ - // single-cycle integer ALU InstrItinData]>, - // loads have an extra cycle of latency, but are fully pipelined + InstrItinData]>, InstrItinData, InstrStage<1, [FU_LdSt0]>]>, - InstrItinData, InstrStage<1, [FU_LdSt0]>]>, - // fully-pipelined stores InstrItinData]>, - InstrItinData]>, - // fp ALU is not pipelined - InstrItinData]>, - // no delay slots, so the latency of a branch is unimportant - InstrItinData]> + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData, InstrStage<1, [FU_LdSt0]>]>, + InstrItinData]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 30360bc9c41..8a7b42eb729 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -11,34 +11,51 @@ // //===----------------------------------------------------------------------===// -// Single issue pipeline so every itinerary starts with FU_Pipe0 -def V7Itineraries : ProcessorItineraries<[ - // single-cycle integer ALU - InstrItinData]>, - // loads have an extra cycle of latency, but are fully pipelined - InstrItinData, InstrStage<1, [FU_LdSt0]>]>, - InstrItinData, InstrStage<1, [FU_LdSt0]>]>, - // fully-pipelined stores - InstrItinData]>, - InstrItinData]>, - // fp ALU is not pipelined - InstrItinData]>, - // no delay slots, so the latency of a branch is unimportant - 
-  InstrItinData]>
-]>;
-
 // Dual issue pipeline so every itinerary starts with FU_Pipe0 | FU_Pipe1
 def CortexA8Itineraries : ProcessorItineraries<[
-  // single-cycle integer ALU
+  // two fully-pipelined integer ALU pipelines
   InstrItinData]>,
+  // one fully-pipelined integer Multiply pipeline
+  // function units are used in alpha order, so use FU_Pipe1
+  // for the Multiply pipeline
+  InstrItinData]>,
   // loads have an extra cycle of latency, but are fully pipelined
-  InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
-  InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
+  // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                InstrStage<1, [FU_LdSt0]>]>,
   // fully-pipelined stores
-  InstrItinData]>,
-  InstrItinData]>,
-  // fp ALU is not pipelined
-  InstrItinData]>,
+  // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
   // no delay slots, so the latency of a branch is unimportant
-  InstrItinData]>
+  InstrItinData]>,
+
+  // VFP ALU is not pipelined so stall all issues
+  // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
+  InstrItinData]>,
+  // VFP MPY is not pipelined so stall all issues
+  // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
+  InstrItinData]>,
+  // loads have an extra cycle of latency, but are fully pipelined
+  // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                InstrStage<1, [FU_LdSt0]>]>,
+  // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+  InstrItinData,
+                InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
 ]>;
+
+// FIXME
+def CortexA9Itineraries : ProcessorItineraries<[
+  InstrItinData]>,
+  InstrItinData]>,
+  InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData]>,
+  InstrItinData]>,
+  InstrItinData]>,
+  InstrItinData]>,
+  InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData]>
+]>;
-- 
2.11.0
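
The cycle-accounting rule this patch introduces can be illustrated with a small self-contained sketch. It is not LLVM code: the names (ToyStage, ToyScoreboard, reserveStages) are hypothetical, the scoreboard is a flat vector rather than the circular buffer the real ExactHazardRecognizer indexes through getFutureIndex(), and the unit-selection order is simplified. What it keeps is the patched behavior: a stage with Cycles == 0 still reserves one of its units in the current cycle, but does not advance the cycle counter, which is what lets a 0-cycle FU_Issue stage cap Cortex-A8 at one load/store per cycle without stretching the itinerary.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-ins for llvm::InstrStage and the hazard-recognizer
// scoreboard; only the fields needed for the cycle-accounting rule are kept.
struct ToyStage {
  unsigned Cycles;    // cycles the stage occupies a unit; 0 is now allowed
  uint32_t UnitsMask; // bitmask of functional units the stage may use
};

// Scoreboard[c] holds the units already reserved in cycle (now + c).
using ToyScoreboard = std::vector<uint32_t>;

// Mirror of the patched EmitInstruction loop: a stage always reserves a unit
// for max(1, Cycles) scoreboard slots, but only advances `cycle` when
// Cycles > 0, so a 0-cycle stage and the stage after it share a cycle.
void reserveStages(ToyScoreboard &Scoreboard, const std::vector<ToyStage> &Stages) {
  unsigned cycle = 0;
  for (const ToyStage &IS : Stages) {
    unsigned StageCycles = std::max(1U, IS.Cycles);
    for (unsigned i = 0; i < StageCycles; ++i) {
      assert(cycle < Scoreboard.size() && "Scoreboard depth exceeded!");
      uint32_t freeUnits = IS.UnitsMask & ~Scoreboard[cycle];
      assert(freeUnits && "No function unit available!");
      // Take the lowest-numbered free unit (selection order is simplified
      // relative to the real recognizer).
      uint32_t freeUnit = freeUnits & ~(freeUnits - 1);
      Scoreboard[cycle] |= freeUnit;
      if (IS.Cycles > 0) // only non-zero-cycle stages consume time
        ++cycle;
    }
  }
}

int main() {
  // Shape of the Cortex-A8 load itinerary above: a 0-cycle issue stage,
  // a 1-cycle pipe stage, and a 1-cycle load/store stage.
  enum : uint32_t { Issue = 1u << 0, Pipe0 = 1u << 1, LdSt0 = 1u << 2 };
  std::vector<ToyStage> Load = {{0, Issue}, {1, Pipe0}, {1, LdSt0}};

  ToyScoreboard SB(8, 0);
  reserveStages(SB, Load);
  // The issue unit and a pipe share cycle 0; the load/store unit is
  // reserved in cycle 1.
  std::printf("cycle 0: 0x%x, cycle 1: 0x%x\n", (unsigned)SB[0], (unsigned)SB[1]);
  return 0;
}

Compiled as C++11 or later, the sketch prints "cycle 0: 0x3, cycle 1: 0x4": the zero-cycle issue stage and the pipe stage occupy the same cycle, so the load still completes its functional-unit reservations in two cycles rather than three.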