From 90c64cbaa124e0e8541680efeaa56f0e6eb78d9a Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Thu, 7 Mar 2013 09:03:52 +0000 Subject: [PATCH] R600/SI: add proper formal parameter handling for SI MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176623 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.td | 1 + lib/Target/R600/AMDGPUCallingConv.td | 42 ++++++++++++++ lib/Target/R600/AMDGPUISelLowering.cpp | 20 +++---- lib/Target/R600/AMDGPUISelLowering.h | 9 +-- lib/Target/R600/AMDILISelLowering.cpp | 5 -- lib/Target/R600/SIISelLowering.cpp | 101 +++++++++++++++++++++++++++++++++ lib/Target/R600/SIISelLowering.h | 7 +++ 7 files changed, 163 insertions(+), 22 deletions(-) create mode 100644 lib/Target/R600/AMDGPUCallingConv.td diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 40f474161a7..1a26c77d6bb 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -38,3 +38,4 @@ include "AMDGPUInstrInfo.td" include "AMDGPUIntrinsics.td" include "AMDGPURegisterInfo.td" include "AMDGPUInstructions.td" +include "AMDGPUCallingConv.td" diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td new file mode 100644 index 00000000000..45ae37ef0c7 --- /dev/null +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -0,0 +1,42 @@ +//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the AMD Radeon GPUs. +// +//===----------------------------------------------------------------------===// + +// Inversion of CCIfInReg +class CCIfNotInReg : CCIf<"!ArgFlags.isInReg()", A> {} + +// Calling convention for SI +def CC_SI : CallingConv<[ + + CCIfInReg>>, + + CCIfInReg>>, + + CCIfNotInReg>> + +]>; + +def CC_AMDGPU : CallingConv<[ + CCIf<"State.getTarget().getSubtarget().device()"# + "->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo> +]>; diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 0a33264686d..5995b6f5e80 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -14,7 +14,10 @@ //===----------------------------------------------------------------------===// #include "AMDGPUISelLowering.h" +#include "AMDGPURegisterInfo.h" #include "AMDILIntrinsicInfo.h" +#include "AMDGPUSubtarget.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -22,6 +25,8 @@ using namespace llvm; +#include "AMDGPUGenCallingConv.inc" + AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { @@ -64,17 +69,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : // TargetLowering Callbacks //===---------------------------------------------------------------------===// -SDValue AMDGPUTargetLowering::LowerFormalArguments( - SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc DL, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - for (unsigned i = 0, e = Ins.size(); i < e; ++i) { - InVals.push_back(SDValue()); - } - return Chain; +void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, + const SmallVectorImpl &Ins) const { + + State.AnalyzeFormalArguments(Ins, CC_AMDGPU); } SDValue AMDGPUTargetLowering::LowerReturn( diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 9e7d997d4a0..f31b6466bd4 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -39,15 +39,12 @@ protected: bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; + void AnalyzeFormalArguments(CCState &State, + const SmallVectorImpl &Ins) const; + public: AMDGPUTargetLowering(TargetMachine &TM); - virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc DL, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index f65e1f37e77..922cac12b98 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -33,11 +33,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// -#include "AMDGPUGenCallingConv.inc" - -//===----------------------------------------------------------------------===// // TargetLowering Implementation Help Functions End //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index a9eca31f641..8b46cb0fc5a 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -18,6 +18,8 @@ #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -74,6 +76,105 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::SETCC); } +SDValue SITargetLowering::LowerFormalArguments( + SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { + + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + + MachineFunction &MF = DAG.getMachineFunction(); + FunctionType *FType = MF.getFunction()->getFunctionType(); + + assert(CallConv == CallingConv::C); + + SmallVector Splits; + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + const ISD::InputArg &Arg = Ins[i]; + + // Split vertices into their elements + if (Arg.VT.isVector()) { + ISD::InputArg NewArg = Arg; + NewArg.Flags.setSplit(); + NewArg.VT = Arg.VT.getVectorElementType(); + + // We REALLY want the ORIGINAL number of vertex elements here, e.g. a + // three or five element vertex only needs three or five registers, + // NOT four or eigth. + Type *ParamType = FType->getParamType(Arg.OrigArgIndex); + unsigned NumElements = ParamType->getVectorNumElements(); + + for (unsigned j = 0; j != NumElements; ++j) { + Splits.push_back(NewArg); + NewArg.PartOffset += NewArg.VT.getStoreSize(); + } + + } else { + Splits.push_back(Arg); + } + } + + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + AnalyzeFormalArguments(CCInfo, Splits); + + for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { + + CCValAssign &VA = ArgLocs[ArgIdx++]; + assert(VA.isRegLoc() && "Parameter must be in a register!"); + + unsigned Reg = VA.getLocReg(); + MVT VT = VA.getLocVT(); + + if (VT == MVT::i64) { + // For now assume it is a pointer + Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, + &AMDGPU::SReg_64RegClass); + Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass); + InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + continue; + } + + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); + + Reg = MF.addLiveIn(Reg, RC); + SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); + + const ISD::InputArg &Arg = Ins[i]; + if (Arg.VT.isVector()) { + + // Build a vector from the registers + Type *ParamType = FType->getParamType(Arg.OrigArgIndex); + unsigned NumElements = ParamType->getVectorNumElements(); + + SmallVector Regs; + Regs.push_back(Val); + for (unsigned j = 1; j != NumElements; ++j) { + Reg = ArgLocs[ArgIdx++].getLocReg(); + Reg = MF.addLiveIn(Reg, RC); + Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + } + + // Fill up the missing vector elements + NumElements = Arg.VT.getVectorNumElements() - NumElements; + for (unsigned j = 0; j != NumElements; ++j) + Regs.push_back(DAG.getUNDEF(VT)); + + InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, + Regs.data(), Regs.size())); + continue; + } + + InVals.push_back(Val); + } + return Chain; +} + MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 737162f5184..6d77d88c401 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -43,6 +43,13 @@ class SITargetLowering : public AMDGPUTargetLowering { public: SITargetLowering(TargetMachine &tm); + + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const; virtual EVT getSetCCResultType(EVT VT) const; -- 2.11.0