diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index cffea129de9..9224e1435dd 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -40,6 +40,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
   Reserved.set(AMDGPU::FLAT_SCR);
+  Reserved.set(AMDGPU::FLAT_SCR_LO);
+  Reserved.set(AMDGPU::FLAT_SCR_HI);
 
   // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
   Reserved.set(AMDGPU::VGPR255);
@@ -48,9 +50,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
-                                             MachineFunction &MF) const {
-  return RC->getNumRegs();
+unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+
+  // FIXME: We should adjust the max number of waves based on LDS size.
+  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
+  unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+
+  for (regclass_iterator I = regclass_begin(), E = regclass_end();
+       I != E; ++I) {
+
+    unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+    unsigned Limit;
+
+    if (isSGPRClass(*I)) {
+      Limit = SGPRLimit / NumSubRegs;
+    } else {
+      Limit = VGPRLimit / NumSubRegs;
+    }
+
+    const int *Sets = getRegClassPressureSets(*I);
+    assert(Sets);
+    for (unsigned i = 0; Sets[i] != -1; ++i) {
+      if (Sets[i] == (int)Idx)
+        return Limit;
+    }
+  }
+  return 256;
 }
 
 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
@@ -92,6 +117,60 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   }
 }
 
+void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
+                                           unsigned LoadStoreOp,
+                                           unsigned Value,
+                                           unsigned ScratchRsrcReg,
+                                           unsigned ScratchOffset,
+                                           int64_t Offset,
+                                           RegScavenger *RS) const {
+
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
+  MachineBasicBlock *MBB = MI->getParent();
+  const MachineFunction *MF = MI->getParent()->getParent();
+  LLVMContext &Ctx = MF->getFunction()->getContext();
+  DebugLoc DL = MI->getDebugLoc();
+  bool IsLoad = TII->get(LoadStoreOp).mayLoad();
+
+  bool RanOutOfSGPRs = false;
+  unsigned SOffset = ScratchOffset;
+
+  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+  unsigned Size = NumSubRegs * 4;
+
+  if (!isUInt<12>(Offset + Size)) {
+    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+    if (SOffset == AMDGPU::NoRegister) {
+      RanOutOfSGPRs = true;
+      SOffset = AMDGPU::SGPR0;
+    }
+    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+            .addReg(ScratchOffset)
+            .addImm(Offset);
+    Offset = 0;
+  }
+
+  if (RanOutOfSGPRs)
+    Ctx.emitError("Ran out of SGPRs for spilling VGPRs");
+
+  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
+    unsigned SubReg = NumSubRegs > 1 ?
+        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
+        Value;
+    bool IsKill = (i == e - 1);
+
+    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+            .addReg(SubReg, getDefRegState(IsLoad))
+            .addReg(ScratchRsrcReg, getKillRegState(IsKill))
+            .addImm(Offset)
+            .addReg(SOffset)
+            .addImm(0) // glc
+            .addImm(0) // slc
+            .addImm(0) // tfe
+            .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+  }
+}
+
 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
@@ -125,7 +204,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
         }
 
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
+        BuildMI(*MBB, MI, DL,
+                TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+                Spill.VGPR)
           .addReg(SubReg)
           .addImm(Spill.Lane);
 
@@ -154,13 +235,15 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           Ctx.emitError("Ran out of VGPRs for spilling SGPR");
         }
 
-        if (isM0) {
+        if (isM0)
           SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
-        }
 
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+        BuildMI(*MBB, MI, DL,
+                TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+                SubReg)
           .addReg(Spill.VGPR)
-          .addImm(Spill.Lane);
+          .addImm(Spill.Lane)
+          .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
         if (isM0) {
           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
             .addReg(SubReg);
@@ -177,71 +260,25 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       case AMDGPU::SI_SPILL_V128_SAVE:
       case AMDGPU::SI_SPILL_V96_SAVE:
      case AMDGPU::SI_SPILL_V64_SAVE:
-      case AMDGPU::SI_SPILL_V32_SAVE: {
-        unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-        unsigned SrcReg = MI->getOperand(0).getReg();
-        int64_t Offset = FrameInfo->getObjectOffset(Index);
-        unsigned Size = NumSubRegs * 4;
-        unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-        for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
-          unsigned SubReg = NumSubRegs > 1 ?
-              getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
-              SrcReg;
-          Offset += (i * 4);
-          MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);
-
-          unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
-                                                           Offset, Size);
-
-          if (AddrReg == AMDGPU::NoRegister) {
-            LLVMContext &Ctx = MF->getFunction()->getContext();
-            Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
-            AddrReg = AMDGPU::VGPR0;
-          }
-
-          // Store the value in LDS
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
-                  .addImm(0) // gds
-                  .addReg(AddrReg, RegState::Kill) // addr
-                  .addReg(SubReg) // data0
-                  .addImm(0); // offset
-        }
-
+      case AMDGPU::SI_SPILL_V32_SAVE:
+        buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+              TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
+              TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
+              TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+              FrameInfo->getObjectOffset(Index), RS);
         MI->eraseFromParent();
         break;
-      }
       case AMDGPU::SI_SPILL_V32_RESTORE:
       case AMDGPU::SI_SPILL_V64_RESTORE:
+      case AMDGPU::SI_SPILL_V96_RESTORE:
       case AMDGPU::SI_SPILL_V128_RESTORE:
       case AMDGPU::SI_SPILL_V256_RESTORE:
       case AMDGPU::SI_SPILL_V512_RESTORE: {
-        unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-        unsigned DstReg = MI->getOperand(0).getReg();
-        int64_t Offset = FrameInfo->getObjectOffset(Index);
-        unsigned Size = NumSubRegs * 4;
-        unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-        // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
-        for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
-          unsigned SubReg = NumSubRegs > 1 ?
-              getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
-              DstReg;
-
-          Offset += (i * 4);
-          unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
-                                                           Offset, Size);
-          if (AddrReg == AMDGPU::NoRegister) {
-            LLVMContext &Ctx = MF->getFunction()->getContext();
-            Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
-            AddrReg = AMDGPU::VGPR0;
-          }
-
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
-                  .addImm(0) // gds
-                  .addReg(AddrReg, RegState::Kill) // addr
-                  .addImm(0); //offset
-        }
+        buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+              TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
+              TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
+              TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+              FrameInfo->getObjectOffset(Index), RS);
         MI->eraseFromParent();
         break;
       }
@@ -250,11 +287,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     int64_t Offset = FrameInfo->getObjectOffset(Index);
     FIOp.ChangeToImmediate(Offset);
     if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
-      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VReg_32RegClass, MI, SPAdj);
+      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
       BuildMI(*MBB, MI, MI->getDebugLoc(),
               TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
               .addImm(Offset);
-      FIOp.ChangeToRegister(TmpReg, false);
+      FIOp.ChangeToRegister(TmpReg, false, false, true);
     }
   }
 }
@@ -264,7 +301,7 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
                                                                    MVT VT) const {
   switch(VT.SimpleTy) {
     default:
-    case MVT::i32: return &AMDGPU::VReg_32RegClass;
+    case MVT::i32: return &AMDGPU::VGPR_32RegClass;
   }
 }
 
@@ -276,7 +313,7 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
 
   static const TargetRegisterClass *BaseClasses[] = {
-    &AMDGPU::VReg_32RegClass,
+    &AMDGPU::VGPR_32RegClass,
     &AMDGPU::SReg_32RegClass,
     &AMDGPU::VReg_64RegClass,
     &AMDGPU::SReg_64RegClass,
@@ -297,7 +334,7 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
 }
 
 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
-  return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) ||
+  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
         getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
         getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
         getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
@@ -312,7 +349,7 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
   } else if (SRC == &AMDGPU::SCCRegRegClass) {
     return &AMDGPU::VCCRegRegClass;
   } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
-    return &AMDGPU::VReg_32RegClass;
+    return &AMDGPU::VGPR_32RegClass;
   } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
     return &AMDGPU::VReg_64RegClass;
   } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
@@ -388,40 +425,17 @@ unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
   return SubRC->getRegister(Index + Channel);
 }
 
-bool SIRegisterInfo::regClassCanUseLiteralConstant(int RCID) const {
-  switch (RCID) {
-  default: return false;
-  case AMDGPU::SSrc_32RegClassID:
-  case AMDGPU::SSrc_64RegClassID:
-  case AMDGPU::VSrc_32RegClassID:
-  case AMDGPU::VSrc_64RegClassID:
-    return true;
-  }
-}
-
-bool SIRegisterInfo::regClassCanUseLiteralConstant(
-                             const TargetRegisterClass *RC) const {
-  return regClassCanUseLiteralConstant(RC->getID());
+bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
+  return OpType == AMDGPU::OPERAND_REG_IMM32;
 }
 
-bool SIRegisterInfo::regClassCanUseInlineConstant(int RCID) const {
-  if (regClassCanUseLiteralConstant(RCID))
+bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
+  if (opCanUseLiteralConstant(OpType))
     return true;
 
-  switch (RCID) {
-  default: return false;
-  case AMDGPU::VCSrc_32RegClassID:
-  case AMDGPU::VCSrc_64RegClassID:
-    return true;
-  }
-}
-
-bool SIRegisterInfo::regClassCanUseInlineConstant(
-                            const TargetRegisterClass *RC) const {
-  return regClassCanUseInlineConstant(RC->getID());
+  return OpType == AMDGPU::OPERAND_REG_INLINE_C;
 }
 
-
 unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                            enum PreloadedValue Value) const {
 
@@ -434,6 +448,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
   case SIRegisterInfo::TGID_Z:
     return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
   case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
+    if (MFI->getShaderType() != ShaderType::COMPUTE)
+      return MFI->ScratchOffsetReg;
     return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
   case SIRegisterInfo::SCRATCH_PTR:
     return AMDGPU::SGPR2_SGPR3;
@@ -452,9 +468,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
 /// \brief Returns a register that is not used at any point in the function.
 ///        If all registers are used, then this function will return
 //         AMDGPU::NoRegister.
-unsigned SIRegisterInfo::findUnusedVGPR(const MachineRegisterInfo &MRI) const { - - const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass; +unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, + const TargetRegisterClass *RC) const { for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { @@ -464,3 +479,29 @@ unsigned SIRegisterInfo::findUnusedVGPR(const MachineRegisterInfo &MRI) const { return AMDGPU::NoRegister; } +unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 24; + case 9: return 28; + case 8: return 32; + case 7: return 36; + case 6: return 40; + case 5: return 48; + case 4: return 64; + case 3: return 84; + case 2: return 128; + default: return 256; + } +} + +unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return 103; + } +}
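
Note on the tables above: getNumVGPRsAllowed and getNumSGPRsAllowed encode the GCN occupancy trade-off, where the more waves a compute unit keeps in flight, the fewer registers each wave may claim, and getRegPressureSetLimit then scales that budget down by the number of 32-bit sub-registers a class spans. A minimal standalone C++ sketch of the arithmetic follows (illustrative only; pressureLimit and the helper names are invented for this note and are not the LLVM API):

#include <algorithm>
#include <cstdio>

// Wave-count -> per-wave register budget; values taken from the patch.
static unsigned numVGPRsAllowed(unsigned WaveCount) {
  switch (WaveCount) {
  case 10: return 24;  case 9: return 28;  case 8: return 32;
  case 7:  return 36;  case 6: return 40;  case 5: return 48;
  case 4:  return 64;  case 3: return 84;  case 2: return 128;
  default: return 256;
  }
}

static unsigned numSGPRsAllowed(unsigned WaveCount) {
  switch (WaveCount) {
  case 10: return 48;  case 9: return 56;  case 8: return 64;
  case 7:  return 72;  case 6: return 80;  case 5: return 96;
  default: return 103;
  }
}

// A class that is SizeInBytes wide covers SizeInBytes / 4 consecutive 32-bit
// registers, mirroring "NumSubRegs = std::max((int)(*I)->getSize() / 4, 1)"
// and "Limit = {S,V}GPRLimit / NumSubRegs" in getRegPressureSetLimit.
static unsigned pressureLimit(bool IsSGPRClass, unsigned SizeInBytes,
                              unsigned MaxWavesPerCU) {
  unsigned NumSubRegs = std::max((int)SizeInBytes / 4, 1);
  unsigned Budget = IsSGPRClass ? numSGPRsAllowed(MaxWavesPerCU)
                                : numVGPRsAllowed(MaxWavesPerCU);
  return Budget / NumSubRegs;
}

int main() {
  // At 10 waves per CU, a 128-bit VGPR tuple gets 24 / 4 = 6 registers and a
  // 64-bit SGPR pair gets 48 / 2 = 24.
  std::printf("VReg_128 limit at 10 waves: %u\n", pressureLimit(false, 16, 10));
  std::printf("SReg_64 limit at 10 waves:  %u\n", pressureLimit(true, 8, 10));
  return 0;
}

The other half of the patch moves VGPR spilling from LDS (DS_WRITE_B32/DS_READ_B32 via calculateLDSSpillAddress) to real scratch memory: buildScratchLoadStore emits one BUFFER_STORE_DWORD_OFFSET or BUFFER_LOAD_DWORD_OFFSET per 32-bit sub-register, and keeps the MUBUF immediate offset legal by checking isUInt<12>(Offset + Size); when the offset would overflow 12 bits, it folds the frame offset into a scavenged SGPR with S_ADD_U32 and issues the accesses with an immediate offset of 0.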