+static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, // Try to fold OpToFold into one operand of UseMI.
+ unsigned UseOpIdx, // Index of that use operand within UseMI.
+ std::vector<FoldCandidate> &FoldList, // Forwarded to tryAddToFoldList (presumably appended to there).
+ SmallVectorImpl<MachineInstr *> &CopiesToReplace, // Receives every COPY rewritten to a MOV below.
+ const SIInstrInfo *TII, const SIRegisterInfo &TRI,
+ MachineRegisterInfo &MRI) { // Nothing is returned; every rejection is a silent early return.
+ const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
+ // Give up on implicit register uses, and on folding a register into a use that reads a subregister.
+ // FIXME: Fold operands with subregs.
+ if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
+ UseOp.isImplicit())) {
+ return;
+ }
+
+ bool FoldingImm = OpToFold.isImm();
+ APInt Imm; // Only assigned a value when FoldingImm is true.
+ // Immediate-only preparation: narrow the constant for subregister uses and turn a COPY user into a MOV.
+ if (FoldingImm) {
+ unsigned UseReg = UseOp.getReg(); // NOTE(review): no isReg() check on this path; assumes UseOp is a register use.
+ const TargetRegisterClass *UseRC
+ = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+ MRI.getRegClass(UseReg) :
+ TRI.getPhysRegClass(UseReg);
+
+ Imm = APInt(64, OpToFold.getImm()); // 64-bit wide copy of the immediate being folded.
+
+ const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
+ const TargetRegisterClass *FoldRC =
+ TRI.getRegClass(FoldDesc.OpInfo[0].RegClass); // Class of operand 0 of the instruction owning OpToFold (presumably its def -- confirm).
+
+ // Split 64-bit constants into 32-bits for folding: when the source class is 8 bytes and the use reads a subregister, keep only the selected half.
+ if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
+ if (UseRC->getSize() != 8)
+ return; // Only 8-byte use register classes are handled here.
+
+ if (UseOp.getSubReg() == AMDGPU::sub0) {
+ Imm = Imm.getLoBits(32);
+ } else {
+ assert(UseOp.getSubReg() == AMDGPU::sub1); // Any subregister other than sub0 is expected to be sub1.
+ Imm = Imm.getHiBits(32);
+ }
+ }
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV. The opcode is swapped in place here, before the remaining checks in this function run.
+ if (UseMI->getOpcode() == AMDGPU::COPY) {
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getPhysRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
+ if (MovOp == AMDGPU::COPY)
+ return; // getMovOpcode handed back COPY for DestRC (presumably: no usable MOV), so give up.
+
+ UseMI->setDesc(TII->get(MovOp)); // The getDesc() call further down therefore sees the MOV descriptor.
+ CopiesToReplace.push_back(UseMI); // Record the rewritten instruction for the caller.
+ }
+ }
+
+ // Special case for REG_SEQUENCE: We can't fold literals into
+ // REG_SEQUENCE instructions, so we have to fold them into the
+ // uses of REG_SEQUENCE.
+ if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
+ unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); // The immediate operand right after the use is read as its subregister index.
+ // Visit every use of the REG_SEQUENCE result register.
+ for (MachineRegisterInfo::use_iterator
+ RSUse = MRI.use_begin(RegSeqDstReg),
+ RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+
+ MachineInstr *RSUseMI = RSUse->getParent();
+ if (RSUse->getSubReg() != RegSeqDstSubReg)
+ continue; // This use reads a different subregister than the one UseOp feeds.
+ // Recurse with the same operand to fold, now targeting the REG_SEQUENCE user.
+ foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
+ CopiesToReplace, TII, TRI, MRI);
+ }
+ return; // The REG_SEQUENCE itself never gets a fold candidate.
+ }
+
+ const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+ // Don't fold into target independent nodes. Target independent opcodes
+ // don't have defined register classes.
+ if (UseDesc.isVariadic() ||
+ UseDesc.OpInfo[UseOpIdx].RegClass == -1)
+ return;
+
+ if (FoldingImm) {
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); // Temporary operand built from the (possibly narrowed) constant.
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII); // NOTE(review): passes the address of a local; presumably the callee copies what it needs -- confirm.
+ return;
+ }
+
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII); // Not an immediate: offer OpToFold itself.
+
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
+ // to enable more folding opportunities. The shrink operands pass
+ // already does this.
+ return;
+}
+