}
}
+void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
+                                         MachineBasicBlock::iterator I,
+                                         const TargetRegisterClass *DstRC,
+                                         MachineOperand &Op,
+                                         MachineRegisterInfo &MRI,
+                                         const DebugLoc &DL) const {
+
+  unsigned OpReg = Op.getReg();
+  unsigned OpSubReg = Op.getSubReg();
+
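+  // Restrict the operand's register class by its sub-register index; an
+  // index of 0 leaves the class unchanged.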
+  const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
+      RI.getRegClassForReg(MRI, OpReg), OpSubReg);
+
+  // Check if operand is already the correct register class.
+  if (DstRC == OpRC)
+    return;
+
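+  // Otherwise, copy the operand into a new virtual register of the required
+  // class and rewrite the operand to use it.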
+  unsigned DstReg = MRI.createVirtualRegister(DstRC);
+  MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg)
+    .addOperand(Op);
+
+  Op.setReg(DstReg);
+  Op.setSubReg(0);
+
+  MachineInstr *Def = MRI.getVRegDef(OpReg);
+  if (!Def)
+    return;
+
+  // Try to eliminate the copy if it is copying an immediate value.
+  if (Def->isMoveImmediate())
+    FoldImmediate(*Copy, *Def, OpReg, &MRI);
+}
+
void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
      MachineOperand &Op = MI.getOperand(I);
      if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
        continue;
-      unsigned DstReg = MRI.createVirtualRegister(RC);
      // MI is a PHI instruction.
      MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
      MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
-      BuildMI(*InsertBB, Insert, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
-        .addOperand(Op);
-      Op.setReg(DstReg);
+      // Avoid creating no-op copies with the same src and dst reg class. These
+      // confuse some of the machine passes (e.g. the critical edge splitting
+      // heuristic in MachineSink).
+      legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
}
}
      if (VRC == OpRC)
        continue;
-      unsigned DstReg = MRI.createVirtualRegister(VRC);
-
-      BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
-        .addOperand(Op);
-
-      Op.setReg(DstReg);
+      legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
      Op.setIsKill();
}
}
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
-      MachineBasicBlock &MBB = *MI.getParent();
-      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
-      BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
-        .addReg(Src0);
-      MI.getOperand(1).setReg(NewSrc0);
+      MachineBasicBlock *MBB = MI.getParent();
+      MachineOperand &Op = MI.getOperand(1);
+      legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
    }
    return;
}
      continue;
    unsigned DstReg = Inst.getOperand(0).getReg();
+    if (Inst.isCopy() &&
+        TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
+        NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
+      // Instead of creating a copy where src and dst are the same register
+      // class, we just replace all uses of dst with src. These kinds of
+      // copies interfere with the heuristics MachineSink uses to decide
+      // whether or not to split a critical edge, since the pass assumes
+      // that copies will end up as machine instructions and not be
+      // eliminated.
+      addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
+      MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
+      MRI.clearKillFlags(Inst.getOperand(1).getReg());
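+      // replaceRegWith also rewrote the def operand of this copy; point it
+      // back at DstReg so the copy (now without any users) still defines the
+      // original register.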
+      Inst.getOperand(0).setReg(DstReg);
+      continue;
+    }
+
    NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
}
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
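+  /// \brief Legalize operand \p Op by copying it to a new virtual register of
+  /// class \p DstRC if it does not already have that register class. The copy
+  /// is inserted before \p I in \p InsertMBB, and immediate defs are folded
+  /// into the new copy where possible.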
+  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
+                              MachineBasicBlock::iterator I,
+                              const TargetRegisterClass *DstRC,
+                              MachineOperand &Op, MachineRegisterInfo &MRI,
+                              const DebugLoc &DL) const;
+
  /// \brief Legalize all operands in this instruction. This function may
  /// create new instructions and insert them before \p MI.
  void legalizeOperands(MachineInstr &MI) const;
return AMDGPU::NoRegister;
}
-bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
-                            unsigned Reg) const {
-  const TargetRegisterClass *RC;
-  if (TargetRegisterInfo::isVirtualRegister(Reg))
-    RC = MRI.getRegClass(Reg);
-  else
-    RC = getPhysRegClass(Reg);
-
-  return hasVGPRs(RC);
-}
-
unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 800;
llvm_unreachable("unhandled register size");
}
}
+
+const TargetRegisterClass*
+SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
+                                  unsigned Reg) const {
+  if (TargetRegisterInfo::isVirtualRegister(Reg))
+    return MRI.getRegClass(Reg);
+
+  return getPhysRegClass(Reg);
+}
+
+bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
+                            unsigned Reg) const {
+  return hasVGPRs(getRegClassForReg(MRI, Reg));
+}
unsigned getSGPRPressureSet() const { return SGPRSetID; };
unsigned getVGPRPressureSet() const { return VGPRSetID; };
+  const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
+                                               unsigned Reg) const;
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
bool isSGPRPressureSet(unsigned SetID) const {
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:12 ; 8-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:20 ; 8-byte Folded Spill
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:12 ; 8-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:20 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
; GCN: buffer_load_ushort v{{[0-9]+}}, off
; GCN: buffer_load_ushort v{{[0-9]+}}, off
-; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
}
; GCN-LABEL: {{^}}dynamic_insertelement_v8f64:
-; GCN: SCRATCH_RSRC_DWORD
+; GCN-DAG: SCRATCH_RSRC_DWORD
; FIXME: Should be able to eliminate this?
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:48{{$}}
-; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 0{{$}}
-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; GCN-DAG: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN-LABEL: {{^}}legal_offset_fi_offset:
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
-; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000
+; This constant isn't folded, because it has multiple uses.
+; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8000
+; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
; register operands in the correct order when modifying the opcode of an
; instruction to V_ADD_I32_e32.
-; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 killed %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry: