switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::LOAD: {
SDValue Result = LowerLOAD(Op, DAG);
assert((!Result.getNode() ||
return Chain;
}
+/// Lower ISD::RETURNADDR (i.e. @llvm.returnaddress) for SI.
+///
+/// A real return address exists only for depth 0 in a callable (non-entry)
+/// function: it is copied out of the ABI return-address register reported by
+/// SIRegisterInfo::getReturnAddressReg. A non-zero depth (callee frames
+/// cannot be walked here), or any query inside an entry function (kernels
+/// and shaders, which are not called), lowers to the constant 0.
+SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
+                                          SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType();
+  SDLoc DL(Op);
+  // Only depth 0 is supported; deeper frames fold to 0.
+  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0)
+    return DAG.getConstant(0, DL, VT);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  // Kernel and shader entry points have no caller, hence no return address.
+  if (Info->isEntryFunction())
+    return DAG.getConstant(0, DL, VT);
+
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  // Record that @llvm.returnaddress is used in this function.
+  MFI.setReturnAddressIsTaken(true);
+
+  const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+  // Get the return address reg and mark it as an implicit live-in.
+  unsigned Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF),
+                              getRegClassFor(VT, Op.getNode()->isDivergent()));
+
+  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
+}
+
SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
SelectionDAG &DAG, ArrayRef<SDValue> Ops,
bool IsIntrinsic = false) const;
--- /dev/null
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+; Depth 0 in a callable (non-entry) function: the return address is live in
+; the ABI return-address register pair and is simply copied to the result.
+; GCN-LABEL: {{^}}func1
+; GCN: v_mov_b32_e32 v0, s30
+; GCN: v_mov_b32_e32 v1, s31
+; GCN: s_setpc_b64 s[30:31]
+define i8* @func1() nounwind {
+entry:
+  %0 = tail call i8* @llvm.returnaddress(i32 0)
+  ret i8* %0
+}
+
+; Non-zero depth: caller frames cannot be recovered, so the intrinsic folds
+; to the constant 0.
+; GCN-LABEL: {{^}}func2
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, 0
+; GCN: s_setpc_b64 s[30:31]
+define i8* @func2() nounwind {
+entry:
+  %0 = tail call i8* @llvm.returnaddress(i32 1)
+  ret i8* %0
+}
+
+; amdgpu_kernel: kernels are entry functions with no caller, so even depth 0
+; folds to the constant 0.
+; GCN-LABEL: {{^}}func3
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, v0
+define amdgpu_kernel void @func3(i8** %out) nounwind {
+entry:
+  %tmp = tail call i8* @llvm.returnaddress(i32 0)
+  store i8* %tmp, i8** %out, align 4
+  ret void
+}
+
+; Intrinsic call placed outside the entry block of a kernel: lowering still
+; succeeds and the result folds to the constant 0.
+; GCN-LABEL: {{^}}func4
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, v0
+define amdgpu_kernel void @func4(i8** %out, i32 %val) nounwind {
+entry:
+  %cmp = icmp ne i32 %val, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  %tmp = tail call i8* @llvm.returnaddress(i32 1)
+  store i8* %tmp, i8** %out, align 4
+  ret void
+
+exit:
+  ret void
+}
+
+; Function ending in unreachable (no return): a non-zero depth still folds to
+; the constant 0 without requiring a return sequence.
+; GCN-LABEL: {{^}}func5
+; GCN: v_mov_b32_e32 v0, 0
+define void @func5() nounwind {
+entry:
+  %tmp = tail call i8* @llvm.returnaddress(i32 2)
+  store volatile i32 0, i32 addrspace(3)* undef, align 4
+  unreachable
+}
+
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone