From 58b383765e525fe10bca7c10d5dbc1ed1074947a Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sun, 16 Jun 2019 17:32:01 +0000 Subject: [PATCH] AMDGPU: Be explicit about whether the high-word in SI_PC_ADD_REL_OFFSET is 0 Summary: Instead of encoding a high-word of 0 using a fake TargetGlobalAddress, just use a literal target constant. This simplifies some subsequent changes. The generated assembly is now more explicit about the kind of relocation that is to be used. Change-Id: I066835202d23b5941fa7a358eb4b89e9b71ab6f8 Reviewers: arsenm, rampitec Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D61491 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363516 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIISelLowering.cpp | 17 ++++++++++++----- lib/Target/AMDGPU/SIInstrInfo.cpp | 5 +---- lib/Target/AMDGPU/SIInstructions.td | 7 ++++++- test/CodeGen/AMDGPU/llvm.memcpy.ll | 2 +- .../CodeGen/AMDGPU/no-initializer-constant-addrspace.ll | 2 +- 5 files changed, 21 insertions(+), 12 deletions(-) diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index e9f3030dd93..80acf5783ad 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4644,11 +4644,18 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too // small. This requires us to add 4 to the global variable offset in order to // compute the correct address. - SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, - GAFlags); - SDValue PtrHi = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, - GAFlags == SIInstrInfo::MO_NONE ? - GAFlags : GAFlags + 1); + unsigned LoFlags = GAFlags; + if (LoFlags == SIInstrInfo::MO_NONE) + LoFlags = SIInstrInfo::MO_REL32; + SDValue PtrLo = + DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, LoFlags); + SDValue PtrHi; + if (GAFlags == SIInstrInfo::MO_NONE) { + PtrHi = DAG.getTargetConstant(0, DL, MVT::i32); + } else { + PtrHi = + DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags + 1); + } return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi); } diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 7e99377bfb0..55442782621 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1369,10 +1369,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) .addReg(RegHi); - if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE) - MIB.addImm(0); - else - MIB.add(MI.getOperand(2)); + MIB.add(MI.getOperand(2)); Bundler.append(MIB); finalizeBundle(MBB, Bundler.begin()); diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index e6b64ecbfce..202955a9626 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -551,11 +551,16 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI < (outs SReg_64:$dst), (ins si_ga:$ptr_lo, si_ga:$ptr_hi), [(set SReg_64:$dst, - (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr_lo), (tglobaladdr:$ptr_hi))))]> { + (i64 (SIpc_add_rel_offset tglobaladdr:$ptr_lo, tglobaladdr:$ptr_hi)))]> { let Defs = [SCC]; } def : GCNPat < + (SIpc_add_rel_offset tglobaladdr:$ptr_lo, 0), + (SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0)) +>; + +def : GCNPat < (AMDGPUinit_exec i64:$src), (SI_INIT_EXEC (as_i64imm $src)) >; diff --git a/test/CodeGen/AMDGPU/llvm.memcpy.ll b/test/CodeGen/AMDGPU/llvm.memcpy.ll index cb3f8c5d17b..85e40e26b29 100644 --- a/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -333,7 +333,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad ; FUNC-LABEL: {{^}}test_memcpy_const_string_align4: ; SI: s_getpc_b64 -; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4+20 +; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4@rel32@lo+20 ; SI: s_addc_u32 ; SI-DAG: s_load_dwordx4 ; SI-DAG: s_load_dwordx4 diff --git a/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll b/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll index b35b5dad11a..886144a4a1c 100644 --- a/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll +++ b/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -filetype=obj < %s | llvm-readobj -r --symbols | FileCheck %s -check-prefix=GCN ; RUN: llc -march=r600 -mcpu=cypress -filetype=obj < %s | llvm-readobj -r --symbols | FileCheck %s -check-prefix=EG -; GCN: R_AMDGPU_REL32 extern_const_addrspace +; GCN: R_AMDGPU_REL32_LO extern_const_addrspace ; EG: R_AMDGPU_ABS32 extern_const_addrspace ; CHECK-DAG: Name: extern_const_addrspace -- 2.11.0