From 115244a728dc791dbb085fb4590f43ef8148a683 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 18 Jun 2016 05:15:53 +0000 Subject: [PATCH] AMDGPU: Fix kernel argument alignment impacting stack size Don't use AllocateStack because kernel arguments have nothing to do with the stack. The ensureMaxAlignment call was still changing the stack alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273080 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUCallingConv.td | 2 +- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 16 ++++++---- lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 10 +++--- lib/Target/AMDGPU/AMDGPUMachineFunction.h | 15 +++++++++ test/CodeGen/AMDGPU/kernarg-stack-alignment.ll | 44 ++++++++++++++++++++++++++ 5 files changed, 73 insertions(+), 14 deletions(-) create mode 100644 test/CodeGen/AMDGPU/kernarg-stack-alignment.ll diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td index 4386c78966b..47dfa499206 100644 --- a/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -110,7 +110,7 @@ def CC_R600 : CallingConv<[ // Calling convention for compute kernels def CC_AMDGPU_Kernel : CallingConv<[ - CCCustom<"allocateStack"> + CCCustom<"allocateKernArg"> ]>; def CC_AMDGPU : CallingConv<[ diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 37f6efa315f..3329e3bafda 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -31,13 +31,15 @@ #include "SIInstrInfo.h" using namespace llvm; -static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), - ArgFlags.getOrigAlign()); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - +static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, + 
CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + MachineFunction &MF = State.getMachineFunction(); + AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); + + uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(), + ArgFlags.getOrigAlign()); + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return true; } diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index c824ab83ad8..d19eb655e46 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -1,8 +1,5 @@ #include "AMDGPUMachineFunction.h" -#include "AMDGPU.h" -#include "Utils/AMDGPUBaseInfo.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Function.h" + using namespace llvm; // Pin the vtable to this file. @@ -10,8 +7,9 @@ void AMDGPUMachineFunction::anchor() {} AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo(), + KernArgSize(0), + MaxKernArgAlign(0), LDSSize(0), ABIArgOffset(0), ScratchSize(0), - IsKernel(true) { -} + IsKernel(true) {} diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 2ef3bf5f2fb..a534024dc2e 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -16,10 +16,25 @@ namespace llvm { class AMDGPUMachineFunction : public MachineFunctionInfo { + uint64_t KernArgSize; + unsigned MaxKernArgAlign; + virtual void anchor(); public: AMDGPUMachineFunction(const MachineFunction &MF); + + uint64_t allocateKernArg(uint64_t Size, unsigned Align) { + assert(isPowerOf2_32(Align)); + KernArgSize = alignTo(KernArgSize, Align); + + uint64_t Result = KernArgSize; + KernArgSize += Size; + + MaxKernArgAlign = std::max(Align, MaxKernArgAlign); + return Result; + } + /// A map to keep track of local memory objects and their offsets within /// the local memory space. 
std::map<const GlobalValue *, unsigned> LocalMemoryObjects; diff --git a/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll b/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll new file mode 100644 index 00000000000..21c92dbc909 --- /dev/null +++ b/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll @@ -0,0 +1,44 @@ +; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +; Test that the alignment of kernel arguments does not impact the +; alignment of the stack + +; CHECK-LABEL: {{^}}no_args: +; CHECK: ScratchSize: 8{{$}} +define void @no_args() { + %alloca = alloca i8 + store volatile i8 0, i8* %alloca + ret void +} + +; CHECK-LABEL: {{^}}force_align32: +; CHECK: ScratchSize: 8{{$}} +define void @force_align32(<8 x i32>) { + %alloca = alloca i8 + store volatile i8 0, i8* %alloca + ret void +} + +; CHECK-LABEL: {{^}}force_align64: +; CHECK: ScratchSize: 8{{$}} +define void @force_align64(<16 x i32>) { + %alloca = alloca i8 + store volatile i8 0, i8* %alloca + ret void +} + +; CHECK-LABEL: {{^}}force_align128: +; CHECK: ScratchSize: 8{{$}} +define void @force_align128(<32 x i32>) { + %alloca = alloca i8 + store volatile i8 0, i8* %alloca + ret void +} + +; CHECK-LABEL: {{^}}force_align256: +; CHECK: ScratchSize: 8{{$}} +define void @force_align256(<64 x i32>) { + %alloca = alloca i8 + store volatile i8 0, i8* %alloca + ret void +} -- 2.11.0