From: Matt Arsenault Date: Mon, 7 Aug 2017 17:08:44 +0000 (+0000) Subject: AMDGPU: Use a custom areInlineCompatible X-Git-Tag: android-x86-7.1-r4~12456 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=a4262847c8fb1bfadfbec43e05a739c83431187f;p=android-x86%2Fexternal-llvm.git AMDGPU: Use a custom areInlineCompatible Fixes not inlining OpenCL library functions on AMDGPU, which don't have an explicitly set target-cpu. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310269 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 89a03902dc6..fde2132b40b 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -534,3 +534,16 @@ unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Inde return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } + +bool AMDGPUTTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; + FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; + return ((RealCallerBits & RealCalleeBits) == RealCalleeBits); +} diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 9a320bdfcc3..5c0e0297f2c 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -34,6 +34,32 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> { const AMDGPUTargetLowering *TLI; bool IsGraphicsShader; + + const FeatureBitset InlineFeatureIgnoreList = { + // Codegen control options which don't matter. 
+ AMDGPU::FeatureEnableLoadStoreOpt, + AMDGPU::FeatureEnableSIScheduler, + AMDGPU::FeatureEnableUnsafeDSOffsetFolding, + AMDGPU::FeatureFlatForGlobal, + AMDGPU::FeaturePromoteAlloca, + AMDGPU::FeatureUnalignedBufferAccess, + AMDGPU::FeatureUnalignedScratchAccess, + + AMDGPU::FeatureAutoWaitcntBeforeBarrier, + AMDGPU::FeatureDebuggerEmitPrologue, + AMDGPU::FeatureDebuggerInsertNops, + AMDGPU::FeatureDebuggerReserveRegs, + + // Property of the kernel/environment which can't actually differ. + AMDGPU::FeatureSGPRInitBug, + AMDGPU::FeatureXNACK, + AMDGPU::FeatureTrapHandler, + + // Perf-tuning features + AMDGPU::FeatureFastFMAF32, + AMDGPU::HalfRate64Ops + }; + const AMDGPUSubtarget *getST() const { return ST; } const AMDGPUTargetLowering *getTLI() const { return TLI; } @@ -121,6 +147,9 @@ public: unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); + + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; }; } // end namespace llvm diff --git a/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll b/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll new file mode 100644 index 00000000000..4258fc5bc82 --- /dev/null +++ b/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll @@ -0,0 +1,90 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s + +; CHECK-LABEL: @func_no_target_cpu( +define i32 @func_no_target_cpu() #0 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_call_no_target_cpu( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_call_no_target_cpu() #1 { + %call = call i32 @func_no_target_cpu() + ret i32 %call +} + +; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_features_call_no_target_cpu() #2 { + %call = call i32 @func_no_target_cpu() + ret i32 %call +} + +; CHECK-LABEL: @fp32_denormals( +define i32 @fp32_denormals() #3 { + ret i32 0 +} + +; 
CHECK-LABEL: @no_fp32_denormals_call_f32_denormals( +; CHECK-NEXT: call i32 @fp32_denormals() +define i32 @no_fp32_denormals_call_f32_denormals() #4 { + %call = call i32 @fp32_denormals() + ret i32 %call +} + +; Make sure gfx9 can call unspecified functions because of movrel +; feature change. +; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu( +; CHECK-NEXT: ret i32 0 +define i32 @gfx9_target_features_call_no_target_cpu() #5 { + %call = call i32 @func_no_target_cpu() + ret i32 %call +} + +define i32 @func_no_halfrate64ops() #6 { + ret i32 0 +} + +define i32 @func_with_halfrate64ops() #7 { + ret i32 0 +} + +; CHECK-LABEL: @call_func_without_halfrate64ops( +; CHECK-NEXT: ret i32 0 +define i32 @call_func_without_halfrate64ops() #7 { + %call = call i32 @func_no_halfrate64ops() + ret i32 %call +} + +; CHECK-LABEL: @call_func_with_halfrate64ops( +; CHECK-NEXT: ret i32 0 +define i32 @call_func_with_halfrate64ops() #6 { + %call = call i32 @func_with_halfrate64ops() + ret i32 %call +} + +define i32 @func_no_loadstoreopt() #8 { + ret i32 0 +} + +define i32 @func_with_loadstoreopt() #9 { + ret i32 0 +} + +; CHECK-LABEL: @call_func_without_loadstoreopt( +; CHECK-NEXT: ret i32 0 +define i32 @call_func_without_loadstoreopt() #9 { + %call = call i32 @func_no_loadstoreopt() + ret i32 %call +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "target-cpu"="fiji" } +attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" } +attributes #3 = { nounwind "target-features"="+fp32-denormals" } +attributes #4 = { nounwind "target-features"="-fp32-denormals" } +attributes #5 = { nounwind "target-cpu"="gfx900" } +attributes #6 = { nounwind "target-features"="-half-rate-64-ops" } +attributes #7 = { nounwind "target-features"="+half-rate-64-ops" } +attributes #8 = { nounwind "target-features"="-load-store-opt" } +attributes #9 = { nounwind "target-features"="+load-store-opt" } diff --git a/test/Transforms/Inline/AMDGPU/lit.local.cfg 
b/test/Transforms/Inline/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..2a665f06be7 --- /dev/null +++ b/test/Transforms/Inline/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True