FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
-ModulePass *createAMDGPUAlwaysInlinePass();
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
ImmutablePass *
+++ /dev/null
-//===-- AMDGPUAlwaysInlinePass.cpp - Always Inline Pass -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass marks all internal functions as always_inline and creates
-/// duplicates of all other functions and marks the duplicates as always_inline.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUAlwaysInline : public ModulePass {
-
- static char ID;
-
-public:
- AMDGPUAlwaysInline() : ModulePass(ID) { }
- bool runOnModule(Module &M) override;
- const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
-};
-
-} // End anonymous namespace
-
-char AMDGPUAlwaysInline::ID = 0;
-
-bool AMDGPUAlwaysInline::runOnModule(Module &M) {
-
- std::vector<Function*> FuncsToClone;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
- if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
- FuncsToClone.push_back(&F);
- }
-
- for (Function *F : FuncsToClone) {
- ValueToValueMapTy VMap;
- Function *NewFunc = CloneFunction(F, VMap, false);
- NewFunc->setLinkage(GlobalValue::InternalLinkage);
- F->getParent()->getFunctionList().push_back(NewFunc);
- F->replaceAllUsesWith(NewFunc);
- }
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
- if (F.hasLocalLinkage()) {
- F.addFnAttr(Attribute::AlwaysInline);
- }
- }
- return false;
-}
-
-ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
- return new AMDGPUAlwaysInline();
-}
return nullptr;
}
- void addIRPasses() override;
void addCodeGenPrepare() override;
bool addPreISel() override;
bool addInstSelector() override;
PM.add(createAMDGPUTargetTransformInfoPass(this));
}
-void AMDGPUPassConfig::addIRPasses() {
- // Function calls are not supported, so make sure we inline everything.
- addPass(createAMDGPUAlwaysInlinePass());
- addPass(createAlwaysInlinerPass());
- // We need to add the barrier noop pass, otherwise adding the function
- // inlining pass will cause all of the PassConfigs passes to be run
- // one function at a time, which means if we have a module with two
- // functions, then we will generate code for the first function
- // without ever running any passes on the second.
- addPass(createBarrierNoopPass());
- TargetPassConfig::addIRPasses();
-}
-
void AMDGPUPassConfig::addCodeGenPrepare() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.isPromoteAllocaEnabled()) {
; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
-; CHECK: error: unsupported call to function external_function in test_call_external
+; CHECK: error: unsupported call to function defined_function in test_call
declare i32 @external_function(i32) nounwind
+++ /dev/null
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-NOT: {{^}}func:
-define internal fastcc i32 @func(i32 %a) {
-entry:
- %tmp0 = add i32 %a, 1
- ret i32 %tmp0
-}
-
-; CHECK: {{^}}kernel:
-define void @kernel(i32 addrspace(1)* %out) {
-entry:
- %tmp0 = call i32 @func(i32 1)
- store i32 %tmp0, i32 addrspace(1)* %out
- ret void
-}
-
-; CHECK: {{^}}kernel2:
-define void @kernel2(i32 addrspace(1)* %out) {
-entry:
- call void @kernel(i32 addrspace(1)* %out)
- ret void
-}