From 214aad9f314593e33a10f1d46cdf19c9e359e5f6 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Mon, 10 Aug 2015 17:28:08 +0000 Subject: [PATCH] Add new llvm.loop.unroll.enable metadata. This change adds the unroll metadata "llvm.loop.unroll.enable" which directs the optimizer to unroll a loop fully if the trip count is known at compile time, and unroll partially if the trip count is not known at compile time. This differs from "llvm.loop.unroll.full" which explicitly does not unroll a loop if the trip count is not known at compile time. The "llvm.loop.unroll.enable" is intended to be added for loops annotated with "#pragma unroll". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244466 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 12 +++++ lib/Transforms/Scalar/LoopUnrollPass.cpp | 60 ++++++++++++++++--------- test/Transforms/LoopUnroll/unroll-pragmas.ll | 66 ++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 20 deletions(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 79b89ecbf41..1390fe993a1 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -4279,6 +4279,18 @@ operand which is the string ``llvm.loop.unroll.runtime.disable``. For example: !0 = !{!"llvm.loop.unroll.runtime.disable"} +'``llvm.loop.unroll.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata suggests that the loop should be fully unrolled if the trip count +is known at compile time and partially unrolled if the trip count is not known +at compile time. The metadata has a single operand which is the string +``llvm.loop.unroll.enable``. For example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.unroll.enable"} + '``llvm.loop.unroll.full``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 67d5824f0ab..0c047e78eab 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -187,7 +187,7 @@ namespace { // total unrolled size. Parameters Threshold and PartialThreshold // are set to the maximum unrolled size for fully and partially // unrolled loops respectively. - void selectThresholds(const Loop *L, bool HasPragma, + void selectThresholds(const Loop *L, bool UsePragmaThreshold, const TargetTransformInfo::UnrollingPreferences &UP, unsigned &Threshold, unsigned &PartialThreshold, unsigned &PercentDynamicCostSavedThreshold, @@ -214,7 +214,7 @@ namespace { Threshold = UP.OptSizeThreshold; PartialThreshold = UP.PartialOptSizeThreshold; } - if (HasPragma) { + if (UsePragmaThreshold) { // If the loop has an unrolling pragma, we want to be more // aggressive with unrolling limits. Set thresholds to at // least the PragmaTheshold value which is larger than the @@ -726,6 +726,12 @@ static bool HasUnrollFullPragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); } +// Returns true if the loop has an unroll(enable) pragma. This metadata is used +// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives. +static bool HasUnrollEnablePragma(const Loop *L) { + return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); +} + // Returns true if the loop has an unroll(disable) pragma. static bool HasUnrollDisablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); @@ -901,8 +907,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } bool PragmaFullUnroll = HasUnrollFullPragma(L); + bool PragmaEnableUnroll = HasUnrollEnablePragma(L); unsigned PragmaCount = UnrollCountPragmaValue(L); - bool HasPragma = PragmaFullUnroll || PragmaCount > 0; + bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; TargetTransformInfo::UnrollingPreferences UP; getUnrollingPreferences(L, TTI, UP); @@ -950,7 +957,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { unsigned Threshold, PartialThreshold; unsigned PercentDynamicCostSavedThreshold; unsigned DynamicCostSavingsDiscount; - selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold, + // Only use the high pragma threshold when we have a target unroll factor such + // as with "#pragma unroll N" or a pragma indicating full unrolling and the + // trip count is known. Otherwise we rely on the standard threshold to + // heuristically select a reasonable unroll count. + bool UsePragmaThreshold = + PragmaCount > 0 || + ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0); + + selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount); @@ -985,14 +1000,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = - (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime); + bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || + (UserRuntime ? CurrentRuntime : UP.Runtime); // Don't unroll a runtime trip count loop with unroll full pragma. if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { - bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; + bool AllowPartial = PragmaEnableUnroll || + (UserAllowPartial ? CurrentAllowPartial : UP.Partial); if (!AllowPartial && !CountSetExplicitly) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); @@ -1032,23 +1048,27 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); - if (PragmaFullUnroll && PragmaCount == 0) { - if (TripCount && Count != TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because unrolled size is too large."); - } else if (!TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because loop has a runtime trip count."); - } - } else if (PragmaCount > 0 && Count != OriginalCount) { + if ((PragmaCount > 0) && Count != OriginalCount) { emitOptimizationRemarkMissed( Ctx, DEBUG_TYPE, *F, LoopLoc, "Unable to unroll loop the number of times directed by " "unroll_count pragma because unrolled size is too large."); + } else if (PragmaFullUnroll && !TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll(full) pragma " + "because loop has a runtime trip count."); + } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to unroll loop as directed by unroll(enable) pragma because " + "unrolled size is too large."); + } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + Count != TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll pragma because " + "unrolled size is too large."); } } diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll index 8e0d77513cc..b915b4fdf48 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -256,3 +256,69 @@ for.end: ; preds = %for.body ret void } !12 = !{!12, !4} + +; #pragma clang loop unroll(enable) +; Loop should be fully unrolled. +; +; CHECK-LABEL: @loop64_with_enable( +; CHECK-NOT: br i1 +define void @loop64_with_enable(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13 + +for.end: ; preds = %for.body + ret void +} +!13 = !{!13, !14} +!14 = !{!"llvm.loop.unroll.enable"} + +; #pragma clang loop unroll(enable) +; Loop has a runtime trip count and should be runtime unrolled and duplicated +; (original and 8x). +; +; CHECK-LABEL: @runtime_loop_with_enable( +; CHECK: for.body.prol: +; CHECK: store +; CHECK-NOT: store +; CHECK: br i1 +; CHECK: for.body: +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK-NOT: store i32 +; CHECK: br i1 +define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) { +entry: + %cmp3 = icmp sgt i32 %b, 0 + br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %b + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 + +for.end: ; preds = %for.body, %entry + ret void +} +!15 = !{!15, !14} -- 2.11.0