[Polly] Reuse multiple uses in operand tree.

author Michael Kruse <llvm-project@meinersbur.de>

Fri, 16 Oct 2020 17:37:34 +0000 (12:37 -0500)

committer Michael Kruse <llvm-project@meinersbur.de>

Tue, 20 Oct 2020 23:05:35 +0000 (18:05 -0500)
author Michael Kruse <llvm-project@meinersbur.de>
Fri, 16 Oct 2020 17:37:34 +0000 (12:37 -0500)
committer Michael Kruse <llvm-project@meinersbur.de>
Tue, 20 Oct 2020 23:05:35 +0000 (18:05 -0500)
diff --git a/polly/lib/Transform/ForwardOpTree.cpp b/polly/lib/Transform/ForwardOpTree.cpp

index a373a4e..56c5fef 100644 (file)
--- a/polly/lib/Transform/ForwardOpTree.cpp
+++ b/polly/lib/Transform/ForwardOpTree.cpp
@@ -94,6 +94,9 @@ namespace {
  /// as the root element. If the value in question is not an instruction in the
  /// SCoP, it can be a leaf of an instruction's operand tree.
  enum ForwardingDecision {
+  /// An uninitialized value.
+  FD_Unknown,
+
    /// The root instruction or value cannot be forwarded at all.
    FD_CannotForward,
  
@@ -119,23 +122,71 @@ enum ForwardingDecision {
    /// location.
    FD_CanForwardProfitably,
  
-  /// Used to indicate that a forwarding has be carried out successfully, and
-  /// the forwarded memory access can be deleted.
-  FD_DidForwardTree,
-
-  /// Used to indicate that a forwarding has be carried out successfully, and
-  /// the forwarded memory access is being reused.
-  FD_DidForwardLeaf,
-
    /// A forwarding method cannot be applied to the operand tree.
    /// The difference to FD_CannotForward is that there might be other methods
    /// that can handle it.
-  /// The conditions that make an operand tree applicable must be checked even
-  /// with DoIt==true because a method following the one that returned
-  /// FD_NotApplicable might have returned FD_CanForwardTree.
    FD_NotApplicable
  };
  
+/// Represents the evaluation of and action to taken when forwarding a value
+/// from an operand tree.
+struct ForwardingAction {
+  using KeyTy = std::pair<Value *, ScopStmt *>;
+
+  /// Evaluation of forwarding a value.
+  ForwardingDecision Decision = FD_Unknown;
+
+  /// Callback to execute the forwarding.
+  /// Returning true allows deleting the polly::MemoryAccess if the value is the
+  /// root of the operand tree (and its elimination the reason why the
+  /// forwarding is done). Return false if the MemoryAccess is reused or there
+  /// might be other users of the read accesses. In the letter case the
+  /// polly::SimplifyPass can remove dead MemoryAccesses.
+  std::function<bool()> Execute = []() -> bool {
+    llvm_unreachable("unspecified how to forward");
+  };
+
+  /// Other values that need to be forwarded if this action is executed. Their
+  /// actions are executed after this one.
+  SmallVector<KeyTy, 4> Depends;
+
+  /// Named ctor: The method creating this object does not apply to the kind of
+  /// value, but other methods may.
+  static ForwardingAction notApplicable() {
+    ForwardingAction Result;
+    Result.Decision = FD_NotApplicable;
+    return Result;
+  }
+
+  /// Named ctor: The value cannot be forwarded.
+  static ForwardingAction cannotForward() {
+    ForwardingAction Result;
+    Result.Decision = FD_CannotForward;
+    return Result;
+  }
+
+  /// Named ctor: The value can just be used without any preparation.
+  static ForwardingAction triviallyForwardable(bool IsProfitable) {
+    ForwardingAction Result;
+    Result.Decision =
+        IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
+    Result.Execute = []() { return true; };
+    return Result;
+  }
+
+  /// Name ctor: The value can be forwarded by executing an action.
+  static ForwardingAction canForward(std::function<bool()> Execute,
+                                     ArrayRef<KeyTy> Depends,
+                                     bool IsProfitable) {
+    ForwardingAction Result;
+    Result.Decision =
+        IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
+    Result.Execute = std::move(Execute);
+    Result.Depends.append(Depends.begin(), Depends.end());
+    return Result;
+  }
+};
+
  /// Implementation of operand tree forwarding for a specific SCoP.
  ///
  /// For a statement that requires a scalar value (through a value read
@@ -145,6 +196,8 @@ enum ForwardingDecision {
  /// statements. The simplification pass can clean these up.
  class ForwardOpTreeImpl : ZoneAlgorithm {
  private:
+  using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>;
+
    /// Scope guard to limit the number of isl operations for this pass.
    IslMaxOperationsGuard &MaxOpGuard;
  
@@ -169,6 +222,18 @@ private:
    /// Whether we carried out at least one change to the SCoP.
    bool Modified = false;
  
+  /// Cache of how to forward values.
+  /// The key of this map is the llvm::Value to be forwarded and the
+  /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value
+  /// can evaluate differently depending on where it is evaluate. For instance,
+  /// a synthesizable Scev represents a recurrence with an loop but the loop's
+  /// exit value if evaluated after the loop.
+  /// The cached results are only valid for the current TargetStmt.
+  /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the
+  /// exit value when instantiated outside of the loop. The primary concern is
+  /// ambiguity when crossing PHI nodes, which currently is not supported.
+  MemoizationTy ForwardingActions;
+
    /// Contains the zones where array elements are known to contain a specific
    /// value.
    /// { [Element[] -> Zone[]] -> ValInst[] }
@@ -379,63 +444,62 @@ public:
    /// @param UseLoop     The loop @p Inst is used in.
    /// @param DefStmt     The statement @p Inst is defined in.
    /// @param DefLoop     The loop which contains @p Inst.
-  /// @param DoIt        If false, only determine whether an operand tree can be
-  ///                    forwarded. If true, carry out the forwarding. Do not
-  ///                    use DoIt==true if an operand tree is not known to be
-  ///                    forwardable.
    ///
-  /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
-  ///                           load.
-  ///         FD_CannotForward  if the pointer operand cannot be forwarded.
-  ///         FD_CanForwardProfitably if @p Inst is forwardable.
-  ///         FD_DidForwardTree if @p DoIt was true.
-  ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
-                                      ScopStmt *UseStmt, Loop *UseLoop,
-                                      ScopStmt *DefStmt, Loop *DefLoop,
-                                      bool DoIt) {
+  /// @return A ForwardingAction object describing the feasibility and
+  ///         profitability evaluation and the callback carrying-out the value
+  ///         forwarding.
+  ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
+                                    ScopStmt *UseStmt, Loop *UseLoop,
+                                    ScopStmt *DefStmt, Loop *DefLoop) {
      // Cannot do anything without successful known analysis.
      if (Known.is_null() || Translator.is_null() ||
          MaxOpGuard.hasQuotaExceeded())
-      return FD_NotApplicable;
+      return ForwardingAction::notApplicable();
  
      LoadInst *LI = dyn_cast<LoadInst>(Inst);
      if (!LI)
-      return FD_NotApplicable;
-
-    // If the load is already in the statement, no forwarding is necessary.
-    // However, it might happen that the LoadInst is already present in the
-    // statement's instruction list. In that case we do as follows:
-    // - For the evaluation (DoIt==false), we can trivially forward it as it is
-    //   benefit of forwarding an already present instruction.
-    // - For the execution (DoIt==true), prepend the instruction (to make it
-    //   available to all instructions following in the instruction list), but
-    //   do not add another MemoryAccess.
-    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
-    if (Access && !DoIt)
-      return FD_CanForwardProfitably;
+      return ForwardingAction::notApplicable();
  
-    ForwardingDecision OpDecision = forwardTree(
-        TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
+    ForwardingDecision OpDecision =
+        forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop);
      switch (OpDecision) {
-    case FD_CannotForward:
-      assert(!DoIt);
-      return OpDecision;
-
-    case FD_CanForwardLeaf:
      case FD_CanForwardProfitably:
-      assert(!DoIt);
-      break;
-
-    case FD_DidForwardLeaf:
-    case FD_DidForwardTree:
-      assert(DoIt);
+    case FD_CanForwardLeaf:
        break;
-
+    case FD_CannotForward:
+      return ForwardingAction::cannotForward();
      default:
        llvm_unreachable("Shouldn't return this");
      }
  
-    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
+    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
+    if (Access) {
+      // If the load is already in the statement, no forwarding is necessary.
+      // However, it might happen that the LoadInst is already present in the
+      // statement's instruction list. In that case we do as follows:
+      // - For the evaluation, we can trivially forward it as it is
+      //   benefit of forwarding an already present instruction.
+      // - For the execution, prepend the instruction (to make it
+      //   available to all instructions following in the instruction list), but
+      //   do not add another MemoryAccess.
+      auto ExecAction = [this, TargetStmt, LI, Access]() -> bool {
+        TargetStmt->prependInstruction(LI);
+        LLVM_DEBUG(
+            dbgs() << "    forwarded known load with preexisting MemoryAccess"
+                   << Access << "\n");
+
+        NumKnownLoadsForwarded++;
+        TotalKnownLoadsForwarded++;
+        return true;
+      };
+      return ForwardingAction::canForward(
+          ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
+    }
+
+    // Allow the following Isl calculations (until we return the
+    // ForwardingAction, excluding the code inside the lambda that will be
+    // executed later) to fail.
+    IslQuotaScope QuotaScope = MaxOpGuard.enter();
  
      // { DomainDef[] -> ValInst[] }
      isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
@@ -455,72 +519,71 @@ public:
  
      isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
      if (!SameVal)
-      return FD_NotApplicable;
+      return ForwardingAction::notApplicable();
  
-    if (DoIt)
-      TargetStmt->prependInstruction(LI);
+    LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
+                      << "\n");
+    LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
+                      << "\n");
  
-    if (!DoIt)
-      return FD_CanForwardProfitably;
+    // { ValInst[] }
+    isl::space ValInstSpace = ExpectedVal.get_space().range();
+
+    // After adding a new load to the SCoP, also update the Known content
+    // about it. The new load will have a known ValInst of
+    // { [DomainTarget[] -> Value[]] }
+    // but which -- because it is a copy of it -- has same value as the
+    // { [DomainDef[] -> Value[]] }
+    // that it replicates. Instead of  cloning the known content of
+    // [DomainDef[] -> Value[]]
+    // for DomainTarget[], we add a 'translator' that maps
+    // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
+    // before comparing to the known content.
+    // TODO: 'Translator' could also be used to map PHINodes to their incoming
+    // ValInsts.
+    isl::map LocalTranslator;
+    if (!ValInstSpace.is_wrapping().is_false()) {
+      // { DefDomain[] -> Value[] }
+      isl::map ValInsts = ExpectedVal.range().unwrap();
+
+      // { DefDomain[] }
+      isl::set DefDomain = ValInsts.domain();
+
+      // { Value[] }
+      isl::space ValSpace = ValInstSpace.unwrap().range();
+
+      // { Value[] -> Value[] }
+      isl::map ValToVal =
+          isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
+
+      // { DomainDef[] -> DomainTarget[] }
+      isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
+
+      // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
+      LocalTranslator = DefToTarget.reverse().product(ValToVal);
+      LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
+                        << "\n");
+
+      if (!LocalTranslator)
+        return ForwardingAction::notApplicable();
+    }
  
-    if (Access) {
-      LLVM_DEBUG(
-          dbgs() << "    forwarded known load with preexisting MemoryAccess"
-                 << Access << "\n");
-    } else {
-      Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
+    auto ExecAction = [this, TargetStmt, LI, SameVal,
+                       LocalTranslator]() -> bool {
+      TargetStmt->prependInstruction(LI);
+      MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
        LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
                          << Access << "\n");
  
-      // { ValInst[] }
-      isl::space ValInstSpace = ExpectedVal.get_space().range();
-
-      // After adding a new load to the SCoP, also update the Known content
-      // about it. The new load will have a known ValInst of
-      // { [DomainTarget[] -> Value[]] }
-      // but which -- because it is a copy of it -- has same value as the
-      // { [DomainDef[] -> Value[]] }
-      // that it replicates. Instead of  cloning the known content of
-      // [DomainDef[] -> Value[]]
-      // for DomainTarget[], we add a 'translator' that maps
-      // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
-      // before comparing to the known content.
-      // TODO: 'Translator' could also be used to map PHINodes to their incoming
-      // ValInsts.
-      if (ValInstSpace.is_wrapping()) {
-        // { DefDomain[] -> Value[] }
-        isl::map ValInsts = ExpectedVal.range().unwrap();
-
-        // { DefDomain[] }
-        isl::set DefDomain = ValInsts.domain();
-
-        // { Value[] }
-        isl::space ValSpace = ValInstSpace.unwrap().range();
-
-        // { Value[] -> Value[] }
-        isl::map ValToVal =
-            isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
-
-        // { DomainDef[] -> DomainTarget[] }
-        isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
-
-        // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
-        isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
-
+      if (LocalTranslator)
          Translator = Translator.add_map(LocalTranslator);
-        LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
-                          << "\n");
-      }
-    }
-    LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
-                      << "\n");
-    LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
-                      << "\n");
-    assert(Access);
  
-    NumKnownLoadsForwarded++;
-    TotalKnownLoadsForwarded++;
-    return FD_DidForwardTree;
+      NumKnownLoadsForwarded++;
+      TotalKnownLoadsForwarded++;
+      return true;
+    };
+    return ForwardingAction::canForward(
+        ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
    }
  
    /// Forward a scalar by redirecting the access to an array element that stores
@@ -532,36 +595,20 @@ public:
    /// @param UseLoop     The loop @p Inst is used in.
    /// @param DefStmt     The statement @p Inst is defined in.
    /// @param DefLoop     The loop which contains @p Inst.
-  /// @param DoIt        If false, only determine whether an operand tree can be
-  ///                    forwarded. If true, carry out the forwarding. Do not
-  ///                    use DoIt==true if an operand tree is not known to be
-  ///                    forwardable.
    ///
-  /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
-  ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
-  ///         FD_CanForwardProfitably if @p Inst has been reloaded.
-  ///         FD_DidForwardLeaf       if @p DoIt was true.
-  ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
-                                        ScopStmt *UseStmt, Loop *UseLoop,
-                                        ScopStmt *DefStmt, Loop *DefLoop,
-                                        bool DoIt) {
+  /// @return A ForwardingAction object describing the feasibility and
+  ///         profitability evaluation and the callback carrying-out the value
+  ///         forwarding.
+  ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
+                                      ScopStmt *UseStmt, Loop *UseLoop,
+                                      ScopStmt *DefStmt, Loop *DefLoop) {
      // Cannot do anything without successful known analysis.
      if (Known.is_null() || Translator.is_null() ||
          MaxOpGuard.hasQuotaExceeded())
-      return FD_NotApplicable;
-
-    MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
-    if (Access && Access->isLatestArrayKind()) {
-      if (DoIt)
-        return FD_DidForwardLeaf;
-      return FD_CanForwardLeaf;
-    }
+      return ForwardingAction::notApplicable();
  
-    // Don't spend too much time analyzing whether it can be reloaded. When
-    // carrying-out the forwarding, we cannot bail-out in the middle of the
-    // transformation. It also shouldn't take as long because some results are
-    // cached.
-    IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
+    // Don't spend too much time analyzing whether it can be reloaded.
+    IslQuotaScope QuotaScope = MaxOpGuard.enter();
  
      // { DomainDef[] -> ValInst[] }
      isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
@@ -578,21 +625,22 @@ public:
      isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
  
      isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
+    simplify(SameVal);
      if (!SameVal)
-      return FD_NotApplicable;
-
-    if (!DoIt)
-      return FD_CanForwardProfitably;
+      return ForwardingAction::notApplicable();
  
-    if (!Access)
-      Access = TargetStmt->ensureValueRead(Inst);
+    auto ExecAction = [this, TargetStmt, Inst, SameVal]() {
+      MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
+      if (!Access)
+        Access = TargetStmt->ensureValueRead(Inst);
+      Access->setNewAccessRelation(SameVal);
  
-    simplify(SameVal);
-    Access->setNewAccessRelation(SameVal);
+      TotalReloads++;
+      NumReloads++;
+      return false;
+    };
  
-    TotalReloads++;
-    NumReloads++;
-    return FD_DidForwardLeaf;
+    return ForwardingAction::canForward(ExecAction, {}, true);
    }
  
    /// Forwards a speculatively executable instruction.
@@ -601,23 +649,16 @@ public:
    /// @param UseInst     The (possibly speculatable) instruction to forward.
    /// @param DefStmt     The statement @p UseInst is defined in.
    /// @param DefLoop     The loop which contains @p UseInst.
-  /// @param DoIt        If false, only determine whether an operand tree can be
-  ///                    forwarded. If true, carry out the forwarding. Do not
-  ///                    use DoIt==true if an operand tree is not known to be
-  ///                    forwardable.
    ///
-  /// @return FD_NotApplicable  if @p UseInst is not speculatable.
-  ///         FD_CannotForward  if one of @p UseInst's operands is not
-  ///                           forwardable.
-  ///         FD_CanForwardTree if @p UseInst is forwardable.
-  ///         FD_DidForward     if @p DoIt was true.
-  ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
-                                         Instruction *UseInst,
-                                         ScopStmt *DefStmt, Loop *DefLoop,
-                                         bool DoIt) {
+  /// @return A ForwardingAction object describing the feasibility and
+  ///         profitability evaluation and the callback carrying-out the value
+  ///         forwarding.
+  ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt,
+                                       Instruction *UseInst, ScopStmt *DefStmt,
+                                       Loop *DefLoop) {
      // PHIs, unless synthesizable, are not yet supported.
      if (isa<PHINode>(UseInst))
-      return FD_NotApplicable;
+      return ForwardingAction::notApplicable();
  
      // Compatible instructions must satisfy the following conditions:
      // 1. Idempotent (instruction will be copied, not moved; although its
@@ -632,64 +673,55 @@ public:
      // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
      // not sufficient because it allows memory accesses.
      if (mayBeMemoryDependent(*UseInst))
-      return FD_NotApplicable;
-
-    if (DoIt) {
-      // To ensure the right order, prepend this instruction before its
-      // operands. This ensures that its operands are inserted before the
-      // instruction using them.
-      // TODO: The operand tree is not really a tree, but a DAG. We should be
-      // able to handle DAGs without duplication.
-      TargetStmt->prependInstruction(UseInst);
-      NumInstructionsCopied++;
-      TotalInstructionsCopied++;
-    }
+      return ForwardingAction::notApplicable();
  
+    SmallVector<ForwardingAction::KeyTy, 4> Depends;
+    Depends.reserve(UseInst->getNumOperands());
      for (Value *OpVal : UseInst->operand_values()) {
        ForwardingDecision OpDecision =
-          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
+          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop);
        switch (OpDecision) {
        case FD_CannotForward:
-        assert(!DoIt);
-        return FD_CannotForward;
+        return ForwardingAction::cannotForward();
  
        case FD_CanForwardLeaf:
        case FD_CanForwardProfitably:
-        assert(!DoIt);
-        break;
-
-      case FD_DidForwardLeaf:
-      case FD_DidForwardTree:
-        assert(DoIt);
+        Depends.emplace_back(OpVal, DefStmt);
          break;
  
        case FD_NotApplicable:
-        llvm_unreachable("forwardTree should never return FD_NotApplicable");
+      case FD_Unknown:
+        llvm_unreachable(
+            "forwardTree should never return FD_NotApplicable/FD_Unknown");
        }
      }
  
-    if (DoIt)
-      return FD_DidForwardTree;
-    return FD_CanForwardProfitably;
+    auto ExecAction = [this, TargetStmt, UseInst, DefStmt]() {
+      // To ensure the right order, prepend this instruction before its
+      // operands. This ensures that its operands are inserted before the
+      // instruction using them.
+      TargetStmt->prependInstruction(UseInst);
+      NumInstructionsCopied++;
+      TotalInstructionsCopied++;
+      return true;
+    };
+    return ForwardingAction::canForward(ExecAction, Depends, true);
    }
  
-  /// Determines whether an operand tree can be forwarded or carries out a
-  /// forwarding, depending on the @p DoIt flag.
+  /// Determines whether an operand tree can be forwarded and returns
+  /// instructions how to do so in the form of a ForwardingAction object.
    ///
    /// @param TargetStmt  The statement the operand tree will be copied to.
    /// @param UseVal      The value (usually an instruction) which is root of an
    ///                    operand tree.
    /// @param UseStmt     The statement that uses @p UseVal.
    /// @param UseLoop     The loop @p UseVal is used in.
-  /// @param DoIt        If false, only determine whether an operand tree can be
-  ///                    forwarded. If true, carry out the forwarding. Do not
-  ///                    use DoIt==true if an operand tree is not known to be
-  ///                    forwardable.
    ///
-  /// @return If DoIt==false, return whether the operand tree can be forwarded.
-  ///         If DoIt==true, return FD_DidForward.
-  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
-                                 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
+  /// @return A ForwardingAction object describing the feasibility and
+  ///         profitability evaluation and the callback carrying-out the value
+  ///         forwarding.
+  ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal,
+                                   ScopStmt *UseStmt, Loop *UseLoop) {
      ScopStmt *DefStmt = nullptr;
      Loop *DefLoop = nullptr;
  
@@ -702,16 +734,9 @@ public:
      case VirtualUse::Block:
      case VirtualUse::Hoisted:
        // These can be used anywhere without special considerations.
-      if (DoIt)
-        return FD_DidForwardTree;
-      return FD_CanForwardLeaf;
+      return ForwardingAction::triviallyForwardable(false);
  
      case VirtualUse::Synthesizable: {
-      // ScopExpander will take care for of generating the code at the new
-      // location.
-      if (DoIt)
-        return FD_DidForwardTree;
-
        // Check if the value is synthesizable at the new location as well. This
        // might be possible when leaving a loop for which ScalarEvolution is
        // unable to derive the exit value for.
@@ -725,32 +750,34 @@ public:
        VirtualUse TargetUse = VirtualUse::create(
            S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
        if (TargetUse.getKind() == VirtualUse::Synthesizable)
-        return FD_CanForwardLeaf;
+        return ForwardingAction::triviallyForwardable(false);
  
        LLVM_DEBUG(
            dbgs() << "    Synthesizable would not be synthesizable anymore: "
                   << *UseVal << "\n");
-      return FD_CannotForward;
+      return ForwardingAction::cannotForward();
      }
  
-    case VirtualUse::ReadOnly:
-      // Note that we cannot return FD_CanForwardTree here. With a operand tree
-      // depth of 0, UseVal is the use in TargetStmt that we try to replace.
-      // With -polly-analyze-read-only-scalars=true we would ensure the
-      // existence of a MemoryAccess (which already exists for a leaf) and be
-      // removed again by tryForwardTree because it's goal is to remove this
-      // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
-      // to do so.
-      if (!DoIt)
-        return FD_CanForwardLeaf;
-
+    case VirtualUse::ReadOnly: {
        // If we model read-only scalars, we need to create a MemoryAccess for it.
-      if (ModelReadOnlyScalars)
-        TargetStmt->ensureValueRead(UseVal);
-
-      NumReadOnlyCopied++;
-      TotalReadOnlyCopied++;
-      return FD_DidForwardLeaf;
+      auto ExecAction = [this, TargetStmt, UseVal]() {
+        if (ModelReadOnlyScalars)
+          TargetStmt->ensureValueRead(UseVal);
+
+        NumReadOnlyCopied++;
+        TotalReadOnlyCopied++;
+
+        // Note that we cannot return true here. With a operand tree
+        // depth of 0, UseVal is the use in TargetStmt that we try to replace.
+        // With -polly-analyze-read-only-scalars=true we would ensure the
+        // existence of a MemoryAccess (which already exists for a leaf) and be
+        // removed again by tryForwardTree because it's goal is to remove this
+        // scalar MemoryAccess. It interprets FD_CanForwardTree as the
+        // permission to do so.
+        return false;
+      };
+      return ForwardingAction::canForward(ExecAction, {}, false);
+    }
  
      case VirtualUse::Intra:
        // Knowing that UseStmt and DefStmt are the same statement instance, just
@@ -764,35 +791,140 @@ public:
        if (!DefStmt) {
          DefStmt = S->getStmtFor(Inst);
          if (!DefStmt)
-          return FD_CannotForward;
+          return ForwardingAction::cannotForward();
        }
  
        DefLoop = LI->getLoopFor(Inst->getParent());
  
-      ForwardingDecision SpeculativeResult =
-          forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
-      if (SpeculativeResult != FD_NotApplicable)
+      ForwardingAction SpeculativeResult =
+          forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop);
+      if (SpeculativeResult.Decision != FD_NotApplicable)
          return SpeculativeResult;
  
-      ForwardingDecision KnownResult = forwardKnownLoad(
-          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
-      if (KnownResult != FD_NotApplicable)
+      ForwardingAction KnownResult = forwardKnownLoad(
+          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
+      if (KnownResult.Decision != FD_NotApplicable)
          return KnownResult;
  
-      ForwardingDecision ReloadResult = reloadKnownContent(
-          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
-      if (ReloadResult != FD_NotApplicable)
+      ForwardingAction ReloadResult = reloadKnownContent(
+          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
+      if (ReloadResult.Decision != FD_NotApplicable)
          return ReloadResult;
  
        // When no method is found to forward the operand tree, we effectively
        // cannot handle it.
        LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
-      return FD_CannotForward;
+      return ForwardingAction::cannotForward();
      }
  
      llvm_unreachable("Case unhandled");
    }
  
+  /// Determines whether an operand tree can be forwarded. Previous evaluations
+  /// are cached.
+  ///
+  /// @param TargetStmt  The statement the operand tree will be copied to.
+  /// @param UseVal      The value (usually an instruction) which is root of an
+  ///                    operand tree.
+  /// @param UseStmt     The statement that uses @p UseVal.
+  /// @param UseLoop     The loop @p UseVal is used in.
+  ///
+  /// @return FD_CannotForward        if @p UseVal cannot be forwarded.
+  ///         FD_CanForwardLeaf       if @p UseVal is forwardable, but not
+  ///                                 profitable.
+  ///         FD_CanForwardProfitably if @p UseVal is forwardable and useful to
+  ///                                 do.
+  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
+                                 ScopStmt *UseStmt, Loop *UseLoop) {
+    // Lookup any cached evaluation.
+    auto It = ForwardingActions.find({UseVal, UseStmt});
+    if (It != ForwardingActions.end())
+      return It->second.Decision;
+
+    // Make a new evaluation.
+    ForwardingAction Action =
+        forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop);
+    ForwardingDecision Result = Action.Decision;
+
+    // Remember for the next time.
+    assert(!ForwardingActions.count({UseVal, UseStmt}) &&
+           "circular dependency?");
+    ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)});
+
+    return Result;
+  }
+
+  /// Forward an operand tree using cached actions.
+  ///
+  /// @param Stmt   Statement the operand tree is moved into.
+  /// @param UseVal Root of the operand tree within @p Stmt.
+  /// @param RA     The MemoryAccess for @p UseVal that the forwarding intends
+  ///               to remove.
+  void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) {
+    using ChildItTy =
+        decltype(std::declval<ForwardingAction>().Depends.begin());
+    using EdgeTy = std::pair<ForwardingAction *, ChildItTy>;
+
+    DenseSet<ForwardingAction::KeyTy> Visited;
+    SmallVector<EdgeTy, 32> Stack;
+    SmallVector<ForwardingAction *, 32> Ordered;
+
+    // Seed the tree search using the root value.
+    assert(ForwardingActions.count({UseVal, Stmt}));
+    ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}];
+    Stack.emplace_back(RootAction, RootAction->Depends.begin());
+
+    // Compute the postorder of the operand tree: all operands of an instruction
+    // must be visited before the instruction itself. As an additional
+    // requirement, the topological ordering must be 'compact': Any subtree node
+    // must not be interleaved with nodes from a non-shared subtree. This is
+    // because the same llvm::Instruction can be materialized multiple times as
+    // used at different ScopStmts which might be different values. Intersecting
+    // these lifetimes may result in miscompilations.
+    // FIXME: Intersecting lifetimes might still be possible for the roots
+    // themselves, since instructions are just prepended to a ScopStmt's
+    // instruction list.
+    while (!Stack.empty()) {
+      EdgeTy &Top = Stack.back();
+      ForwardingAction *TopAction = Top.first;
+      ChildItTy &TopEdge = Top.second;
+
+      if (TopEdge == TopAction->Depends.end()) {
+        // Postorder sorting
+        Ordered.push_back(TopAction);
+        Stack.pop_back();
+        continue;
+      }
+      ForwardingAction::KeyTy Key = *TopEdge;
+
+      // Next edge for this level
+      ++TopEdge;
+
+      auto VisitIt = Visited.insert(Key);
+      if (!VisitIt.second)
+        continue;
+
+      assert(ForwardingActions.count(Key) &&
+             "Must not insert new actions during execution phase");
+      ForwardingAction *ChildAction = &ForwardingActions[Key];
+      Stack.emplace_back(ChildAction, ChildAction->Depends.begin());
+    }
+
+    // Actually, we need the reverse postorder because actions prepend new
+    // instructions. Therefore, the first one will always be the action for the
+    // operand tree's root.
+    assert(Ordered.back() == RootAction);
+    if (RootAction->Execute())
+      Stmt->removeSingleMemoryAccess(RA);
+    Ordered.pop_back();
+    for (auto DepAction : reverse(Ordered)) {
+      assert(DepAction->Decision != FD_Unknown &&
+             DepAction->Decision != FD_CannotForward);
+      assert(DepAction != RootAction);
+      DepAction->Execute();
+    }
+  }
+
    /// Try to forward an operand tree rooted in @p RA.
    bool tryForwardTree(MemoryAccess *RA) {
      assert(RA->isLatestScalarKind());
@@ -809,20 +941,17 @@ public:
      }
  
      ForwardingDecision Assessment =
-        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
-    assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
-    if (Assessment != FD_CanForwardProfitably)
-      return false;
+        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop);
  
-    ForwardingDecision Execution =
-        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
-    assert(((Execution == FD_DidForwardTree) ||
-            (Execution == FD_DidForwardLeaf)) &&
-           "A previous positive assessment must also be executable");
+    // If considered feasible and profitable, forward it.
+    bool Changed = false;
+    if (Assessment == FD_CanForwardProfitably) {
+      applyForwardingActions(Stmt, RA->getAccessValue(), RA);
+      Changed = true;
+    }
  
-    if (Execution == FD_DidForwardTree)
-      Stmt->removeSingleMemoryAccess(RA);
-    return true;
+    ForwardingActions.clear();
+    return Changed;
    }
  
    /// Return which SCoP this instance is processing.
diff --git a/polly/test/ForwardOpTree/forward_load_tripleuse.ll b/polly/test/ForwardOpTree/forward_load_tripleuse.ll

index 2567a55..80567a5 100644 (file)
--- a/polly/test/ForwardOpTree/forward_load_tripleuse.ll
+++ b/polly/test/ForwardOpTree/forward_load_tripleuse.ll
@@ -53,7 +53,7 @@ return:
  
  ; CHECK: Statistics {
  ; CHECK:     Instructions copied: 1
-; CHECK:     Known loads forwarded: 3
+; CHECK:     Known loads forwarded: 2
  ; CHECK:     Operand trees forwarded: 2
  ; CHECK:     Statements with forwarded operand trees: 1
  ; CHECK: }
@@ -80,7 +80,6 @@ return:
  ; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };
  ; CHECK-NEXT:             Instructions {
  ; CHECK-NEXT:                   %val1 = load double, double* %A_idx, align 8
-; CHECK-NEXT:                   %val1 = load double, double* %A_idx, align 8
  ; CHECK-NEXT:                   %val2 = fadd double %val1, %val1
  ; CHECK-NEXT:                   %val1 = load double, double* %A_idx, align 8
  ; CHECK-NEXT:                   store double %val1, double* %B_idx, align 8
@@ -125,7 +124,7 @@ return:
  
  ; CHECK: Statistics {
  ; CHECK:     Instructions copied: 1
-; CHECK:     Known loads forwarded: 3
+; CHECK:     Known loads forwarded: 2
  ; CHECK:     Operand trees forwarded: 2
  ; CHECK:     Statements with forwarded operand trees: 1
  ; CHECK: }
@@ -153,7 +152,6 @@ return:
  ; CHECK-NEXT:             Instructions {
  ; CHECK-NEXT:                   %val1 = load double, double* %A_idx, align 8
  ; CHECK-NEXT:                   %val1 = load double, double* %A_idx, align 8
-; CHECK-NEXT:                   %val1 = load double, double* %A_idx, align 8
  ; CHECK-NEXT:                   %val2 = fadd double %val1, %val1
  ; CHECK-NEXT:                   store double %val2, double* %B_idx, align 8
  ; CHECK-NEXT:                   store double %val1, double* %C_idx, align 8
diff --git a/polly/test/ForwardOpTree/forward_reusue.ll b/polly/test/ForwardOpTree/forward_reusue.ll

new file mode 100644 (file)

index 0000000..2846496
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_reusue.ll
@@ -0,0 +1,65 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Move operand tree without duplicating values used multiple times.
+;
+define void @func(i32 %n, double* noalias nonnull %A) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %val1 = fadd double 12.5, 12.5
+      %val2 = fadd double %val1, %val1
+      %val3 = fadd double %val2, %val2
+      %val4 = fadd double %val3, %val3
+      %val5 = fadd double %val4, %val4
+      br label %bodyB
+
+    bodyB:
+      store double %val5, double* %A
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Instructions copied: 5
+; CHECK:     Operand trees forwarded: 1
+; CHECK: }
+
+; CHECK:      After statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_val5[] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val1 = fadd double 1.250000e+01, 1.250000e+01
+; CHECK-NEXT:                   %val2 = fadd double %val1, %val1
+; CHECK-NEXT:                   %val3 = fadd double %val2, %val2
+; CHECK-NEXT:                   %val4 = fadd double %val3, %val3
+; CHECK-NEXT:                   %val5 = fadd double %val4, %val4
+; CHECK-NEXT:             }
+; CHECK-NEXT:     Stmt_bodyB
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_A[0] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val1 = fadd double 1.250000e+01, 1.250000e+01
+; CHECK-NEXT:                   %val2 = fadd double %val1, %val1
+; CHECK-NEXT:                   %val3 = fadd double %val2, %val2
+; CHECK-NEXT:                   %val4 = fadd double %val3, %val3
+; CHECK-NEXT:                   %val5 = fadd double %val4, %val4
+; CHECK-NEXT:                   store double %val5, double* %A, align 8
+; CHECK-NEXT:             }
+; CHECK-NEXT: }
author	Michael Kruse <llvm-project@meinersbur.de>
	Fri, 16 Oct 2020 17:37:34 +0000 (12:37 -0500)
committer	Michael Kruse <llvm-project@meinersbur.de>
	Tue, 20 Oct 2020 23:05:35 +0000 (18:05 -0500)
polly/lib/Transform/ForwardOpTree.cpp		patch \| blob \| history
polly/test/ForwardOpTree/forward_load_tripleuse.ll		patch \| blob \| history
polly/test/ForwardOpTree/forward_reusue.ll	[new file with mode: 0644]	patch \| blob