#include "llvm/Pass.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
}
- // Now that we're done with the algorithm, see if we can optimize the
- // results slightly by reducing the number of new instructions needed.
- // Arguably, this should be integrated into the algorithm above, but
- // doing as a post process step is easier to reason about for the moment.
- DenseMap<Value *, Value *> ReverseMap;
- SmallPtrSet<Instruction *, 16> NewInsts;
- SmallSetVector<AssertingVH<Instruction>, 16> Worklist;
- // Note: We need to visit the states in a deterministic order. We uses the
- // Keys we sorted above for this purpose. Note that we are papering over a
- // bigger problem with the algorithm above - it's visit order is not
- // deterministic. A larger change is needed to fix this.
- for (auto Pair : States) {
- auto *BDV = Pair.first;
- auto State = Pair.second;
- Value *Base = State.getBase();
- assert(BDV && Base);
- assert(!isKnownBaseResult(BDV) && "why did it get added?");
- assert(isKnownBaseResult(Base) &&
- "must be something we 'know' is a base pointer");
- if (!State.isConflict())
- continue;
-
- ReverseMap[Base] = BDV;
- if (auto *BaseI = dyn_cast<Instruction>(Base)) {
- NewInsts.insert(BaseI);
- Worklist.insert(BaseI);
- }
- }
- auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI,
- Value *Replacement) {
- // Add users which are new instructions (excluding self references)
- for (User *U : BaseI->users())
- if (auto *UI = dyn_cast<Instruction>(U))
- if (NewInsts.count(UI) && UI != BaseI)
- Worklist.insert(UI);
- // Then do the actual replacement
- NewInsts.erase(BaseI);
- ReverseMap.erase(BaseI);
- BaseI->replaceAllUsesWith(Replacement);
- assert(States.count(BDV));
- assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI);
- States[BDV] = BDVState(BDVState::Conflict, Replacement);
- BaseI->eraseFromParent();
- };
- const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
- while (!Worklist.empty()) {
- Instruction *BaseI = Worklist.pop_back_val();
- assert(NewInsts.count(BaseI));
- Value *Bdv = ReverseMap[BaseI];
- if (auto *BdvI = dyn_cast<Instruction>(Bdv))
- if (BaseI->isIdenticalTo(BdvI)) {
- DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
- ReplaceBaseInstWith(Bdv, BaseI, Bdv);
- continue;
- }
- if (Value *V = SimplifyInstruction(BaseI, DL)) {
- DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
- ReplaceBaseInstWith(Bdv, BaseI, V);
- continue;
- }
- }
-
// Cache all of our results so we can cheaply reuse them
// NOTE: This is actually two caches: one of the base defining value
// relation and one of the base pointer relation! FIXME
auto *BDV = Pair.first;
Value *base = Pair.second.getBase();
assert(BDV && base);
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none";
DEBUG(dbgs() << "Updating base value cache"
<< " to: " << base->getName() << "\n");
if (cache.count(BDV)) {
+ assert(isKnownBaseResult(base) &&
+ "must be something we 'know' is a base pointer");
// Once we transition from the BDV relation being store in the cache to
// the base relation being stored, it must be stable
assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) &&
; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %obj_to_consume base %obj_to_consume
+; CHECK: derived %obj_to_consume base %obj_to_consume.base
declare void @foo()
; we'd have commoned these, but that's a missed optimization, not correctness.
; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
; CHECK-NOT: extra.base
+; CHECK: next.base = select
; CHECK: next = select
; CHECK: extra2.base = select
; CHECK: extra2 = select
merge: ; preds = %taken, %entry
; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
; CHECK-NEXT: gc.statepoint
%bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
merge: ; preds = %taken, %entry
; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: phi
; CHECK-NEXT: gc.statepoint
%bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj, %taken ]
call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
merge: ; preds = %merge, %entry
; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
; CHECK-NEXT: br i1
%bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %merge ]
br i1 %cnd, label %merge, label %next
merge2: ; preds = %untaken2, %taken2
; CHECK-LABEL: merge2:
-; CHECK-NEXT: %obj = phi i64 addrspace(1)*
-; CHECK-NEXT: statepoint
+; CHECK: %obj.base = phi i64 addrspace(1)*
+; CHECK: %obj = phi i64 addrspace(1)*
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%obj.base, %obj)
; CHECK: gc.relocate
-; CHECK-DAG: ; (%obj, %obj)
+; CHECK-DAG: ; (%obj.base, %obj.base)
%obj = phi i64 addrspace(1)* [ %obj0, %taken2 ], [ %obj1, %untaken2 ]
call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
; CHECK: extractelement
; CHECK: statepoint
; CHECK: gc.relocate
-; CHECK-DAG: (%obj, %obj)
+; CHECK-DAG: (%obj.base, %obj)
entry:
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %ptr, i32 0
%obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
; CHECK-LABEL: test4
; CHECK: statepoint
; CHECK: gc.relocate
-; CHECK-DAG: ; (%ptr, %obj)
-; CHECK: gc.relocate
-; CHECK-DAG: ; (%ptr, %ptr)
+; CHECK-DAG: ; (%obj.base, %obj)
; When we can optimize an extractelement from a known
; index and avoid introducing new base pointer instructions
entry:
define void @test5(i1 %cnd, i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test5
; CHECK: gc.relocate
-; CHECK-DAG: (%obj, %bdv)
+; CHECK-DAG: (%bdv.base, %bdv)
; When we fundementally have to duplicate
entry:
%gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p) gc "statepoint-example" {
; CHECK-LABEL: test5
; CHECK: insertelement
+; CHECK-NEXT: insertelement
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %vec.relocated.casted
entry:
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
merge: ; preds = %untaken, %taken
; CHECK-LABEL: merge:
; CHECK-NEXT: = phi
+; CHECK-NEXT: = phi
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
%obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
call void @do_safepoint() [ "deopt"() ]
define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p) gc "statepoint-example" {
; CHECK-LABEL: test5
; CHECK: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
; A base vector from a load
entry:
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
merge: ; preds = %untaken, %taken
; CHECK-LABEL: merge:
; CHECK-NEXT: = phi
+; CHECK-NEXT: = phi
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>