Normally, the FakeUse for preserving the atomic load ends
up on the load's Dest. However, for fused load+add, the load
is deleted, and its Dest is no longer defined. This trips
up the liveness analysis when it happens on a non-entry
block. So the FakeUse should be for the add's dest instead,
in that case.
At the point where the FakeUse is inserted we no longer have a
handle to the add, so introduce a getLastInserted() helper.
There are a couple of ways to implement it:
- modify insert() to track the last-inserted instruction explicitly
- rewind one step from Next
The alternative would be to disable load/arithmetic fusing for atomic loads.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/417353003
// Patch fragment: '-'/'+' prefixes mark removed/added lines, not C++.
// init() points the lowering context at node N's instruction list. The hunk
// adds a saved Begin iterator (so iterators can later be moved backward) and
// renames advance() to advanceForward().
void LoweringContext::init(CfgNode *N) {
Node = N;
- Cur = getNode()->getInsts().begin();
+ Begin = getNode()->getInsts().begin();
+ Cur = Begin;
End = getNode()->getInsts().end();
skipDeleted(Cur);
Next = Cur;
- advance(Next);
+ advanceForward(Next);
}
// NOTE(review): this definition looks truncated by extraction -- only
// "++I;" is visible and `I` is not declared in this fragment; the actual
// list-insertion at the Next cursor appears to have been lost. Confirm
// against the full file before relying on this body.
void LoweringContext::insert(Inst *Inst) {
++I;
}
// Advances I forward past deleted instructions to the next live one,
// leaving I == End once the end of the list is reached. ('-'/'+' are patch
// markers for the rename advance() -> advanceForward().)
-void LoweringContext::advance(InstList::iterator &I) const {
+void LoweringContext::advanceForward(InstList::iterator &I) const {
if (I != End) {
++I;
skipDeleted(I);
}
}
// Moves I backward to the nearest preceding non-deleted instruction.
// Asserts that I is not already at Begin. Note: if the walk reaches Begin,
// it stops there even when *Begin is itself deleted.
+void LoweringContext::advanceBackward(InstList::iterator &I) const {
+ assert(I != Begin);
+ do {
+ --I;
+ } while (I != Begin && (*I)->isDeleted());
+}
+
// Returns the most recently inserted (live) instruction: insertions happen
// before the Next cursor, so stepping one non-deleted instruction back from
// Next yields the last one inserted.
+Inst *LoweringContext::getLastInserted() const {
+ InstList::iterator Cursor = Next;
+ advanceBackward(Cursor);
+ return *Cursor;
+}
+
// NOTE(review): extraction artifact -- createLowering's real body appears
// to be missing; the "return *Next;" below does not match this function's
// return type and likely belongs to a different member function that was
// fused in. Verify against the full file.
TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
// These statements can be #ifdef'd to specialize the code generator
// to a subset of the available targets. TODO: use CRTP.
return *Next;
}
// --- Fragment of the LoweringContext class body; the class header is
// outside this chunk, and '-'/'+' prefixes are patch markers. ---
// getNextInst: returns the next live instruction after Iter, or NULL when
// the end of the list is reached.
// NOTE(review): the closing brace of getNextInst appears to have been lost
// in extraction before getCur() below.
Inst *getNextInst(InstList::iterator &Iter) const {
- advance(Iter);
+ advanceForward(Iter);
if (Iter == End)
return NULL;
return *Iter;
InstList::iterator getCur() const { return Cur; }
InstList::iterator getEnd() const { return End; }
void insert(Inst *Inst);
+ Inst *getLastInserted() const;
void advanceCur() { Cur = Next; }
- void advanceNext() { advance(Next); }
+ void advanceNext() { advanceForward(Next); }
void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
private:
// insertion point", to avoid confusion when previously-deleted
// instructions come between the two points.
InstList::iterator Next;
+ // Begin is a copy of Insts.begin(), used if iterators are moved backward.
+ InstList::iterator Begin;
// End is a copy of Insts.end(), used if Next needs to be advanced.
InstList::iterator End;
void skipDeleted(InstList::iterator &I) const;
- void advance(InstList::iterator &I) const;
+ void advanceForward(InstList::iterator &I) const;
+ void advanceBackward(InstList::iterator &I) const;
LoweringContext(const LoweringContext &) LLVM_DELETED_FUNCTION;
LoweringContext &operator=(const LoweringContext &) LLVM_DELETED_FUNCTION;
};
// --- Fragment of atomic-load intrinsic lowering ('-'/'+' are patch
// markers; the enclosing function and switch are outside this chunk). The
// i64 path keeps the loaded value live with FakeUses of the lo/hi halves.
// The word-size path switches from FakeUse(Dest) to
// FakeUse(getLastInserted()->getDest()): lowerLoad may fuse the load with
// a following arithmetic instruction, deleting the load so that Dest is no
// longer defined -- the FakeUse must target the fused instruction's dest.
// Then cast the bits back out of the XMM register to the i64 Dest.
InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
lowerCast(Cast);
- // Make sure that the atomic load isn't elided.
+ // Make sure that the atomic load isn't elided when unused.
Context.insert(InstFakeUse::create(Func, Dest->getLo()));
Context.insert(InstFakeUse::create(Func, Dest->getHi()));
return;
}
InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
lowerLoad(Load);
- // Make sure the atomic load isn't elided.
- Context.insert(InstFakeUse::create(Func, Dest));
+ // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
+ // Since lowerLoad may fuse the load w/ an arithmetic instruction,
+ // insert the FakeUse on the last-inserted instruction's dest.
+ Context.insert(InstFakeUse::create(Func,
+ Context.getLastInserted()->getDest()));
return;
}
case Intrinsics::AtomicRMW:
; --- Fragment of the FileCheck-driven IR regression tests ('+' marks
; patch-added lines). NOTE(review): the function bodies appear truncated by
; extraction (no ret / closing braces visible); confirm against the full
; test file. The added branch puts the atomic load in a non-entry block,
; which is the case the patch's liveness fix targets.
define i32 @test_atomic_load_32_with_arith(i32 %iptr) {
entry:
+ br label %next
+
+next:
%ptr = inttoptr i32 %iptr to i32*
%r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
%r2 = add i32 %r, 32
; CHECK-LABEL: test_atomic_load_32_with_arith
; CHECK: mov {{.*}}, dword
; The next instruction may be a separate load or folded into an add.
+;
+; In O2 mode, we know that the load and add are going to be fused.
+; CHECKO2-LABEL: test_atomic_load_32_with_arith
+; CHECKO2: mov {{.*}}, dword
+; CHECKO2: add {{.*}}, dword
define i32 @test_atomic_load_32_ignored(i32 %iptr) {
entry:
; CHECK-LABEL: test_atomic_load_32_ignored
; CHECK: mov {{.*}}, dword
; CHECK: mov {{.*}}, dword
+; CHECKO2-LABEL: test_atomic_load_32_ignored
+; CHECKO2: mov {{.*}}, dword
+; CHECKO2: mov {{.*}}, dword
define i64 @test_atomic_load_64_ignored(i32 %iptr) {
entry: