SmallPtrSetImpl<Instruction*> &SafeToPromote);
void TruncateSinks(SmallPtrSetImpl<Value*> &Sources,
SmallPtrSetImpl<Instruction*> &Sinks);
- void Cleanup(SmallPtrSetImpl<Instruction*> &Sinks);
+ void Cleanup(SmallPtrSetImpl<Value*> &Visited);
public:
IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
Opc == Instruction::SRem;
}
+static bool EqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
+}
+
+static bool LessOrEqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize;
+}
+
+static bool GreaterThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
+}
+
/// Some instructions can use 8- and 16-bit operands, and we don't need to
/// promote anything larger. We disallow booleans to make life easier when
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
if (auto *Ld = dyn_cast<LoadInst>(V))
Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
- const IntegerType *IntTy = dyn_cast<IntegerType>(Ty);
- if (!IntTy)
+ if (!isa<IntegerType>(Ty))
return false;
- return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize;
+ return LessOrEqualTypeSize(V);
}
/// Return true if the given value is a source in the use-def chain, producing
else if (auto *Call = dyn_cast<CallInst>(V))
return Call->hasRetAttr(Attribute::AttrKind::ZExt);
else if (auto *Trunc = dyn_cast<TruncInst>(V))
- return isSupportedType(Trunc);
+ return EqualTypeSize(Trunc);
return false;
}
// TODO The truncate also isn't actually necessary because we would already
// proved that the data value is kept within the range of the original data
// type.
- auto UsesNarrowValue = [](Value *V) {
- return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
- };
if (auto *Store = dyn_cast<StoreInst>(V))
- return UsesNarrowValue(Store->getValueOperand());
+ return LessOrEqualTypeSize(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
- return UsesNarrowValue(Return->getReturnValue());
+ return LessOrEqualTypeSize(Return->getReturnValue());
if (auto *Trunc = dyn_cast<TruncInst>(V))
- return UsesNarrowValue(Trunc->getOperand(0));
+ return EqualTypeSize(Trunc->getOperand(0));
if (auto *ZExt = dyn_cast<ZExtInst>(V))
- return UsesNarrowValue(ZExt->getOperand(0));
+ return GreaterThanTypeSize(ZExt);
if (auto *ICmp = dyn_cast<ICmpInst>(V))
return ICmp->isSigned();
}
}
}
-
}
-void IRPromoter::Cleanup(SmallPtrSetImpl<Instruction*> &Sinks) {
- // Some zext sinks will now have become redundant, along with their trunc
- // operands, so remove them.
- for (auto I : Sinks) {
- if (auto *ZExt = dyn_cast<ZExtInst>(I)) {
- if (ZExt->getDestTy() != ExtTy)
- continue;
+void IRPromoter::Cleanup(SmallPtrSetImpl<Value*> &Visited) {
+ // Some zexts will now have become redundant, along with their trunc
+ // operands, so remove them
+ for (auto V : Visited) {
+ if (!isa<ZExtInst>(V))
+ continue;
- Value *Src = ZExt->getOperand(0);
- if (ZExt->getSrcTy() == ZExt->getDestTy()) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary zext\n");
- ReplaceAllUsersOfWith(ZExt, Src);
- InstsToRemove.push_back(ZExt);
- continue;
- }
+ auto ZExt = cast<ZExtInst>(V);
+ if (ZExt->getDestTy() != ExtTy)
+ continue;
- // For any truncs that we insert to handle zexts, we can replace the
- // result of the zext with the input to the trunc.
- if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
- auto *Trunc = cast<TruncInst>(Src);
- assert(Trunc->getOperand(0)->getType() == ExtTy &&
- "expected inserted trunc to be operating on i32");
- LLVM_DEBUG(dbgs() << "ARM CGP: Replacing zext with trunc operand: "
- << *Trunc->getOperand(0));
- ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
- InstsToRemove.push_back(ZExt);
- }
+ Value *Src = ZExt->getOperand(0);
+ if (ZExt->getSrcTy() == ZExt->getDestTy()) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast.\n");
+ ReplaceAllUsersOfWith(ZExt, Src);
+ InstsToRemove.push_back(ZExt);
+ continue;
+ }
+
+ // For any truncs that we insert to handle zexts, we can replace the
+ // result of the zext with the input to the trunc.
+ if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
+ auto *Trunc = cast<TruncInst>(Src);
+ assert(Trunc->getOperand(0)->getType() == ExtTy &&
+ "expected inserted trunc to be operating on i32");
+ LLVM_DEBUG(dbgs() << "ARM CGP: Replacing zext with trunc operand: "
+ << *Trunc->getOperand(0));
+ ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
+ InstsToRemove.push_back(ZExt);
}
}
// Finally, remove unecessary zexts and truncs, delete old instructions and
// clear the data structures.
- Cleanup(Sinks);
+ Cleanup(Visited);
- LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete:\n");
- LLVM_DEBUG(dbgs();
- for (auto *V : Sources)
- V->dump();
- for (auto *I : NewInsts)
- I->dump();
- for (auto *V : Visited) {
- if (!Sources.count(V))
- V->dump();
- });
+ LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
}
/// We accept most instructions, as well as Arguments and ConstantInsts. We
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
- if (isa<ICmpInst>(V))
- return true;
+ if (auto *I = dyn_cast<ICmpInst>(V)) {
+ // Now that we allow small types than TypeSize, only allow icmp of
+ // TypeSize because they will require a trunc to be legalised.
+ // TODO: Allow icmp of smaller types, and calculate at the end
+ // whether the transform would be beneficial.
+ if (isa<PointerType>(I->getOperand(0)->getType()))
+ return true;
+ return EqualTypeSize(I->getOperand(0));
+ }
// Memory instructions
if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
isa<LoadInst>(V))
return isSupportedType(V);
- // Truncs can be either sources or sinks.
- if (auto *Trunc = dyn_cast<TruncInst>(V))
- return isSupportedType(Trunc) || isSupportedType(Trunc->getOperand(0));
+ if (isa<SExtInst>(V))
+ return false;
- if (isa<CastInst>(V) && !isa<SExtInst>(V))
- return isSupportedType(cast<CastInst>(V)->getOperand(0));
+ if (auto *Cast = dyn_cast<CastInst>(V))
+ return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can
// Calls can be both sources and sinks.
if (isSink(V))
Sinks.insert(cast<Instruction>(V));
+
if (isSource(V))
Sources.insert(V);
- else if (auto *I = dyn_cast<Instruction>(V)) {
- // Visit operands of any instruction visited.
- for (auto &U : I->operands()) {
- if (!AddLegalInst(U))
- return false;
+
+ if (!isSink(V) && !isSource(V)) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // Visit operands of any instruction visited.
+ for (auto &U : I->operands()) {
+ if (!AddLegalInst(U))
+ return false;
+ }
}
}
if (CI.isSigned() || !isa<IntegerType>(CI.getOperand(0)->getType()))
continue;
+ LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
+
for (auto &Op : CI.operands()) {
if (auto *I = dyn_cast<Instruction>(Op))
MadeChange |= TryToPromote(I);
@sh1 = hidden local_unnamed_addr global i16 0, align 2
@d_sh = hidden local_unnamed_addr global [16 x i16] zeroinitializer, align 2
-; CHECK-LABEL: two_stage_zext_trunc_mix
-; CHECK-NOT: uxt
+; CHECK-COMMON-LABEL: two_stage_zext_trunc_mix
+; CHECK-COMMON-NOT: uxt
define i8* @two_stage_zext_trunc_mix(i32* %this, i32 %__pos1, i32 %__n1, i32** %__str, i32 %__pos2, i32 %__n2) {
entry:
%__size_.i.i.i.i = bitcast i32** %__str to i8*
%res = select i1 %tobool.i.i.i.i.i, i8* %7, i8* %8
ret i8* %res
}
+
+; CHECK-COMMON-LABEL: search_through_zext_1
+; CHECK-COMMON-NOT: uxt
+define i8 @search_through_zext_1(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c) {
+entry:
+ %add = add nuw i8 %a, %b
+ %conv = zext i8 %add to i16
+ %cmp = icmp ult i16 %conv, %c
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %sub = sub nuw i8 %b, %a
+ %conv2 = zext i8 %sub to i16
+ %cmp2 = icmp ugt i16 %conv2, %c
+ %res = select i1 %cmp2, i8 %a, i8 %b
+ br label %if.end
+
+if.end:
+ %retval = phi i8 [ 0, %entry ], [ %res, %if.then ]
+ ret i8 %retval
+}
+
+; TODO: We should be able to remove the uxtb here. The transform fails because
+; the icmp ugt uses an i32, which is too large... but this doesn't matter
+; because it won't be writing a large value to a register as a result.
+; CHECK-COMMON-LABEL: search_through_zext_2
+; CHECK-COMMON: uxtb
+; CHECK-COMMON: uxtb
+define i8 @search_through_zext_2(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) {
+entry:
+ %add = add nuw i8 %a, %b
+ %conv = zext i8 %add to i16
+ %cmp = icmp ult i16 %conv, %c
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %sub = sub nuw i8 %b, %a
+ %conv2 = zext i8 %sub to i32
+ %cmp2 = icmp ugt i32 %conv2, %d
+ %res = select i1 %cmp2, i8 %a, i8 %b
+ br label %if.end
+
+if.end:
+ %retval = phi i8 [ 0, %entry ], [ %res, %if.then ]
+ ret i8 %retval
+}
+
+; TODO: We should be able to remove the uxtb here as all the calculations are
+; performed on i8s. The promotion of i8 to i16 and then the later truncation
+; results in the uxtb.
+; CHECK-COMMON-LABEL: search_through_zext_3
+; CHECK-COMMON: uxtb
+; CHECK-COMMON: uxtb
+define i8 @search_through_zext_3(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) {
+entry:
+ %add = add nuw i8 %a, %b
+ %conv = zext i8 %add to i16
+ %cmp = icmp ult i16 %conv, %c
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %trunc = trunc i16 %conv to i8
+ %sub = sub nuw i8 %b, %trunc
+ %conv2 = zext i8 %sub to i32
+ %cmp2 = icmp ugt i32 %conv2, %d
+ %res = select i1 %cmp2, i8 %a, i8 %b
+ br label %if.end
+
+if.end:
+ %retval = phi i8 [ 0, %entry ], [ %res, %if.then ]
+ ret i8 %retval
+}
+
+; TODO: We should be able to remove the uxt that gets introduced for %conv2
+; CHECK-COMMON-LABEL: search_through_zext_cmp
+; CHECK-COMMON: uxt
+define i8 @search_through_zext_cmp(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c) {
+entry:
+ %cmp = icmp ne i8 %a, %b
+ %conv = zext i1 %cmp to i16
+ %cmp1 = icmp ult i16 %conv, %c
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+ %sub = sub nuw i8 %b, %a
+ %conv2 = zext i8 %sub to i16
+ %cmp3 = icmp ugt i16 %conv2, %c
+ %res = select i1 %cmp3, i8 %a, i8 %b
+ br label %if.end
+
+if.end:
+ %retval = phi i8 [ 0, %entry ], [ %res, %if.then ]
+ ret i8 %retval
+}
+
+; CHECK-COMMON-LABEL: search_through_zext_load
+; CHECK-COMMON-NOT: uxt
+define i8 @search_through_zext_load(i8* %a, i8 zeroext %b, i16 zeroext %c) {
+entry:
+ %load = load i8, i8* %a
+ %conv = zext i8 %load to i16
+ %cmp1 = icmp ult i16 %conv, %c
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+ %sub = sub nuw i8 %b, %load
+ %conv2 = zext i8 %sub to i16
+ %cmp3 = icmp ugt i16 %conv2, %c
+ %res = select i1 %cmp3, i8 %load, i8 %b
+ br label %if.end
+
+if.end:
+ %retval = phi i8 [ 0, %entry ], [ %res, %if.then ]
+ ret i8 %retval
+}
+
+; CHECK-COMMON-LABEL: trunc_sink_less_than
+; CHECK-COMMON-NOT: uxth
+; CHECK-COMMON: cmp
+; CHECK-COMMON: uxtb
+define i16 @trunc_sink_less_than_cmp(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d) {
+entry:
+ %sub = sub nuw i16 %b, %a
+ %cmp = icmp ult i16 %sub, %c
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %trunc = trunc i16 %sub to i8
+ %add = add nuw i8 %d, 1
+ %cmp2 = icmp ugt i8 %trunc, %add
+ %res = select i1 %cmp2, i16 %a, i16 %b
+ br label %if.end
+
+if.end:
+ %retval = phi i16 [ 0, %entry ], [ %res, %if.then ]
+ ret i16 %retval
+}
+
+; TODO: We should be able to remove the uxth introduced to handle %sub
+; CHECK-COMMON-LABEL: trunc_sink_less_than_arith
+; CHECK-COMMON: uxth
+; CHECK-COMMON: cmp
+; CHECK-COMMON: uxtb
+define i16 @trunc_sink_less_than_arith(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) {
+entry:
+ %sub = sub nuw i16 %b, %a
+ %cmp = icmp ult i16 %sub, %c
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %trunc = trunc i16 %sub to i8
+ %add = add nuw i8 %d, %trunc
+ %cmp2 = icmp ugt i8 %e, %add
+ %res = select i1 %cmp2, i16 %a, i16 %b
+ br label %if.end
+
+if.end:
+ %retval = phi i16 [ 0, %entry ], [ %res, %if.then ]
+ ret i16 %retval
+}
+
+; CHECK-COMMON-LABEL: trunc_sink_less_than_store
+; CHECK-COMMON-NOT: uxt
+; CHECK-COMMON: cmp
+; CHECK-COMMON-NOT: uxt
+define i16 @trunc_sink_less_than_store(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8* %e) {
+entry:
+ %sub = sub nuw i16 %b, %a
+ %cmp = icmp ult i16 %sub, %c
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %trunc = trunc i16 %sub to i8
+ %add = add nuw i8 %d, %trunc
+ store i8 %add, i8* %e
+ br label %if.end
+
+if.end:
+ %retval = phi i16 [ 0, %entry ], [ %sub, %if.then ]
+ ret i16 %retval
+}
+
+; CHECK-COMMON-LABEL: trunc_sink_less_than_ret
+; CHECK-COMMON-NOT: uxt
+define i8 @trunc_sink_less_than_ret(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) {
+entry:
+ %sub = sub nuw i16 %b, %a
+ %cmp = icmp ult i16 %sub, %c
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %trunc = trunc i16 %sub to i8
+ %add = add nuw i8 %d, %trunc
+ br label %if.end
+
+if.end:
+ %retval = phi i8 [ 0, %entry ], [ %add, %if.then ]
+ ret i8 %retval
+}
+
+; CHECK-COMMON-LABEL: trunc_sink_less_than_zext_ret
+; CHECK-COMMON-NOT: uxt
+; CHECK-COMMON: cmp
+; CHECK-COMMON: uxtb
+define zeroext i8 @trunc_sink_less_than_zext_ret(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) {
+entry:
+ %sub = sub nuw i16 %b, %a
+ %cmp = icmp ult i16 %sub, %c
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %trunc = trunc i16 %sub to i8
+ %add = add nuw i8 %d, %trunc
+ br label %if.end
+
+if.end:
+ %retval = phi i8 [ 0, %entry ], [ %add, %if.then ]
+ ret i8 %retval
+}