//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
-/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
-/// expression. If so, decompose it, returning some value X, such that Val is
+#define DEBUG_TYPE "instcombine"
+
+/// Analyze 'Val', seeing if it is a simple linear expression.
+/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
-static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
uint64_t &Offset) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
Offset = CI->getZExtValue();
// where C1 is divisible by C2.
unsigned SubScale;
Value *SubVal =
- DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
Offset += RHS->getZExtValue();
Scale = SubScale;
return SubVal;
return Val;
}
-/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
-/// try to eliminate the cast by moving the type information into the alloc.
+/// If we find a cast of an allocation instruction, try to eliminate the cast by
+/// moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
- // This requires DataLayout to get the alloca alignment and size information.
- if (!TD) return 0;
-
PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
- AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+ AllocaBuilder.SetInsertPoint(&AI);
// Get the type really allocated and the type casted to.
Type *AllocElTy = AI.getAllocatedType();
Type *CastElTy = PTy->getElementType();
- if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
- unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
- unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
- if (CastElTyAlign < AllocElTyAlign) return 0;
+ unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy);
+ if (CastElTyAlign < AllocElTyAlign) return nullptr;
// If the allocation has multiple uses, only promote it if we are strictly
// increasing the alignment of the resultant allocation. If we keep it the
// same, we open the door to infinite loops of various kinds.
- if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
- uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
- if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
+ if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
- uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
- if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+ uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
- DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+ decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
// If we can now satisfy the modulus, by using a non-1 scale, we really can
// do the xform.
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
- (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
- Value *Amt = 0;
+ Value *Amt = nullptr;
if (Scale == 1) {
Amt = NumElements;
} else {
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
+ New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
// If the allocation has multiple real uses, insert a cast and change all
// things that used it to use the new cast. This will also hack on CI, but it
return ReplaceInstUsesWith(CI, New);
}
-/// EvaluateInDifferentType - Given an expression that
-/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
-/// insert the code to evaluate the expression.
+/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
+/// true for, actually insert the code to evaluate the expression.
Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
- // If we got a constantexpr back, try to simplify it with TD info.
+ // If we got a constantexpr back, try to simplify it with DL info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD, TLI);
+ C = ConstantFoldConstantExpression(CE, DL, TLI);
return C;
}
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
- Instruction *Res = 0;
+ Instruction *Res = nullptr;
unsigned Opc = I->getOpcode();
switch (Opc) {
case Instruction::Add:
PHINode *OPN = cast<PHINode>(I);
PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
- Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ Value *V =
+ EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
NPN->addIncoming(V, OPN->getIncomingBlock(i));
}
Res = NPN;
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
static Instruction::CastOps
-isEliminableCastPair(
- const CastInst *CI, ///< The first cast instruction
- unsigned opcode, ///< The opcode of the second cast instruction
- Type *DstTy, ///< The target type for the second cast instruction
- DataLayout *TD ///< The target data for pointer size
-) {
-
+isEliminableCastPair(const CastInst *CI, ///< First cast instruction
+ unsigned opcode, ///< Opcode for the second cast
+ Type *DstTy, ///< Target type for the second cast
+ const DataLayout &DL) {
Type *SrcTy = CI->getOperand(0)->getType(); // A from above
Type *MidTy = CI->getType(); // B from above
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
- Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ?
- TD->getIntPtrType(SrcTy) : 0;
- Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ?
- TD->getIntPtrType(MidTy) : 0;
- Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ?
- TD->getIntPtrType(DstTy) : 0;
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
return Instruction::CastOps(Res);
}
-/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
-/// results in any code being generated and is interesting to optimize out. If
-/// the cast can be eliminated by some other simple transformation, we prefer
+/// Return true if the cast from "V to Ty" actually results in any code being
+/// generated and is interesting to optimize out.
+/// If the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
Type *Ty) {
// If this is another cast that can be eliminated, we prefer to have it
// eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
- if (isEliminableCastPair(CI, opc, Ty, TD))
+ if (isEliminableCastPair(CI, opc, Ty, DL))
return false;
// If this is a vector sext from a compare, then we don't want to break the
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!Src->getType()->isIntegerTy() ||
- !CI.getType()->isIntegerTy() ||
+ if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
}
- return 0;
+ return nullptr;
}
-/// CanEvaluateTruncated - Return true if we can evaluate the specified
-/// expression tree as type Ty instead of its larger type, and arrive with the
-/// same value. This is used by code that tries to eliminate truncates.
+/// Return true if we can evaluate the specified expression tree as type Ty
+/// instead of its larger type, and arrive with the same value.
+/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateTruncated(Value *V, Type *Ty) {
+static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
+ Instruction *CxtI) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
return true;
case Instruction::Or:
case Instruction::Xor:
// These operators can all arbitrarily be extended or truncated.
- return CanEvaluateTruncated(I->getOperand(0), Ty) &&
- CanEvaluateTruncated(I->getOperand(1), Ty);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
case Instruction::UDiv:
case Instruction::URem: {
uint32_t BitWidth = Ty->getScalarSizeInBits();
if (BitWidth < OrigBitWidth) {
APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
- if (MaskedValueIsZero(I->getOperand(0), Mask) &&
- MaskedValueIsZero(I->getOperand(1), Mask)) {
- return CanEvaluateTruncated(I->getOperand(0), Ty) &&
- CanEvaluateTruncated(I->getOperand(1), Ty);
+ if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
+ IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
}
}
break;
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint32_t BitWidth = Ty->getScalarSizeInBits();
if (CI->getLimitedValue(BitWidth) < BitWidth)
- return CanEvaluateTruncated(I->getOperand(0), Ty);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
}
break;
case Instruction::LShr:
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (MaskedValueIsZero(I->getOperand(0),
- APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ if (IC.MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) &&
CI->getLimitedValue(BitWidth) < BitWidth) {
- return CanEvaluateTruncated(I->getOperand(0), Ty);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
}
}
break;
return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
- return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
- CanEvaluateTruncated(SI->getFalseValue(), Ty);
+ return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
+ canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
}
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+ for (Value *IncValue : PN->incoming_values())
+ if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
return false;
return true;
}
return false;
}
+/// Given a vector that is bitcast to an integer, optionally logically
+/// right-shifted, and truncated, convert it to an extractelement.
+/// Example (big endian):
+/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
+/// --->
+/// extractelement <4 x i32> %X, 1
+static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
+ const DataLayout &DL) {
+ Value *TruncOp = Trunc.getOperand(0);
+ Type *DestType = Trunc.getType();
+ if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
+ return nullptr;
+
+ Value *VecInput = nullptr;
+ ConstantInt *ShiftVal = nullptr;
+ if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
+ m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShiftVal)))) ||
+ !isa<VectorType>(VecInput->getType()))
+ return nullptr;
+
+ VectorType *VecType = cast<VectorType>(VecInput->getType());
+ unsigned VecWidth = VecType->getPrimitiveSizeInBits();
+ unsigned DestWidth = DestType->getPrimitiveSizeInBits();
+ unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
+
+ if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
+ return nullptr;
+
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to a vector type that we can extract from.
+ unsigned NumVecElts = VecWidth / DestWidth;
+ if (VecType->getElementType() != DestType) {
+ VecType = VectorType::get(DestType, NumVecElts);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+ }
+
+ unsigned Elt = ShiftAmount / DestWidth;
+ if (DL.isBigEndian())
+ Elt = NumVecElts - 1 - Elt;
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
+
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
+ // Test if the trunc is the user of a select which is part of a
+ // minimum or maximum operation. If so, don't do any more simplification.
+ // Even simplifying demanded bits can break the canonical form of a
+ // min/max.
+ Value *LHS, *RHS;
+ if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
+ if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN)
+ return nullptr;
+
// See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateTruncated(Src, DestTy)) {
+ canEvaluateTruncated(Src, DestTy, *this, &CI)) {
// If this cast is a truncate, evaluting in a different type always
// eliminates the cast, so it is always a win.
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
if (DestTy->getScalarSizeInBits() == 1) {
- Constant *One = ConstantInt::get(Src->getType(), 1);
+ Constant *One = ConstantInt::get(SrcTy, 1);
Src = Builder->CreateAnd(Src, One);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
- Value *A = 0; ConstantInt *Cst = 0;
+ Value *A = nullptr; ConstantInt *Cst = nullptr;
if (Src->hasOneUse() &&
match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
// We have three types to worry about here, the type of A, the source of
// If the shift amount is larger than the size of A, then the result is
// known to be zero because all the input bits got shifted out.
if (Cst->getZExtValue() >= ASize)
- return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(DestTy));
// Since we're doing an lshr and a zero extend, and know that the shift
// amount is smaller than ASize, it is always safe to do the shift in A's
// type, then zero extend or truncate to the result.
Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
Shift->takeName(Src);
- return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ return CastInst::CreateIntegerCast(Shift, DestTy, false);
+ }
+
+ // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
+ // conversion.
+ // It works because bits coming from sign extension have the same value as
+ // the sign bit of the original value; performing ashr instead of lshr
+ // generates bits of the same value as the sign bit.
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+ const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+ // This optimization can be only performed when zero bits generated by
+ // the original lshr aren't pulled into the value after truncation, so we
+ // can only shift by values smaller than the size of destination type (in
+ // bits).
+ if (Cst->getValue().ult(ASize)) {
+ Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+ }
}
// Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
// type isn't non-native.
- if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
- ShouldChangeType(Src->getType(), CI.getType()) &&
+ if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
+ ShouldChangeType(SrcTy, DestTy) &&
match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
- Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+ Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr");
return BinaryOperator::CreateAnd(NewTrunc,
- ConstantExpr::getTrunc(Cst, CI.getType()));
+ ConstantExpr::getTrunc(Cst, DestTy));
}
- return 0;
+ if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
+ return I;
+
+ return nullptr;
}
-/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
-/// in order to eliminate the icmp.
+/// Transform (zext icmp) to bitwise / integer operations in order to eliminate
+/// the icmp.
Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform) {
// If we are just checking for a icmp eq of a single bit and zext'ing it
// If Op1C some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS);
- ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS);
+ computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI);
+ computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI);
if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
APInt KnownBits = KnownZeroLHS | KnownOneLHS;
}
}
- return 0;
+ return nullptr;
}
-/// CanEvaluateZExtd - Determine if the specified value can be computed in the
-/// specified wider type and produce the same low bits. If not, return false.
+/// Determine if the specified value can be computed in the specified wider type
+/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
-static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
+static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
+ InstCombiner &IC, Instruction *CxtI) {
BitsToClear = 0;
if (isa<Constant>(V))
return true;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- case Instruction::Shl:
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
- !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
+ !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
return false;
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
// We use MaskedValueIsZero here for generality, but the case we care
// about the most is constant RHS.
unsigned VSize = V->getType()->getScalarSizeInBits();
- if (MaskedValueIsZero(I->getOperand(1),
- APInt::getHighBitsSet(VSize, BitsToClear)))
+ if (IC.MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear),
+ 0, CxtI))
return true;
}
// Otherwise, we don't know how to analyze this BitsToClear case yet.
return false;
+ case Instruction::Shl:
+ // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
+ // upper bits we can reduce BitsToClear by the shift amount.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ return false;
+ uint64_t ShiftAmt = Amt->getZExtValue();
+ BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
+ return true;
+ }
+ return false;
case Instruction::LShr:
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
BitsToClear += Amt->getZExtValue();
if (BitsToClear > V->getType()->getScalarSizeInBits())
// Cannot promote variable LSHR.
return false;
case Instruction::Select:
- if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
- !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+ if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
+ !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear are
// known zero in the disagreeing side.
Tmp != BitsToClear)
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+ if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
return false;
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+ if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear
// are known zero in the disagreeing input.
Tmp != BitsToClear)
Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
- if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
- return 0;
+ if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
+ return nullptr;
// If one of the common conversion will work, do it.
if (Instruction *Result = commonCastTransforms(CI))
// strange.
unsigned BitsToClear;
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid zero extend: " << CI);
+ " to avoid zero extend: " << CI << '\n');
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
// If the high bits are already filled with zeros, just replace this
// cast with the result.
- if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
- DestBitSize-SrcBitsKept)))
+ if (MaskedValueIsZero(Res,
+ APInt::getHighBitsSet(DestBitSize,
+ DestBitSize-SrcBitsKept),
+ 0, &CI))
return ReplaceInstUsesWith(CI, Res);
// We need to emit an AND to clear the high bits.
}
}
- // zext(trunc(t) & C) -> (t & zext(C)).
- if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
- if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
- if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
- Value *TI0 = TI->getOperand(0);
- if (TI0->getType() == CI.getType())
- return
- BinaryOperator::CreateAnd(TI0,
- ConstantExpr::getZExt(C, CI.getType()));
- }
-
- // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
- if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
- if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
- if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
- if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
- And->getOperand(1) == C)
- if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
- Value *TI0 = TI->getOperand(0);
- if (TI0->getType() == CI.getType()) {
- Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
- Value *NewAnd = Builder->CreateAnd(TI0, ZC);
- return BinaryOperator::CreateXor(NewAnd, ZC);
- }
- }
+ // zext(trunc(X) & C) -> (X & zext(C)).
+ Constant *C;
+ Value *X;
+ if (SrcI &&
+ match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
+ X->getType() == CI.getType())
+ return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
+
+ // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
+ Value *And;
+ if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
+ match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
+ X->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC);
+ }
// zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
- Value *X;
- if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
- match(SrcI, m_Not(m_Value(X))) &&
- (!X->hasOneUse() || !isa<CmpInst>(X))) {
+ if (SrcI && SrcI->hasOneUse() &&
+ SrcI->getType()->getScalarType()->isIntegerTy(1) &&
+ match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa<CmpInst>(X))) {
Value *New = Builder->CreateZExt(X, CI.getType());
return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
- return 0;
+ return nullptr;
}
-/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
-/// in order to eliminate the icmp.
+/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
ICmpInst::Predicate Pred = ICI->getPredicate();
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // Don't bother if Op1 isn't of vector or integer type.
+ if (!Op1->getType()->isIntOrIntVectorTy())
+ return nullptr;
+
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
// (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
// (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
- if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) ||
+ if ((Pred == ICmpInst::ICMP_SLT && Op1C->isNullValue()) ||
(Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
Value *Sh = ConstantInt::get(Op0->getType(),
In = Builder->CreateNot(In, In->getName()+".not");
return ReplaceInstUsesWith(CI, In);
}
+ }
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
// If we know that only one bit of the LHS of the icmp can be set and we
// have an equality comparison with zero or a power of 2, we can transform
// the icmp and sext into bitwise/integer operations.
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
unsigned BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(Op0, KnownZero, KnownOne);
+ computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) {
}
}
- // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
- if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
- if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
- Op0->getType() == CI.getType()) {
- Type *EltTy = VTy->getElementType();
-
- // splat the shift constant to a constant vector.
- Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
- Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit");
- return ReplaceInstUsesWith(CI, In);
- }
- }
-
- return 0;
+ return nullptr;
}
-/// CanEvaluateSExtd - Return true if we can take the specified value
-/// and return it as type Ty without inserting any new casts and without
-/// changing the value of the common low bits. This is used by code that tries
-/// to promote integer operations to a wider types will allow us to eliminate
-/// the extension.
+/// Return true if we can take the specified value and return it as type Ty
+/// without inserting any new casts and without changing the value of the common
+/// low bits. This is used by code that tries to promote integer operations to
+/// a wider types will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateSExtd(Value *V, Type *Ty) {
+static bool canEvaluateSExtd(Value *V, Type *Ty) {
assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"Can't sign extend type to a smaller type");
// If this is a constant, it can be trivially promoted.
case Instruction::Sub:
case Instruction::Mul:
// These operators can all arbitrarily be extended if their inputs can.
- return CanEvaluateSExtd(I->getOperand(0), Ty) &&
- CanEvaluateSExtd(I->getOperand(1), Ty);
+ return canEvaluateSExtd(I->getOperand(0), Ty) &&
+ canEvaluateSExtd(I->getOperand(1), Ty);
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
case Instruction::Select:
- return CanEvaluateSExtd(I->getOperand(1), Ty) &&
- CanEvaluateSExtd(I->getOperand(2), Ty);
+ return canEvaluateSExtd(I->getOperand(1), Ty) &&
+ canEvaluateSExtd(I->getOperand(2), Ty);
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ for (Value *IncValue : PN->incoming_values())
+ if (!canEvaluateSExtd(IncValue, Ty)) return false;
return true;
}
default:
Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// If this sign extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this sext.
- if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
- return 0;
+ if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
+ return nullptr;
if (Instruction *I = commonCastTransforms(CI))
return I;
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+ // If we know that the value being extended is positive, we can use a zext
+ // instead.
+ bool KnownZero, KnownOne;
+ ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI);
+ if (KnownZero) {
+ Value *ZExt = Builder->CreateZExt(Src, DestTy);
+ return ReplaceInstUsesWith(CI, ZExt);
+ }
+
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateSExtd(Src, DestTy)) {
+ canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid sign extend: " << CI);
+ " to avoid sign extend: " << CI << '\n');
Value *Res = EvaluateInDifferentType(Src, DestTy, true);
assert(Res->getType() == DestTy);
// If the high bits are already filled with sign bit, just replace this
// cast with the result.
- if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+ if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
return ReplaceInstUsesWith(CI, Res);
// We need to emit a shl + ashr to do the sign extend.
// into:
// %a = shl i32 %i, 30
// %d = ashr i32 %a, 30
- Value *A = 0;
+ Value *A = nullptr;
// TODO: Eventually this could be subsumed by EvaluateInDifferentType.
- ConstantInt *BA = 0, *CA = 0;
+ ConstantInt *BA = nullptr, *CA = nullptr;
if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
m_ConstantInt(CA))) &&
BA == CA && A->getType() == CI.getType()) {
return BinaryOperator::CreateAShr(A, ShAmtV);
}
- return 0;
+ return nullptr;
}
-/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// Return a Constant* for the specified floating-point constant if it fits
/// in the specified FP type without changing its value.
-static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+static Constant *fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
bool losesInfo;
APFloat F = CFP->getValueAPF();
(void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
if (!losesInfo)
return ConstantFP::get(CFP->getContext(), F);
- return 0;
+ return nullptr;
}
-/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// If this is a floating-point extension instruction, look
/// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V) {
+static Value *lookThroughFPExtensions(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::FPExt)
- return LookThroughFPExtensions(I->getOperand(0));
+ return lookThroughFPExtensions(I->getOperand(0));
// If this value is a constant, return the constant in the smallest FP type
// that can accurately represent it. This allows us to turn
if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
return V; // No constant folding of this.
// See if the value can be truncated to half and then reextended.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf))
return V;
// See if the value can be truncated to float and then reextended.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle))
return V;
if (CFP->getType()->isDoubleTy())
return V; // Won't shrink.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble))
return V;
// Don't try to shrink to various long double types.
}
Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
-
- // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
- // smaller than the destination type, we can eliminate the truncate by doing
- // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
- // as many builtins (sqrt, etc).
+ // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
+ // simplify this expression to avoid one or more of the trunc/extend
+ // operations if we can do so without changing the numerical results.
+ //
+ // The exact manner in which the widths of the operands interact to limit
+ // what we can and cannot do safely varies from operation to operation, and
+ // is explained below in the various case statements.
BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
if (OpI && OpI->hasOneUse()) {
+ Value *LHSOrig = lookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSOrig = lookThroughFPExtensions(OpI->getOperand(1));
+ unsigned OpWidth = OpI->getType()->getFPMantissaWidth();
+ unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth();
+ unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth();
+ unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
+ unsigned DstWidth = CI.getType()->getFPMantissaWidth();
switch (OpI->getOpcode()) {
- default: break;
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- Type *SrcTy = OpI->getType();
- Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
- Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
- if (LHSTrunc->getType() != SrcTy &&
- RHSTrunc->getType() != SrcTy) {
- unsigned DstSize = CI.getType()->getScalarSizeInBits();
- // If the source types were both smaller than the destination type of
- // the cast, do this xform.
- if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
- RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
- LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
- RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
- return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ default: break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ // For addition and subtraction, the infinitely precise result can
+ // essentially be arbitrarily wide; proving that double rounding
+ // will not occur because the result of OpI is exact (as we will for
+ // FMul, for example) is hopeless. However, we *can* nonetheless
+ // frequently know that double rounding cannot occur (or that it is
+ // innocuous) by taking advantage of the specific structure of
+ // infinitely-precise results that admit double rounding.
+ //
+ // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
+ // to represent both sources, we can guarantee that the double
+ // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
+ // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
+ // for proof of this fact).
+ //
+ // Note: Figueroa does not consider the case where DstFormat !=
+ // SrcFormat. It's possible (likely even!) that this analysis
+ // could be tightened for those cases, but they are rare (the main
+ // case of interest here is (float)((double)float + float)).
+ if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ break;
+ case Instruction::FMul:
+ // For multiplication, the infinitely precise result has at most
+ // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
+ // that such a value can be exactly represented, then no double
+ // rounding can possibly occur; we can safely perform the operation
+ // in the destination format if it can represent both sources.
+ if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::CreateFMul(LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ break;
+ case Instruction::FDiv:
+ // For division, we use again use the bound from Figueroa's
+ // dissertation. I am entirely certain that this bound can be
+ // tightened in the unbalanced operand case by an analysis based on
+ // the diophantine rational approximation bound, but the well-known
+ // condition used here is a good conservative first pass.
+ // TODO: Tighten bound via rigorous analysis of the unbalanced case.
+ if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::CreateFDiv(LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ break;
+ case Instruction::FRem:
+ // Remainder is straightforward. Remainder is always exact, so the
+ // type of OpI doesn't enter into things at all. We simply evaluate
+ // in whichever source type is larger, then convert to the
+ // destination type.
+ if (SrcWidth == OpWidth)
+ break;
+ if (LHSWidth < SrcWidth)
+ LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType());
+ else if (RHSWidth <= SrcWidth)
+ RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType());
+ if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) {
+ Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig);
+ if (Instruction *RI = dyn_cast<Instruction>(ExactResult))
+ RI->copyFastMathFlags(OpI);
+ return CastInst::CreateFPCast(ExactResult, CI.getType());
}
- }
- break;
}
// (fptrunc (fneg x)) -> (fneg (fptrunc x))
if (BinaryOperator::isFNeg(OpI)) {
Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1),
CI.getType());
- return BinaryOperator::CreateFNeg(InnerTrunc);
+ Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc);
+ RI->copyFastMathFlags(OpI);
+ return RI;
}
}
+ // (fptrunc (select cond, R1, Cst)) -->
+ // (select cond, (fptrunc R1), (fptrunc Cst))
+ //
+ // - but only if this isn't part of a min/max operation, else we'll
+ // ruin min/max canonical form which is to have the select and
+ // compare's operands be of the same type with no casts to look through.
+ Value *LHS, *RHS;
+ SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0));
+ if (SI &&
+ (isa<ConstantFP>(SI->getOperand(1)) ||
+ isa<ConstantFP>(SI->getOperand(2))) &&
+ matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) {
+ Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
+ CI.getType());
+ Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
+ CI.getType());
+ return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc);
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
if (II) {
switch (II->getIntrinsicID()) {
Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
CI.getType());
Type *IntrinsicType[] = { CI.getType() };
- Function *Overload =
- Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(),
- II->getIntrinsicID(), IntrinsicType);
+ Function *Overload = Intrinsic::getDeclaration(
+ CI.getModule(), II->getIntrinsicID(), IntrinsicType);
Value *Args[] = { InnerTrunc };
return CallInst::Create(Overload, Args, II->getName());
}
}
- // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
- CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
- if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
- Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
- Call->getNumArgOperands() == 1 &&
- Call->hasOneUse()) {
- CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
- if (Arg && Arg->getOpcode() == Instruction::FPExt &&
- CI.getType()->isFloatTy() &&
- Call->getType()->isDoubleTy() &&
- Arg->getType()->isDoubleTy() &&
- Arg->getOperand(0)->getType()->isFloatTy()) {
- Function *Callee = Call->getCalledFunction();
- Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
- Callee->getAttributes(),
- Builder->getFloatTy(),
- Builder->getFloatTy(),
- NULL);
- CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
- "sqrtfcall");
- ret->setAttributes(Callee->getAttributes());
-
-
- // Remove the old Call. With -fmath-errno, it won't get marked readnone.
- ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
- EraseInstFromFunction(*Call);
- return ret;
- }
- }
-
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitFPExt(CastInst &CI) {
return commonCastTransforms(CI);
}
+// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
+// This is safe if the intermediate type has enough bits in its mantissa to
+// accurately represent all values of X. For example, this won't work with
+// i64 -> float -> i64.
+Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) {
+ if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
+ return nullptr;
+ Instruction *OpI = cast<Instruction>(FI.getOperand(0));
+
+ Value *SrcI = OpI->getOperand(0);
+ Type *FITy = FI.getType();
+ Type *OpITy = OpI->getType();
+ Type *SrcTy = SrcI->getType();
+ bool IsInputSigned = isa<SIToFPInst>(OpI);
+ bool IsOutputSigned = isa<FPToSIInst>(FI);
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ int InputSize = (int)SrcTy->getScalarSizeInBits() - IsInputSigned;
+ int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned;
+ int ActualSize = std::min(InputSize, OutputSize);
+
+ if (ActualSize <= OpITy->getFPMantissaWidth()) {
+ if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) {
+ if (IsInputSigned && IsOutputSigned)
+ return new SExtInst(SrcI, FITy);
+ return new ZExtInst(SrcI, FITy);
+ }
+ if (FITy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits())
+ return new TruncInst(SrcI, FITy);
+ if (SrcTy == FITy)
+ return ReplaceInstUsesWith(FI, SrcI);
+ return new BitCastInst(SrcI, FITy);
+ }
+ return nullptr;
+}
+
Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
- if (OpI == 0)
+ if (!OpI)
return commonCastTransforms(FI);
- // fptoui(uitofp(X)) --> X
- // fptoui(sitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
return commonCastTransforms(FI);
}
Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
- if (OpI == 0)
+ if (!OpI)
return commonCastTransforms(FI);
- // fptosi(sitofp(X)) --> X
- // fptosi(uitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() <=
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
return commonCastTransforms(FI);
}
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
- if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ DL.getPointerSizeInBits(AS)) {
+ Type *Ty = DL.getIntPtrType(CI.getContext(), AS);
if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
if (Instruction *I = commonCastTransforms(CI))
return I;
- return 0;
+ return nullptr;
}
/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
// If casting the result of a getelementptr instruction with no offset, turn
// this into a cast of the original pointer!
- if (GEP->hasAllZeroIndices()) {
+ if (GEP->hasAllZeroIndices() &&
+ // If CI is an addrspacecast and GEP changes the poiner type, merging
+ // GEP into CI would undo canonicalizing addrspacecast with different
+ // pointer types, causing infinite loops.
+ (!isa<AddrSpaceCastInst>(CI) ||
+ GEP->getType() == GEP->getPointerOperand()->getType())) {
// Changing the cast operand is usually not a good idea but it is safe
// here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
- // If the GEP has a single use, and the base pointer is a bitcast, and the
- // GEP computes a constant offset, see if we can convert these three
- // instructions into fewer. This typically happens with unions and other
- // non-type-safe code.
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
- GEP->accumulateConstantOffset(*TD, Offset)) {
- // Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
- Type *GEPIdxTy =
- cast<PointerType>(OrigBase->getType())->getElementType();
- SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) {
- // If we were able to index down into an element, create the GEP
- // and bitcast the result. This eliminates one bitcast, potentially
- // two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
- NGEP->takeName(GEP);
-
- if (isa<BitCastInst>(CI))
- return new BitCastInst(NGEP, CI.getType());
- assert(isa<PtrToIntInst>(CI));
- return new PtrToIntInst(NGEP, CI.getType());
- }
- }
}
return commonCastTransforms(CI);
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
- return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
- }
+ Type *Ty = CI.getType();
+ unsigned AS = CI.getPointerAddressSpace();
- return commonPointerCastTransforms(CI);
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
+ return commonPointerCastTransforms(CI);
+
+ Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
+ if (Ty->isVectorTy()) // Handle vectors of pointers.
+ PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy);
+ return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
-/// OptimizeVectorResize - This input value (which is known to have vector type)
-/// is being zero extended or truncated to the specified vector type. Try to
-/// replace it with a shuffle (and vector/vector bitcast) if possible.
+/// This input value (which is known to have vector type) is being zero extended
+/// or truncated to the specified vector type.
+/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
-static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
+static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
InstCombiner &IC) {
// We can only do this optimization if the output is a multiple of the input
// element size, or the input is a multiple of the output element size.
// there yet.
if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
DestTy->getElementType()->getPrimitiveSizeInBits())
- return 0;
+ return nullptr;
SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
return Value / Ty->getPrimitiveSizeInBits();
}
-/// CollectInsertionElements - V is a value which is inserted into a vector of
-/// VecEltTy. Look through the value to see if we can decompose it into
+/// V is a value which is inserted into a vector of VecEltTy.
+/// Look through the value to see if we can decompose it into
/// insertions into the vector. See the example in the comment for
/// OptimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
+/// Shift is the number of bits between the lsb of V and the lsb of
+/// the vector.
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
-static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
- SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy) {
+static bool collectInsertionElements(Value *V, unsigned Shift,
+ SmallVectorImpl<Value *> &Elements,
+ Type *VecEltTy, bool isBigEndian) {
+ assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
+ "Shift should be a multiple of the element type size");
+
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
if (C->isNullValue())
return true;
+ unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
+ if (isBigEndian)
+ ElementIndex = Elements.size() - ElementIndex - 1;
+
// Fail if multiple elements are inserted into this slot.
- if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ if (Elements[ElementIndex])
return false;
Elements[ElementIndex] = V;
// If the constant is the size of a vector element, we just need to bitcast
// it to the right type so it gets properly inserted.
if (NumElts == 1)
- return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- ElementIndex, Elements, VecEltTy);
+ return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ Shift, Elements, VecEltTy, isBigEndian);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned ShiftI = Shift+i*ElementSize;
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
- i*ElementSize));
+ ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ isBigEndian))
return false;
}
return true;
if (!V->hasOneUse()) return false;
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return false;
+ if (!I) return false;
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy) &&
- CollectInsertionElements(I->getOperand(1), ElementIndex,
- Elements, VecEltTy);
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian) &&
+ collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
- if (CI == 0) return false;
- if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
- unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
- return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
- Elements, VecEltTy);
+ if (!CI) return false;
+ Shift += CI->getZExtValue();
+ if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
}
}
}
-/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
-/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// If the input is an 'or' instruction, we may be doing shifts and ors to
+/// assemble the elements of the vector manually.
/// Try to rip the code out and replace it with insertelements. This is to
/// optimize code like this:
///
/// %tmp43 = bitcast i64 %ins35 to <2 x float>
///
/// Into two insertelements that do "buildvector{%inc, %inc5}".
-static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
- if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType()))
- return 0;
+ if (!collectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType(),
+ IC.getDataLayout().isBigEndian()))
+ return nullptr;
// If we succeeded, we know that all of the element are specified by Elements
// or are zero if Elements has a null entry. Recast this as a set of
// insertions.
Value *Result = Constant::getNullValue(CI.getType());
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
- if (Elements[i] == 0) continue; // Unset element.
+ if (!Elements[i]) continue; // Unset element.
Result = IC.Builder->CreateInsertElement(Result, Elements[i],
IC.Builder->getInt32(i));
return Result;
}
-
-/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
-/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return 0;
-
- Value *Src = CI.getOperand(0);
- Type *DestTy = CI.getType();
-
- // If this is a bitcast from int to float, check to see if the int is an
- // extraction from a vector.
- Value *VecInput = 0;
- // bitcast(trunc(bitcast(somevector)))
- if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
- isa<VectorType>(VecInput->getType())) {
- VectorType *VecTy = cast<VectorType>(VecInput->getType());
- unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
-
- if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
- // If the element type of the vector doesn't match the result type,
- // bitcast it to be a vector type we can extract from.
- if (VecTy->getElementType() != DestTy) {
- VecTy = VectorType::get(DestTy,
- VecTy->getPrimitiveSizeInBits() / DestWidth);
- VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
- }
-
- unsigned Elt = 0;
- if (IC.getDataLayout()->isBigEndian())
- Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
- }
- }
-
- // bitcast(trunc(lshr(bitcast(somevector), cst))
- ConstantInt *ShAmt = 0;
- if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
- m_ConstantInt(ShAmt)))) &&
- isa<VectorType>(VecInput->getType())) {
- VectorType *VecTy = cast<VectorType>(VecInput->getType());
- unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
- if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
- ShAmt->getZExtValue() % DestWidth == 0) {
- // If the element type of the vector doesn't match the result type,
- // bitcast it to be a vector type we can extract from.
- if (VecTy->getElementType() != DestTy) {
- VecTy = VectorType::get(DestTy,
- VecTy->getPrimitiveSizeInBits() / DestWidth);
- VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
- }
-
- unsigned Elt = ShAmt->getZExtValue() / DestWidth;
- if (IC.getDataLayout()->isBigEndian())
- Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
- }
- }
- return 0;
+/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
+/// vector followed by extract element. The backend tends to handle bitcasts of
+/// vectors better than bitcasts of scalars because vector registers are
+/// usually not type-specific like scalar integer or scalar floating-point.
+static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
+ InstCombiner &IC,
+ const DataLayout &DL) {
+ // TODO: Create and use a pattern matcher for ExtractElementInst.
+ auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
+ if (!ExtElt || !ExtElt->hasOneUse())
+ return nullptr;
+
+ // The bitcast must be to a vectorizable type, otherwise we can't make a new
+ // type to extract from.
+ Type *DestType = BitCast.getType();
+ if (!VectorType::isValidElementType(DestType))
+ return nullptr;
+
+ unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
+ auto *NewVecType = VectorType::get(DestType, NumElts);
+ auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(),
+ NewVecType, "bc");
+ return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Type *DstElTy = DstPTy->getElementType();
Type *SrcElTy = SrcPTy->getElementType();
- // If the address spaces don't match, don't eliminate the bitcast, which is
- // required for changing types.
- if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
- return 0;
-
// If we are casting a alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
- Constant *ZeroUInt =
- Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
- SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(0U);
++NumZeros;
}
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
- SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder->getInt32(0));
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
- // Try to optimize int -> float bitcasts.
- if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
- if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
- return I;
-
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
CastInst *SrcCast = cast<CastInst>(Src);
if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
if (isa<VectorType>(BCIn->getOperand(0)->getType()))
- if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
cast<VectorType>(DestTy), *this))
return I;
}
// If the input is an 'or' instruction, we may be doing shifts and ors to
// assemble the elements of the vector manually. Try to rip the code out
// and replace it with insertelements.
- if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
return ReplaceInstUsesWith(CI, V);
}
}
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
if (SVI->hasOneUse() && DestTy->isVectorTy() &&
- cast<VectorType>(DestTy)->getNumElements() ==
- SVI->getType()->getNumElements() &&
+ DestTy->getVectorNumElements() == SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ SVI->getOperand(0)->getType()->getVectorNumElements()) {
BitCastInst *Tmp;
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
}
}
+ if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+ return I;
+
if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}
+
+Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+ // If the destination pointer element type is not the same as the source's
+ // first do a bitcast to the destination type, and then the addrspacecast.
+ // This allows the cast to be exposed to other transforms.
+ Value *Src = CI.getOperand(0);
+ PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
+ PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
+
+ Type *DestElemTy = DestTy->getElementType();
+ if (SrcTy->getElementType() != DestElemTy) {
+ Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace());
+ if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) {
+ // Handle vectors of pointers.
+ MidTy = VectorType::get(MidTy, VT->getNumElements());
+ }
+
+ Value *NewBitCast = Builder->CreateBitCast(Src, MidTy);
+ return new AddrSpaceCastInst(NewBitCast, CI.getType());
+ }
+
+ return commonPointerCastTransforms(CI);
+}