This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790
Differential Revision: https://reviews.llvm.org/D82573
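
This change switches the Alignment parameter of getInterleavedMemoryOpCost
(and the target overrides in AArch64, ARM, Hexagon, PPC, SystemZ, and X86)
from unsigned to Align. For reference, Align (defined in
llvm/Support/Alignment.h, introduced by D64790) encodes a non-zero
power-of-two byte alignment and asserts that invariant at construction, so
callees no longer need to validate a raw unsigned at every use. A minimal
call-site sketch follows; TTI, VecTy, Indices, and AS are illustrative
in-scope values, not names taken from this patch:

    #include "llvm/Support/Alignment.h"

    llvm::Align A(16);    // OK: 16 is a non-zero power of two
    // llvm::Align B(0);  // would assert: Align cannot be zero (use MaybeAlign)
    // llvm::Align C(3);  // would assert: not a power of two

    // CostKind, UseMaskForCond, and UseMaskForGaps keep their defaults.
    int Cost = TTI.getInterleavedMemoryOpCost(
        Instruction::Load, VecTy, /*Factor=*/4, Indices, A, AS);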
/// \p UseMaskForCond indicates if the memory access is predicated.
/// \p UseMaskForGaps indicates if gaps should be masked.
int getInterleavedMemoryOpCost(
- unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
/// Calculate the cost of performing a vector reduction.
///
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) = 0;
- virtual int
- getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false) = 0;
+ virtual int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm,
TTI::TargetCostKind CostKind) = 0;
Alignment, CostKind, I);
}
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, unsigned Alignment,
+ ArrayRef<unsigned> Indices, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
bool UseMaskForCond,
return 1;
}
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+ unsigned getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
return 1;
}
return Cost;
}
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false) {
+ unsigned getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false) {
auto *VT = cast<FixedVectorType>(VecTy);
unsigned NumElts = VT->getNumElements();
unsigned Cost;
if (UseMaskForCond || UseMaskForGaps)
Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
- Opcode, VecTy, Align(Alignment), AddressSpace, CostKind);
+ Opcode, VecTy, Alignment, AddressSpace, CostKind);
else
Cost = static_cast<T *>(this)->getMemoryOpCost(
- Opcode, VecTy, Align(Alignment), AddressSpace, CostKind);
+ Opcode, VecTy, Alignment, AddressSpace, CostKind);
// Legalize the vector type, and get the legalized and unlegalized type
// sizes.
int TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) const {
int Cost = TTIImpl->getInterleavedMemoryOpCost(
Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
for (unsigned i = 0; i < Factor; i++)
Indices.push_back(i);
InterleavedCost = TTI.getInterleavedMemoryOpCost(
- Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(),
+ Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
InsertionPoint->getPointerAddressSpace());
if (InterleavedCost >= InstructionCost) {
return LT.first;
}
-int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int AArch64TTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
auto *VecVTy = cast<VectorType>(VecTy);
return BaseT::isLegalNTStore(DataType, Alignment);
}
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
int ARMTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Alignment, CostKind, I);
}
-unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
- Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind, bool UseMaskForCond,
- bool UseMaskForGaps) {
+unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace,
unsigned Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I);
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ unsigned getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
return Cost;
}
-int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int PPCTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
// needed for using / defining the vector operands. The SystemZ version does
// roughly the same but bases the computations on vector permutations
// instead.
-int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int SystemZTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
unsigned AddressSpace, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
// computing the cost using a generic formula as a function of generic
// shuffles. We therefore use a lookup table instead, filled according to
// the instruction sequences that codegen currently generates.
-int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
// \p Indices contains indices for strided load.
// \p Factor - the factor of interleaving.
// AVX-512 provides 3-src shuffles that significantly reduces the cost.
-int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
return Cost;
}
-int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int X86TTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
Type *EltTy = cast<VectorType>(VecTy)->getElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
bool IsPairwiseForm, bool IsUnsigned,
TTI::TargetCostKind CostKind);
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
- int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
- unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
- int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
- unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCostAVX512(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCostAVX2(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
int getIntImmCost(int64_t);
bool UseMaskForGaps =
Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
unsigned Cost = TTI.getInterleavedMemoryOpCost(
- I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
- Group->getAlign().value(), AS, TTI::TCK_RecipThroughput,
- Legal->isMaskRequired(I), UseMaskForGaps);
+ I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlign(),
+ AS, TTI::TCK_RecipThroughput, Legal->isMaskRequired(I), UseMaskForGaps);
if (Group->isReverse()) {
// TODO: Add support for reversed masked interleaved access.