: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast.">;
+def FeaturePrefer256Bit
+ : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
+ "Prefer 256-bit AVX instructions">;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
GatherOverhead = 2;
if (hasAVX512())
ScatterOverhead = 2;
+
+  // Consume the vector width attribute or apply any target-specific limit.
+ if (PreferVectorWidthOverride)
+ PreferVectorWidth = PreferVectorWidthOverride;
+ else if (Prefer256Bit)
+ PreferVectorWidth = 256;
}
void X86Subtarget::initializeEnvironment() {
X86ProcFamily = Others;
GatherOverhead = 1024;
ScatterOverhead = 1024;
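+  // UINT32_MAX means "no preference": leave the width unconstrained until a
+  // function attribute or subtarget feature lowers it.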
+ PreferVectorWidth = UINT32_MAX;
+ Prefer256Bit = false;
}
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM,
- unsigned StackAlignOverride)
+ unsigned StackAlignOverride,
+ unsigned PreferVectorWidthOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
+ PreferVectorWidthOverride(PreferVectorWidthOverride),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
///
unsigned MaxInlineSizeThreshold;
+  /// Indicates that the target prefers 256-bit vector instructions.
+ bool Prefer256Bit;
+
/// What processor and OS we're targeting.
Triple TargetTriple;
/// Override the stack alignment.
unsigned StackAlignOverride;
+  /// Preferred vector width from the function attribute, or 0 if unset.
+ unsigned PreferVectorWidthOverride;
+
+  /// Resolved preferred vector width from the function attribute and the
+  /// subtarget features.
+ unsigned PreferVectorWidth;
+
/// True if compiling for 64-bit, false for 16-bit or 32-bit.
bool In64BitMode;
/// of the specified triple.
///
X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const X86TargetMachine &TM, unsigned StackAlignOverride);
+ const X86TargetMachine &TM, unsigned StackAlignOverride,
+ unsigned PreferVectorWidthOverride);
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
bool hasCLWB() const { return HasCLWB; }
bool hasRDPID() const { return HasRDPID; }
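+  /// Returns the resolved preferred vector width in bits; UINT32_MAX means
+  /// unconstrained.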
+ unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
+
bool isXRaySupported() const override { return is64Bit(); }
X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
if (SoftFloat)
Key += FS.empty() ? "+soft-float" : ",+soft-float";
- FS = Key.substr(CPU.size());
+  // Record the key size once all target features have been added so we can
+  // extract the feature string from the key later.
+ unsigned CPUFSWidth = Key.size();
+
+  // Translate the vector width function attribute into subtarget features.
+  // This overrides any CPU-specific tuning parameter.
+ unsigned PreferVectorWidthOverride = 0;
+ if (F.hasFnAttribute("prefer-vector-width")) {
+ StringRef Val = F.getFnAttribute("prefer-vector-width").getValueAsString();
+ unsigned Width;
+ if (!Val.getAsInteger(0, Width)) {
+ Key += ",prefer-vector-width=";
+ Key += Val;
+ PreferVectorWidthOverride = Width;
+ }
+ }
+
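+  // Only the CPU and feature string belong in FS; the prefer-vector-width
+  // suffix is part of the subtarget cache key but is not a valid feature
+  // string, so slice it off here.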
+ FS = Key.slice(CPU.size(), CPUFSWidth);
auto &I = SubtargetMap[Key];
if (!I) {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
- Options.StackAlignmentOverride);
+ Options.StackAlignmentOverride,
+ PreferVectorWidthOverride);
}
return I.get();
}
}
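As a side note on the producer side of this attribute: below is a minimal
sketch, assuming only the standard llvm::Function string-attribute API (the
helper name preferVectorWidth256 is hypothetical), of how a frontend could
attach the attribute that getSubtargetImpl() consumes above.

    #include "llvm/IR/Function.h"

    // Attach the width preference this patch consumes. The value string is
    // parsed above with getAsInteger(0, Width), so any integer literal in a
    // recognized base works; "0" or a missing attribute means no override.
    void preferVectorWidth256(llvm::Function &F) {
      F.addFnAttr("prefer-vector-width", "256");
    }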
unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const {
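+  // Clamp the answer to the resolved preference: the vectorizers use this
+  // width to size vector types, so narrowing it here keeps wider types from
+  // being formed.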
+ unsigned PreferVectorWidth = ST->getPreferVectorWidth();
if (Vector) {
- if (ST->hasAVX512())
+ if (ST->hasAVX512() && PreferVectorWidth >= 512)
return 512;
- if (ST->hasAVX())
+ if (ST->hasAVX() && PreferVectorWidth >= 256)
return 256;
- if (ST->hasSSE1())
+ if (ST->hasSSE1() && PreferVectorWidth >= 128)
return 128;
return 0;
}
// TODO: Remove the explicit ST->hasAVX512()? That would mean we would only
// enable gather with a -march.
return (DataWidth == 32 || DataWidth == 64) &&
         (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()));
}
bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
+; RUN: opt -mattr=+avx512vl,+prefer-256-bit --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256
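+;
+; The first RUN line leaves the preferred vector width unconstrained; the
+; second enables the prefer-256-bit subtarget feature, so only @h, which
+; carries "prefer-vector-width"="512", should still use zmm registers.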
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; CHECK: vmovdqu64 %zmm{{.}},
; CHECK-NOT: %ymm
+; Verify that we don't generate 512-bit wide vectors when the subtarget
+; feature says not to.
+
+; CHECK-PREFER-AVX256-LABEL: f:
+; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
+; CHECK-PREFER-AVX256-NOT: %zmm
+
define void @f(i32* %a, i32 %n) {
entry:
%cmp4 = icmp sgt i32 %n, 0
for.end: ; preds = %for.end.loopexit, %entry
ret void
}
+
+; Verify that the "prefer-vector-width=256" attribute prevents the use of
+; 512-bit vectors.
+
+; CHECK-LABEL: g:
+; CHECK: vmovdqu %ymm{{.}},
+; CHECK-NOT: %zmm
+
+; CHECK-PREFER-AVX256-LABEL: g:
+; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
+; CHECK-PREFER-AVX256-NOT: %zmm
+
+define void @g(i32* %a, i32 %n) "prefer-vector-width"="256" {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %n, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; Verify that the "prefer-vector-width=512" attribute overrides the subtarget
+; preference for 256-bit vectors.
+
+; CHECK-LABEL: h:
+; CHECK: vmovdqu64 %zmm{{.}},
+; CHECK-NOT: %ymm
+
+; CHECK-PREFER-AVX256-LABEL: h:
+; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}},
+; CHECK-PREFER-AVX256-NOT: %ymm
+
+define void @h(i32* %a, i32 %n) "prefer-vector-width"="512" {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %n, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}