From 0218e1e1da557f986ae7984e9731fdc5bdaafe7f Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 16 Sep 2014 07:57:37 +0000 Subject: [PATCH] AVX-512: added cost for some AVX-512 instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217863 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 62 +++++++++++++++++++++++++++++++ test/Analysis/CostModel/X86/cast.ll | 39 +++++++++++++++++++ test/Analysis/CostModel/X86/cmp.ll | 11 ++++++ 3 files changed, 112 insertions(+) diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index e23d1b95ceb..531e0353d3c 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -225,6 +225,15 @@ unsigned X86TTI::getArithmeticInstrCost( return LT.first * AVX2UniformConstCostTable[Idx].Cost; } + static const CostTblEntry AVX512CostTable[] = { + { ISD::SHL, MVT::v16i32, 1 }, + { ISD::SRL, MVT::v16i32, 1 }, + { ISD::SRA, MVT::v16i32, 1 }, + { ISD::SHL, MVT::v8i64, 1 }, + { ISD::SRL, MVT::v8i64, 1 }, + { ISD::SRA, MVT::v8i64, 1 }, + }; + static const CostTblEntry AVX2CostTable[] = { // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to // customize them to detect the cases where shift amount is a scalar one. @@ -260,6 +269,11 @@ unsigned X86TTI::getArithmeticInstrCost( { ISD::UDIV, MVT::v4i64, 4*20 }, }; + if (ST->hasAVX512()) { + int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second); + if (Idx != -1) + return LT.first * AVX512CostTable[Idx].Cost; + } // Look for AVX2 lowering tricks. 
if (ST->hasAVX2()) { if (ISD == ISD::SHL && LT.second == MVT::v16i16 && @@ -580,6 +594,38 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { return LTSrc.first * SSE2ConvTbl[Idx].Cost; } + static const TypeConversionCostTblEntry + AVX512ConversionTbl[] = { + { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, + { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, + { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, + { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 }, + + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 }, + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, + { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 }, + + // v16i1 -> v16i32 - load + broadcast + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, + + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 }, + + }; + + if (ST->hasAVX512()) { + int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second, + LTSrc.second); + if (Idx != -1) + return AVX512ConversionTbl[Idx].Cost; + } EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); @@ -612,6 +658,9 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 }, + + { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 }, + { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 }, }; static const TypeConversionCostTblEntry @@ -738,6 +787,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v32i8, 1 }, }; + static const CostTblEntry AVX512CostTbl[] 
= { + { ISD::SETCC, MVT::v8i64, 1 }, + { ISD::SETCC, MVT::v16i32, 1 }, + { ISD::SETCC, MVT::v8f64, 1 }, + { ISD::SETCC, MVT::v16f32, 1 }, + }; + + if (ST->hasAVX512()) { + int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy); + if (Idx != -1) + return LT.first * AVX512CostTbl[Idx].Cost; + } + if (ST->hasAVX2()) { int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy); if (Idx != -1) diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index 7f97b176f7c..cecd069cd25 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -1,3 +1,4 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX @@ -83,6 +84,19 @@ define i32 @zext_sext(<8 x i1> %in) { ;CHECK-AVX: cost of 4 {{.*}} zext %D = zext <4 x i32> undef to <4 x i64> + ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext + %D1 = zext <16 x i32> undef to <16 x i64> + + ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext + %D2 = sext <16 x i32> undef to <16 x i64> + + ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext + %D3 = zext <16 x i16> undef to <16 x i32> + ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext + %D4 = zext <16 x i8> undef to <16 x i32> + ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext + %D5 = zext <16 x i1> undef to <16 x i32> + ;CHECK-AVX2: cost of 2 {{.*}} trunc ;CHECK-AVX: cost of 4 {{.*}} trunc %E = trunc <4 x i64> undef to <4 x i32> @@ -101,8 +115,12 @@ define i32 @zext_sext(<8 x i1> %in) { ;CHECK-AVX2: cost of 4 {{.*}} trunc ;CHECK-AVX: cost of 9 {{.*}} trunc + ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc %G = trunc <8 x i64> undef to <8 x i32> + ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
+ %G1 = trunc <16 x i64> undef to <16 x i32> + ret i32 undef } @@ -211,3 +229,24 @@ define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { %D1 = uitofp <8 x i32> %d to <8 x float> ret void } + +define void @fp_conv(<8 x float> %a, <16 x float>%b) { +;CHECK-LABEL: for function 'fp_conv' + ; CHECK-AVX512: cost of 1 {{.*}} fpext + %A1 = fpext <8 x float> %a to <8 x double> + + ; CHECK-AVX512: cost of 3 {{.*}} fpext + %A2 = fpext <16 x float> %b to <16 x double> + + ; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext + ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext + %A3 = fpext <8 x float> %a to <8 x double> + + ; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc + ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc + %A4 = fptrunc <8 x double> undef to <8 x float> + + ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc + %A5 = fptrunc <16 x double> undef to <16 x float> + ret void +} diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll index 9f2bdb3c21b..469cd735f8a 100644 --- a/test/Analysis/CostModel/X86/cmp.ll +++ b/test/Analysis/CostModel/X86/cmp.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck -check-prefix=CHECK -check-prefix=AVX512 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -22,6 +23,11 @@ define i32 @cmp(i32 %arg) { ;AVX2: cost of 1 {{.*}} fcmp %E = fcmp olt <4 x double> undef, undef + ; AVX512: cost of 1 {{.*}} %E1 = fcmp + %E1 = fcmp olt <16 x float> undef, undef + ; AVX512: cost of 2 {{.*}} %E2 = fcmp + %E2 = fcmp 
olt <16 x double> undef, undef + ; -- integers -- ;AVX1: cost of 1 {{.*}} icmp @@ -49,6 +55,11 @@ define i32 @cmp(i32 %arg) { ;AVX2: cost of 1 {{.*}} icmp %M = icmp eq <32 x i8> undef, undef + ; AVX512: cost of 1 {{.*}} %M1 = icmp + %M1 = icmp eq <16 x i32> undef, undef + ; AVX512: cost of 2 {{.*}} %M2 = icmp + %M2 = icmp eq <16 x i64> undef, undef + ;CHECK: cost of 0 {{.*}} ret ret i32 undef } -- 2.11.0