From 0218e1e1da557f986ae7984e9731fdc5bdaafe7f Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky <elena.demikhovsky@intel.com>
Date: Tue, 16 Sep 2014 07:57:37 +0000
Subject: [PATCH] AVX-512: added cost for some AVX-512 instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217863 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86TargetTransformInfo.cpp | 62 +++++++++++++++++++++++++++++++
 test/Analysis/CostModel/X86/cast.ll       | 39 +++++++++++++++++++
 test/Analysis/CostModel/X86/cmp.ll        | 11 ++++++
 3 files changed, 112 insertions(+)

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index e23d1b95ceb..531e0353d3c 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -225,6 +225,15 @@ unsigned X86TTI::getArithmeticInstrCost(
       return LT.first * AVX2UniformConstCostTable[Idx].Cost;
   }
 
+  static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
+    { ISD::SHL,     MVT::v16i32,    1 },
+    { ISD::SRL,     MVT::v16i32,    1 },
+    { ISD::SRA,     MVT::v16i32,    1 },
+    { ISD::SHL,     MVT::v8i64,    1 },
+    { ISD::SRL,     MVT::v8i64,    1 },
+    { ISD::SRA,     MVT::v8i64,    1 },
+  };
+
   static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
     // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
     // customize them to detect the cases where shift amount is a scalar one.
@@ -260,6 +269,11 @@ unsigned X86TTI::getArithmeticInstrCost(
     { ISD::UDIV,  MVT::v4i64,  4*20 },
   };
 
+  if (ST->hasAVX512()) {
+    int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * AVX512CostTable[Idx].Cost;
+  }
   // Look for AVX2 lowering tricks.
   if (ST->hasAVX2()) {
     if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
@@ -580,6 +594,38 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
       return LTSrc.first * SSE2ConvTbl[Idx].Cost;
   }
 
+  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+  AVX512ConversionTbl[] = {
+    { ISD::FP_EXTEND, MVT::v8f64,   MVT::v8f32,  1 },
+    { ISD::FP_EXTEND, MVT::v8f64,   MVT::v16f32, 3 },
+    { ISD::FP_ROUND,  MVT::v8f32,   MVT::v8f64,  1 },
+    { ISD::FP_ROUND,  MVT::v16f32,  MVT::v8f64,  3 },
+
+    { ISD::TRUNCATE,  MVT::v16i8,   MVT::v16i32, 1 },
+    { ISD::TRUNCATE,  MVT::v16i16,  MVT::v16i32, 1 },
+    { ISD::TRUNCATE,  MVT::v8i16,   MVT::v8i64,  1 },
+    { ISD::TRUNCATE,  MVT::v8i32,   MVT::v8i64,  1 },
+    { ISD::TRUNCATE,  MVT::v16i32,  MVT::v8i64,  4 },
+
+    // v16i1 -> v16i32 - load + broadcast
+    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1,  2 },
+    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1,  2 },
+
+    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8,  1 },
+    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8,  1 },
+    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+    { ISD::SIGN_EXTEND, MVT::v8i64,  MVT::v16i32, 3 },
+    { ISD::ZERO_EXTEND, MVT::v8i64,  MVT::v16i32, 3 },
+
+  };
+
+  if (ST->hasAVX512()) {
+    int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
+                                     LTSrc.second);
+    if (Idx != -1)
+      return AVX512ConversionTbl[Idx].Cost;
+  }
   EVT SrcTy = TLI->getValueType(Src);
   EVT DstTy = TLI->getValueType(Dst);
 
@@ -612,6 +658,9 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
     { ISD::TRUNCATE,    MVT::v8i8,   MVT::v8i32,  2 },
     { ISD::TRUNCATE,    MVT::v8i16,  MVT::v8i32,  2 },
     { ISD::TRUNCATE,    MVT::v8i32,  MVT::v8i64,  4 },
+
+    { ISD::FP_EXTEND,   MVT::v8f64,  MVT::v8f32,  3 },
+    { ISD::FP_ROUND,    MVT::v8f32,  MVT::v8f64,  3 },
   };
 
   static const TypeConversionCostTblEntry<MVT::SimpleValueType>
@@ -738,6 +787,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     { ISD::SETCC,   MVT::v32i8,   1 },
   };
 
+  static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
+    { ISD::SETCC,   MVT::v8i64,   1 },
+    { ISD::SETCC,   MVT::v16i32,  1 },
+    { ISD::SETCC,   MVT::v8f64,   1 },
+    { ISD::SETCC,   MVT::v16f32,  1 },
+  };
+
+  if (ST->hasAVX512()) {
+    int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX512CostTbl[Idx].Cost;
+  }
+
   if (ST->hasAVX2()) {
     int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
     if (Idx != -1)
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index 7f97b176f7c..cecd069cd25 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -1,3 +1,4 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
 
@@ -83,6 +84,19 @@ define i32 @zext_sext(<8 x i1> %in) {
   ;CHECK-AVX: cost of 4 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
 
+  ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
+  %D1 = zext <16 x i32> undef to <16 x i64>
+
+  ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
+  %D2 = sext <16 x i32> undef to <16 x i64>
+
+  ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
+  %D3 = zext <16 x i16> undef to <16 x i32>
+  ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
+  %D4 = zext <16 x i8> undef to <16 x i32>
+  ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
+  %D5 = zext <16 x i1> undef to <16 x i32>
+
   ;CHECK-AVX2: cost of 2 {{.*}} trunc
   ;CHECK-AVX: cost of 4 {{.*}} trunc
   %E = trunc <4 x i64> undef to <4 x i32>
@@ -101,8 +115,12 @@ define i32 @zext_sext(<8 x i1> %in) {
 
   ;CHECK-AVX2: cost of 4 {{.*}} trunc
   ;CHECK-AVX: cost of 9 {{.*}} trunc
+  ;CHECK_AVX512: cost of 1 {{.*}} G = trunc
   %G = trunc <8 x i64> undef to <8 x i32>
 
+  ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
+  %G1 = trunc <16 x i64> undef to <16 x i32>
+
   ret i32 undef
 }
 
@@ -211,3 +229,24 @@ define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
   %D1 = uitofp <8 x i32> %d to <8 x float>
   ret void
 }
+
+define void @fp_conv(<8 x float> %a, <16 x float>%b) {
+;CHECK-LABEL: for function 'fp_conv'
+  ; CHECK-AVX512: cost of 1 {{.*}} fpext
+  %A1 = fpext <8 x float> %a to <8 x double>
+
+  ; CHECK-AVX512: cost of 3 {{.*}} fpext
+  %A2 = fpext <16 x float> %b to <16 x double>
+
+  ; CHECK-AVX2:   cost of 3 {{.*}} %A3 = fpext
+  ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
+  %A3 = fpext <8 x float> %a to <8 x double>
+
+  ; CHECK-AVX2:   cost of 3 {{.*}} %A4 = fptrunc
+  ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
+  %A4 = fptrunc <8 x double> undef to <8 x float>
+
+  ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
+  %A5 = fptrunc <16 x double> undef to <16 x float>
+  ret void
+}
diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll
index 9f2bdb3c21b..469cd735f8a 100644
--- a/test/Analysis/CostModel/X86/cmp.ll
+++ b/test/Analysis/CostModel/X86/cmp.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck -check-prefix=CHECK -check-prefix=AVX512 %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -22,6 +23,11 @@ define i32 @cmp(i32 %arg) {
   ;AVX2: cost of 1 {{.*}} fcmp
   %E = fcmp olt <4 x double> undef, undef
 
+  ; AVX512: cost of 1 {{.*}} %E1 = fcmp
+  %E1 = fcmp olt <16 x float> undef, undef
+  ; AVX512: cost of 2 {{.*}} %E2 = fcmp
+  %E2 = fcmp olt <16 x double> undef, undef
+
   ;  -- integers --
 
   ;AVX1: cost of 1 {{.*}} icmp
@@ -49,6 +55,11 @@ define i32 @cmp(i32 %arg) {
   ;AVX2: cost of 1 {{.*}} icmp
   %M = icmp eq <32 x i8> undef, undef
 
+  ; AVX512: cost of 1 {{.*}} %M1 = icmp
+  %M1 = icmp eq <16 x i32> undef, undef
+  ; AVX512: cost of 2 {{.*}} %M2 = icmp
+  %M2 = icmp eq <16 x i64> undef, undef
+
   ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
-- 
2.11.0