[CostModel][X86] Fixed vXi8 uniform shift costs.

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 8 Jan 2017 14:14:36 +0000 (14:14 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 8 Jan 2017 14:14:36 +0000 (14:14 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 8 Jan 2017 14:14:36 +0000 (14:14 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 8 Jan 2017 14:14:36 +0000 (14:14 +0000)
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp

index f5ebfa0..107ed93 100644 (file)
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -144,6 +144,10 @@ int X86TTIImpl::getArithmeticInstrCost(
    }
  
    static const CostTblEntry AVX512BWUniformConstCostTable[] = {
+    { ISD::SHL,  MVT::v64i8,   2 }, // psllw + pand.
+    { ISD::SRL,  MVT::v64i8,   2 }, // psrlw + pand.
+    { ISD::SRA,  MVT::v64i8,   4 }, // psrlw, pand, pxor, psubb.
+
      { ISD::SDIV, MVT::v32i16,  6 }, // vpmulhw sequence
      { ISD::UDIV, MVT::v32i16,  6 }, // vpmulhuw sequence
    };
@@ -168,6 +172,10 @@ int X86TTIImpl::getArithmeticInstrCost(
    }
  
    static const CostTblEntry AVX2UniformConstCostTable[] = {
+    { ISD::SHL,  MVT::v32i8,   2 }, // psllw + pand.
+    { ISD::SRL,  MVT::v32i8,   2 }, // psrlw + pand.
+    { ISD::SRA,  MVT::v32i8,   4 }, // psrlw, pand, pxor, psubb.
+
      { ISD::SRA,  MVT::v4i64,   4 }, // 2 x psrad + shuffle.
  
      { ISD::SDIV, MVT::v16i16,  6 }, // vpmulhw sequence
@@ -184,6 +192,14 @@ int X86TTIImpl::getArithmeticInstrCost(
    }
  
    static const CostTblEntry SSE2UniformConstCostTable[] = {
+    { ISD::SHL,  MVT::v16i8,   2 }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8,   2 }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8,   4 }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SHL,  MVT::v32i8,   4 }, // 2*(psllw + pand).
+    { ISD::SRL,  MVT::v32i8,   4 }, // 2*(psrlw + pand).
+    { ISD::SRA,  MVT::v32i8,   8 }, // 2*(psrlw, pand, pxor, psubb).
+
      { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence
      { ISD::SDIV, MVT::v8i16,   6 }, // pmulhw sequence
      { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence
@@ -364,20 +380,14 @@ int X86TTIImpl::getArithmeticInstrCost(
  
    static const CostTblEntry SSE2UniformShiftCostTable[] = {
      // Uniform splats are cheaper for the following instructions.
-    { ISD::SHL,  MVT::v16i8,  1 }, // psllw.
-    { ISD::SHL,  MVT::v32i8,  2 }, // psllw.
      { ISD::SHL,  MVT::v16i16, 2 }, // psllw.
      { ISD::SHL,  MVT::v8i32,  2 }, // pslld
      { ISD::SHL,  MVT::v4i64,  2 }, // psllq.
  
-    { ISD::SRL,  MVT::v16i8,  1 }, // psrlw.
-    { ISD::SRL,  MVT::v32i8,  2 }, // psrlw.
      { ISD::SRL,  MVT::v16i16, 2 }, // psrlw.
      { ISD::SRL,  MVT::v8i32,  2 }, // psrld.
      { ISD::SRL,  MVT::v4i64,  2 }, // psrlq.
  
-    { ISD::SRA,  MVT::v16i8,  4 }, // psrlw, pand, pxor, psubb.
-    { ISD::SRA,  MVT::v32i8,  8 }, // psrlw, pand, pxor, psubb.
      { ISD::SRA,  MVT::v16i16, 2 }, // psraw.
      { ISD::SRA,  MVT::v8i32,  2 }, // psrad.
      { ISD::SRA,  MVT::v2i64,  4 }, // 2 x psrad + shuffle.
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll

index 52f176f..e5fff9b 100644 (file)
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -498,7 +498,7 @@ entry:
  define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
  entry:
    ; SSE2: shift16i8c
-  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2: cost of 2 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i8c
    ; SSE2-CODEGEN: psrlw $3
  
@@ -513,7 +513,7 @@ entry:
  define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
  entry:
    ; SSE2: shift32i8c
-  ; SSE2: cost of 2 {{.*}} lshr
+  ; SSE2: cost of 4 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i8c
    ; SSE2-CODEGEN: psrlw $3
  
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll

index e385c5b..6628b9b 100644 (file)
--- a/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -498,7 +498,7 @@ entry:
  define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
  entry:
    ; SSE2: shift16i8c
-  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2: cost of 2 {{.*}} shl
    ; SSE2-CODEGEN: shift16i8c
    ; SSE2-CODEGEN: psllw $3
  
@@ -513,7 +513,7 @@ entry:
  define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
  entry:
    ; SSE2: shift32i8c
-  ; SSE2: cost of 2 {{.*}} shl
+  ; SSE2: cost of 4 {{.*}} shl
    ; SSE2-CODEGEN: shift32i8c
    ; SSE2-CODEGEN: psllw $3
  
diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll

index 3b87e6a..6756f3b 100644 (file)
--- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -606,7 +606,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
  ; AVX: Found an estimated cost of 4 for instruction:   %shift
  ; AVX2: Found an estimated cost of 4 for instruction:   %shift
  ; AVX512: Found an estimated cost of 4 for instruction:   %shift
-; XOP: Found an estimated cost of 2 for instruction:   %shift
+; XOP: Found an estimated cost of 4 for instruction:   %shift
    %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <16 x i8> %shift
  }
@@ -616,9 +616,10 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
  ; SSE2: Found an estimated cost of 8 for instruction:   %shift
  ; SSE41: Found an estimated cost of 8 for instruction:   %shift
  ; AVX: Found an estimated cost of 8 for instruction:   %shift
-; AVX2: Found an estimated cost of 8 for instruction:   %shift
-; AVX512: Found an estimated cost of 8 for instruction:   %shift
-; XOP: Found an estimated cost of 4 for instruction:   %shift
+; AVX2: Found an estimated cost of 4 for instruction:   %shift
+; AVX512: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 8 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 4 for instruction:   %shift
    %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <32 x i8> %shift
  }
@@ -628,10 +629,11 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
  ; SSE2: Found an estimated cost of 16 for instruction:   %shift
  ; SSE41: Found an estimated cost of 16 for instruction:   %shift
  ; AVX: Found an estimated cost of 16 for instruction:   %shift
-; AVX2: Found an estimated cost of 16 for instruction:   %shift
-; AVX512F: Found an estimated cost of 16 for instruction:   %shift
-; AVX512BW: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 8 for instruction:   %shift
+; AVX2: Found an estimated cost of 8 for instruction:   %shift
+; AVX512F: Found an estimated cost of 8 for instruction:   %shift
+; AVX512BW: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 16 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 8 for instruction:   %shift
    %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <64 x i8> %shift
  }
diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll

index 2717fcf..63e6db1 100644 (file)
--- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -611,11 +611,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
  
  define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
  ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
-; SSE2: Found an estimated cost of 1 for instruction:   %shift
-; SSE41: Found an estimated cost of 1 for instruction:   %shift
-; AVX: Found an estimated cost of 1 for instruction:   %shift
-; AVX2: Found an estimated cost of 1 for instruction:   %shift
-; AVX512: Found an estimated cost of 1 for instruction:   %shift
+; SSE2: Found an estimated cost of 2 for instruction:   %shift
+; SSE41: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX2: Found an estimated cost of 2 for instruction:   %shift
+; AVX512: Found an estimated cost of 2 for instruction:   %shift
  ; XOP: Found an estimated cost of 2 for instruction:   %shift
    %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <16 x i8> %shift
@@ -623,25 +623,27 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
  
  define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
  ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
-; SSE2: Found an estimated cost of 2 for instruction:   %shift
-; SSE41: Found an estimated cost of 2 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; SSE2: Found an estimated cost of 4 for instruction:   %shift
+; SSE41: Found an estimated cost of 4 for instruction:   %shift
+; AVX: Found an estimated cost of 4 for instruction:   %shift
  ; AVX2: Found an estimated cost of 2 for instruction:   %shift
  ; AVX512: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction:   %shift
    %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <32 x i8> %shift
  }
  
  define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
  ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
-; SSE2: Found an estimated cost of 4 for instruction:   %shift
-; SSE41: Found an estimated cost of 4 for instruction:   %shift
-; AVX: Found an estimated cost of 4 for instruction:   %shift
+; SSE2: Found an estimated cost of 8 for instruction:   %shift
+; SSE41: Found an estimated cost of 8 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
  ; AVX2: Found an estimated cost of 4 for instruction:   %shift
  ; AVX512F: Found an estimated cost of 4 for instruction:   %shift
  ; AVX512BW: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 8 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 8 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 4 for instruction:   %shift
    %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <64 x i8> %shift
  }
diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll

index 9898222..8c42bd6 100644 (file)
--- a/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -616,37 +616,39 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
  
  define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
  ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
-; SSE2: Found an estimated cost of 1 for instruction:   %shift
-; SSE41: Found an estimated cost of 1 for instruction:   %shift
-; AVX: Found an estimated cost of 1 for instruction:   %shift
-; AVX2: Found an estimated cost of 1 for instruction:   %shift
-; AVX512: Found an estimated cost of 1 for instruction:   %shift
-; XOP: Found an estimated cost of 1 for instruction:   %shift
+; SSE2: Found an estimated cost of 2 for instruction:   %shift
+; SSE41: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX2: Found an estimated cost of 2 for instruction:   %shift
+; AVX512: Found an estimated cost of 2 for instruction:   %shift
+; XOP: Found an estimated cost of 2 for instruction:   %shift
    %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <16 x i8> %shift
  }
  
  define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
  ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
-; SSE2: Found an estimated cost of 2 for instruction:   %shift
-; SSE41: Found an estimated cost of 2 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; SSE2: Found an estimated cost of 4 for instruction:   %shift
+; SSE41: Found an estimated cost of 4 for instruction:   %shift
+; AVX: Found an estimated cost of 4 for instruction:   %shift
  ; AVX2: Found an estimated cost of 2 for instruction:   %shift
  ; AVX512: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 2 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction:   %shift
    %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <32 x i8> %shift
  }
  
  define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
  ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
-; SSE2: Found an estimated cost of 4 for instruction:   %shift
-; SSE41: Found an estimated cost of 4 for instruction:   %shift
-; AVX: Found an estimated cost of 4 for instruction:   %shift
+; SSE2: Found an estimated cost of 8 for instruction:   %shift
+; SSE41: Found an estimated cost of 8 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
  ; AVX2: Found an estimated cost of 4 for instruction:   %shift
  ; AVX512F: Found an estimated cost of 4 for instruction:   %shift
  ; AVX512BW: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 8 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 4 for instruction:   %shift
    %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    ret <64 x i8> %shift
  }
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 8 Jan 2017 14:14:36 +0000 (14:14 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 8 Jan 2017 14:14:36 +0000 (14:14 +0000)
lib/Target/X86/X86TargetTransformInfo.cpp		patch | blob | history
test/Analysis/CostModel/X86/testshiftlshr.ll		patch | blob | history
test/Analysis/CostModel/X86/testshiftshl.ll		patch | blob | history
test/Analysis/CostModel/X86/vshift-ashr-cost.ll		patch | blob | history
test/Analysis/CostModel/X86/vshift-lshr-cost.ll		patch | blob | history
test/Analysis/CostModel/X86/vshift-shl-cost.ll		patch | blob | history