[AArch64] Implement getArithmeticReductionCost

author Matthew Simpson <mssimpso@codeaurora.org>

Fri, 16 Mar 2018 11:34:15 +0000 (11:34 +0000)

committer Matthew Simpson <mssimpso@codeaurora.org>

Fri, 16 Mar 2018 11:34:15 +0000 (11:34 +0000)
author Matthew Simpson <mssimpso@codeaurora.org>
Fri, 16 Mar 2018 11:34:15 +0000 (11:34 +0000)
committer Matthew Simpson <mssimpso@codeaurora.org>
Fri, 16 Mar 2018 11:34:15 +0000 (11:34 +0000)
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

index c2204af..a626323 100644 (file)
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -884,3 +884,31 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
    }
    return false;
  }
+
+int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
+                                               bool IsPairwiseForm) {
+  // Only the non-pairwise form is costed here; pairwise defers to BaseT.
+  if (IsPairwiseForm)
+    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
+  // Legalize ValTy: the table below is keyed on the resulting legal MVT.
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  MVT MTy = LT.second;
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  // Horizontal adds can use the 'addv' instruction. We model the cost of these
+  // instructions as normal vector adds. This is the only arithmetic vector
+  // reduction operation for which we have an instruction.
+  static const CostTblEntry CostTblNoPairwise[]{
+      {ISD::ADD, MVT::v8i8,  1},
+      {ISD::ADD, MVT::v16i8, 1},
+      {ISD::ADD, MVT::v4i16, 1},
+      {ISD::ADD, MVT::v8i16, 1},
+      {ISD::ADD, MVT::v4i32, 1},
+  };
+  // Table hit: scale the entry's cost by LT.first (from type legalization).
+  if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
+    return LT.first * Entry->Cost;
+  // No entry for this opcode/type: use the generic implementation's cost.
+  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
+}
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h

index 08c693f..e71eb51 100644 (file)
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -166,6 +166,9 @@ public:
  
    bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                               TTI::ReductionFlags Flags) const;
+
+  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+                                 bool IsPairwiseForm);
    /// @}
  };
  
diff --git a/test/Analysis/CostModel/AArch64/vector-reduce.ll b/test/Analysis/CostModel/AArch64/vector-reduce.ll

index 79d74a9..5bf5076 100644 (file)
--- a/test/Analysis/CostModel/AArch64/vector-reduce.ll
+++ b/test/Analysis/CostModel/AArch64/vector-reduce.ll
@@ -2,7 +2,7 @@
  ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
  
  ; COST-LABEL: add.i8.v8i8
-; COST:       Found an estimated cost of 27 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
+; COST:       Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
  ; CODE-LABEL: add.i8.v8i8
  ; CODE:       addv b0, v0.8b
  define i8 @add.i8.v8i8(<8 x i8> %v) {
@@ -11,7 +11,7 @@ define i8 @add.i8.v8i8(<8 x i8> %v) {
  }
  
  ; COST-LABEL: add.i8.v16i8
-; COST:       Found an estimated cost of 53 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
+; COST:       Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
  ; CODE-LABEL: add.i8.v16i8
  ; CODE:       addv b0, v0.16b
  define i8 @add.i8.v16i8(<16 x i8> %v) {
@@ -20,7 +20,7 @@ define i8 @add.i8.v16i8(<16 x i8> %v) {
  }
  
  ; COST-LABEL: add.i16.v4i16
-; COST:       Found an estimated cost of 13 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
+; COST:       Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
  ; CODE-LABEL: add.i16.v4i16
  ; CODE:       addv h0, v0.4h
  define i16 @add.i16.v4i16(<4 x i16> %v) {
@@ -29,7 +29,7 @@ define i16 @add.i16.v4i16(<4 x i16> %v) {
  }
  
  ; COST-LABEL: add.i16.v8i16
-; COST:       Found an estimated cost of 27 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
+; COST:       Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
  ; CODE-LABEL: add.i16.v8i16
  ; CODE:       addv h0, v0.8h
  define i16 @add.i16.v8i16(<8 x i16> %v) {
@@ -38,7 +38,7 @@ define i16 @add.i16.v8i16(<8 x i16> %v) {
  }
  
  ; COST-LABEL: add.i32.v4i32
-; COST:       Found an estimated cost of 13 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
+; COST:       Found an estimated cost of 1 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
  ; CODE-LABEL: add.i32.v4i32
  ; CODE:       addv s0, v0.4s
  define i32 @add.i32.v4i32(<4 x i32> %v) {
diff --git a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll

index cb49150..02cf09d 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -21,7 +21,7 @@ target triple = "aarch64--linux"
  ; YAML-NEXT: Function:        test_select
  ; YAML-NEXT: Args:
  ; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
-; YAML-NEXT:   - Cost:            '4'
+; YAML-NEXT:   - Cost:            '-8'
  ; YAML-NEXT:   - String:          ' and with tree size '
  ; YAML-NEXT:   - TreeSize:        '8'
  
@@ -115,7 +115,7 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
  ; YAML-NEXT: Function:        reduction_with_br
  ; YAML-NEXT: Args:
  ; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
-; YAML-NEXT:   - Cost:            '1'
+; YAML-NEXT:   - Cost:            '-11'
  ; YAML-NEXT:   - String:          ' and with tree size '
  ; YAML-NEXT:   - TreeSize:        '3'
  
@@ -183,7 +183,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
  ; YAML-NEXT: Function:        test_unrolled_select
  ; YAML-NEXT: Args:
  ; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
-; YAML-NEXT:   - Cost:            '-33'
+; YAML-NEXT:   - Cost:            '-47'
  ; YAML-NEXT:   - String:          ' and with tree size '
  ; YAML-NEXT:   - TreeSize:        '10'
author	Matthew Simpson <mssimpso@codeaurora.org>
	Fri, 16 Mar 2018 11:34:15 +0000 (11:34 +0000)
committer	Matthew Simpson <mssimpso@codeaurora.org>
	Fri, 16 Mar 2018 11:34:15 +0000 (11:34 +0000)
lib/Target/AArch64/AArch64TargetTransformInfo.cpp		patch | blob | history
lib/Target/AArch64/AArch64TargetTransformInfo.h		patch | blob | history
test/Analysis/CostModel/AArch64/vector-reduce.ll		patch | blob | history
test/Transforms/SLPVectorizer/AArch64/horizontal.ll		patch | blob | history