[LSR] Don't force bases of foldable formulae to the final type.

author Mikael Holmen <mikael.holmen@ericsson.com>

Thu, 1 Feb 2018 06:38:34 +0000 (06:38 +0000)

committer Mikael Holmen <mikael.holmen@ericsson.com>

Thu, 1 Feb 2018 06:38:34 +0000 (06:38 +0000)
author Mikael Holmen <mikael.holmen@ericsson.com>
Thu, 1 Feb 2018 06:38:34 +0000 (06:38 +0000)
committer Mikael Holmen <mikael.holmen@ericsson.com>
Thu, 1 Feb 2018 06:38:34 +0000 (06:38 +0000)
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

index 332c074..4b8e228 100644 (file)
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
        // Unless the addressing mode will not be folded.
        if (!Ops.empty() && LU.Kind == LSRUse::Address &&
            isAMCompletelyFolded(TTI, LU, F)) {
-        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
+        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
          Ops.clear();
          Ops.push_back(SE.getUnknown(FullV));
        }
diff --git a/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll b/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll

index b1909ed..219aed4 100644 (file)
--- a/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll
@@ -16,27 +16,25 @@ target triple = "x86_64-unknown-unknown"
  define void @maxArray(double* noalias nocapture %x, double* noalias nocapture readonly %y) {
  ; JAG-LABEL: @maxArray(
  ; JAG-NEXT:  entry:
-; JAG-NEXT:    [[Y1:%.*]] = bitcast double* [[Y:%.*]] to <2 x double>*
-; JAG-NEXT:    [[X4:%.*]] = bitcast double* [[X:%.*]] to <2 x double>*
-; JAG-NEXT:    [[X45:%.*]] = bitcast <2 x double>* [[X4]] to i8*
-; JAG-NEXT:    [[Y12:%.*]] = bitcast <2 x double>* [[Y1]] to i8*
+; JAG-NEXT:    [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8*
+; JAG-NEXT:    [[X3:%.*]] = bitcast double* [[X:%.*]] to i8*
  ; JAG-NEXT:    br label [[VECTOR_BODY:%.*]]
  ; JAG:       vector.body:
  ; JAG-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ]
-; JAG-NEXT:    [[UGLYGEP9:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
-; JAG-NEXT:    [[UGLYGEP910:%.*]] = bitcast i8* [[UGLYGEP9]] to <2 x double>*
-; JAG-NEXT:    [[SCEVGEP11:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP910]], i64 32768
-; JAG-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y12]], i64 [[LSR_IV]]
-; JAG-NEXT:    [[UGLYGEP3:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
-; JAG-NEXT:    [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP3]], i64 32768
-; JAG-NEXT:    [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP11]], align 8
+; JAG-NEXT:    [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; JAG-NEXT:    [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>*
+; JAG-NEXT:    [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768
+; JAG-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]]
+; JAG-NEXT:    [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
+; JAG-NEXT:    [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768
+; JAG-NEXT:    [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8
  ; JAG-NEXT:    [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8
  ; JAG-NEXT:    [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
  ; JAG-NEXT:    [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
-; JAG-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
-; JAG-NEXT:    [[UGLYGEP67:%.*]] = bitcast i8* [[UGLYGEP6]] to <2 x double>*
-; JAG-NEXT:    [[SCEVGEP8:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP67]], i64 32768
-; JAG-NEXT:    store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP8]], align 8
+; JAG-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; JAG-NEXT:    [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>*
+; JAG-NEXT:    [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768
+; JAG-NEXT:    store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8
  ; JAG-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16
  ; JAG-NEXT:    [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
  ; JAG-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
@@ -45,27 +43,25 @@ define void @maxArray(double* noalias nocapture %x, double* noalias nocapture re
  ;
  ; HSW-LABEL: @maxArray(
  ; HSW-NEXT:  entry:
-; HSW-NEXT:    [[Y1:%.*]] = bitcast double* [[Y:%.*]] to <2 x double>*
-; HSW-NEXT:    [[X4:%.*]] = bitcast double* [[X:%.*]] to <2 x double>*
-; HSW-NEXT:    [[X45:%.*]] = bitcast <2 x double>* [[X4]] to i8*
-; HSW-NEXT:    [[Y12:%.*]] = bitcast <2 x double>* [[Y1]] to i8*
+; HSW-NEXT:    [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8*
+; HSW-NEXT:    [[X3:%.*]] = bitcast double* [[X:%.*]] to i8*
  ; HSW-NEXT:    br label [[VECTOR_BODY:%.*]]
  ; HSW:       vector.body:
  ; HSW-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ]
-; HSW-NEXT:    [[UGLYGEP9:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
-; HSW-NEXT:    [[UGLYGEP910:%.*]] = bitcast i8* [[UGLYGEP9]] to <2 x double>*
-; HSW-NEXT:    [[SCEVGEP11:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP910]], i64 32768
-; HSW-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y12]], i64 [[LSR_IV]]
-; HSW-NEXT:    [[UGLYGEP3:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
-; HSW-NEXT:    [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP3]], i64 32768
-; HSW-NEXT:    [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP11]], align 8
+; HSW-NEXT:    [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; HSW-NEXT:    [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>*
+; HSW-NEXT:    [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768
+; HSW-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]]
+; HSW-NEXT:    [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
+; HSW-NEXT:    [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768
+; HSW-NEXT:    [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8
  ; HSW-NEXT:    [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8
  ; HSW-NEXT:    [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
  ; HSW-NEXT:    [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
-; HSW-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[X45]], i64 [[LSR_IV]]
-; HSW-NEXT:    [[UGLYGEP67:%.*]] = bitcast i8* [[UGLYGEP6]] to <2 x double>*
-; HSW-NEXT:    [[SCEVGEP8:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP67]], i64 32768
-; HSW-NEXT:    store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP8]], align 8
+; HSW-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; HSW-NEXT:    [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>*
+; HSW-NEXT:    [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768
+; HSW-NEXT:    store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8
  ; HSW-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16
  ; HSW-NEXT:    [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
  ; HSW-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
diff --git a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll

index 5a582b2..b9af5a0 100644 (file)
--- a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
@@ -15,28 +15,25 @@ define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray)
  ; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[SIZE]] to i64
  ; CHECK-NEXT:    [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
  ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[T0]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to i8*
  ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
  ; CHECK:       for.body:
  ; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ]
  ; CHECK-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV13:%.*]] = inttoptr i64 [[LSR_IV1]] to i8*
  ; CHECK-NEXT:    br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]]
  ; CHECK:       for.body2.preheader:
  ; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
  ; CHECK:       for.body2:
-; CHECK-NEXT:    [[LSR_IV4:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT:    [[LSR_IV3:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
  ; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY2]] ], [ [[TMP0]], [[FOR_BODY2_PREHEADER]] ]
-; CHECK-NEXT:    [[LSR_IV45:%.*]] = ptrtoint i8* [[LSR_IV4]] to i64
-; CHECK-NEXT:    [[SCEVGEP8:%.*]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
-; CHECK-NEXT:    [[V1:%.*]] = load i8, i8* [[SCEVGEP8]], align 1
-; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[LSR_IV45]]
-; CHECK-NEXT:    [[V2:%.*]] = load i8, i8* [[SCEVGEP7]], align 1
+; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 1
+; CHECK-NEXT:    [[V1:%.*]] = load i8, i8* [[SCEVGEP6]], align 1
+; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[TMP0]]
+; CHECK-NEXT:    [[V2:%.*]] = load i8, i8* [[SCEVGEP5]], align 1
  ; CHECK-NEXT:    [[TMPV:%.*]] = xor i8 [[V1]], [[V2]]
-; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV13]], i64 [[LSR_IV45]]
-; CHECK-NEXT:    store i8 [[TMPV]], i8* [[SCEVGEP6]], align 1
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[LSR_IV1]]
+; CHECK-NEXT:    store i8 [[TMPV]], i8* [[SCEVGEP4]], align 1
  ; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV3]], i64 1
  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0
  ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_INC_LOOPEXIT:%.*]]
  ; CHECK:       for.inc.loopexit:
@@ -94,4 +91,3 @@ for.inc:                                          ; preds = %for.inc.loopexit, %
  for.end.loopexit:                                 ; preds = %for.inc
    ret void
  }
-
author	Mikael Holmen <mikael.holmen@ericsson.com>
	Thu, 1 Feb 2018 06:38:34 +0000 (06:38 +0000)
committer	Mikael Holmen <mikael.holmen@ericsson.com>
	Thu, 1 Feb 2018 06:38:34 +0000 (06:38 +0000)
lib/Transforms/Scalar/LoopStrengthReduce.cpp		patch | blob | history
test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll		patch | blob | history
test/Transforms/LoopStrengthReduce/X86/nested-loop.ll		patch | blob | history