Subzero: Fold the load instruction into the next cast instruction.

author Jim Stichnoth <stichnot@chromium.org>

Mon, 1 Jun 2015 06:34:44 +0000 (23:34 -0700)

committer Jim Stichnoth <stichnot@chromium.org>

Mon, 1 Jun 2015 06:34:44 +0000 (23:34 -0700)
author Jim Stichnoth <stichnot@chromium.org>
Mon, 1 Jun 2015 06:34:44 +0000 (23:34 -0700)
committer Jim Stichnoth <stichnot@chromium.org>
Mon, 1 Jun 2015 06:34:44 +0000 (23:34 -0700)
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index 7edb2c3..a1ba9d6 100644 (file)
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -883,7 +883,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
      // that stack slot.
      if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
        assert(Var->getWeight().isZero());
-      if (!SpillVar->getLinkedTo()->hasReg()) {
+      if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
          VariablesLinkedToSpillSlots.push_back(Var);
          continue;
        }
@@ -1160,8 +1160,9 @@ void TargetX8632::split64(Variable *Var) {
  }
  
  Operand *TargetX8632::loOperand(Operand *Operand) {
-  assert(Operand->getType() == IceType_i64);
-  if (Operand->getType() != IceType_i64)
+  assert(Operand->getType() == IceType_i64 ||
+         Operand->getType() == IceType_f64);
+  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
      return Operand;
    if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
      split64(Var);
@@ -1180,8 +1181,9 @@ Operand *TargetX8632::loOperand(Operand *Operand) {
  }
  
  Operand *TargetX8632::hiOperand(Operand *Operand) {
-  assert(Operand->getType() == IceType_i64);
-  if (Operand->getType() != IceType_i64)
+  assert(Operand->getType() == IceType_i64 ||
+         Operand->getType() == IceType_f64);
+  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
      return Operand;
    if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
      split64(Var);
@@ -2463,20 +2465,25 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
        //   a_lo.i32 = t_lo.i32
        //   t_hi.i32 = hi(s.f64)
        //   a_hi.i32 = t_hi.i32
-      SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
-      SpillVar->setLinkedTo(llvm::dyn_cast<Variable>(Src0RM));
-      Variable *Spill = SpillVar;
-      Spill->setWeight(RegWeight::Zero);
-      _movq(Spill, Src0RM);
+      Operand *SpillLo, *SpillHi;
+      if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
+        SpillVariable *SpillVar =
+            Func->makeVariable<SpillVariable>(IceType_f64);
+        SpillVar->setLinkedTo(Src0Var);
+        Variable *Spill = SpillVar;
+        Spill->setWeight(RegWeight::Zero);
+        _movq(Spill, Src0RM);
+        SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low);
+        SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High);
+      } else {
+        SpillLo = loOperand(Src0RM);
+        SpillHi = hiOperand(Src0RM);
+      }
  
        Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
        Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
        Variable *T_Lo = makeReg(IceType_i32);
        Variable *T_Hi = makeReg(IceType_i32);
-      VariableSplit *SpillLo =
-          VariableSplit::create(Func, Spill, VariableSplit::Low);
-      VariableSplit *SpillHi =
-          VariableSplit::create(Func, Spill, VariableSplit::High);
  
        _mov(T_Lo, SpillLo);
        _mov(DestLo, T_Lo);
@@ -2486,6 +2493,12 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
      case IceType_f64: {
        Src0 = legalize(Src0);
        assert(Src0->getType() == IceType_i64);
+      if (llvm::isa<OperandX8632Mem>(Src0)) {
+        Variable *T = Func->makeVariable(Dest->getType());
+        _movq(T, Src0);
+        _movq(Dest, T);
+        break;
+      }
        // a.f64 = bitcast b.i64 ==>
        //   t_lo.i32 = b_lo.i32
        //   FakeDef(s.f64)
@@ -3955,20 +3968,23 @@ void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
  
  } // anonymous namespace
  
-void TargetX8632::lowerLoad(const InstLoad *Inst) {
+void TargetX8632::lowerLoad(const InstLoad *Load) {
    // A Load instruction can be treated the same as an Assign
    // instruction, after the source operand is transformed into an
    // OperandX8632Mem operand.  Note that the address mode
    // optimization already creates an OperandX8632Mem operand, so it
    // doesn't need another level of transformation.
-  Type Ty = Inst->getDest()->getType();
-  Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);
+  Type Ty = Load->getDest()->getType();
+  Operand *Src0 = FormMemoryOperand(Load->getSourceAddress(), Ty);
  
    // Fuse this load with a subsequent Arithmetic instruction in the
    // following situations:
    //   a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
    //   a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
    //
+  // Fuse this load with a subsequent Cast instruction:
+  //   a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a
+  //
    // TODO: Clean up and test thoroughly.
    // (E.g., if there is an mfence-all make sure the load ends up on the
    // same side of the fence).
@@ -3979,30 +3995,46 @@ void TargetX8632::lowerLoad(const InstLoad *Inst) {
    // load instruction's dest variable, and that instruction ends that
    // variable's live range, then make the substitution.  Deal with
    // commutativity optimization in the arithmetic instruction lowering.
-  InstArithmetic *NewArith = nullptr;
-  if (InstArithmetic *Arith =
-          llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
-    Variable *DestLoad = Inst->getDest();
-    Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
-    Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
-    if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
-        DestLoad != Src0Arith) {
-      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
-                                        Arith->getSrc(0), Src0);
-    } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
-               Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
-      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
-                                        Arith->getSrc(1), Src0);
-    }
-    if (NewArith) {
-      Arith->setDeleted();
-      Context.advanceNext();
-      lowerArithmetic(NewArith);
-      return;
+  //
+  // TODO(stichnot): Do load fusing as a separate pass.  Run it before
+  // the bool folding pass.  Modify Ice::Inst to allow src operands to
+  // be replaced, including updating Inst::LiveRangesEnded, to avoid
+  // having to manually mostly clone each instruction type.
+  Inst *NextInst = Context.getNextInst();
+  Variable *DestLoad = Load->getDest();
+  if (NextInst && NextInst->isLastUse(DestLoad)) {
+    if (auto *Arith = llvm::dyn_cast<InstArithmetic>(NextInst)) {
+      InstArithmetic *NewArith = nullptr;
+      Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
+      Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
+      if (Src1Arith == DestLoad && DestLoad != Src0Arith) {
+        NewArith = InstArithmetic::create(
+            Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(0), Src0);
+      } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
+                 DestLoad != Src1Arith) {
+        NewArith = InstArithmetic::create(
+            Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(1), Src0);
+      }
+      if (NewArith) {
+        Arith->setDeleted();
+        Context.advanceNext();
+        lowerArithmetic(NewArith);
+        return;
+      }
+    } else if (auto *Cast = llvm::dyn_cast<InstCast>(NextInst)) {
+      Variable *Src0Cast = llvm::dyn_cast<Variable>(Cast->getSrc(0));
+      if (Src0Cast == DestLoad) {
+        InstCast *NewCast =
+            InstCast::create(Func, Cast->getCastKind(), Cast->getDest(), Src0);
+        Cast->setDeleted();
+        Context.advanceNext();
+        lowerCast(NewCast);
+        return;
+      }
      }
    }
  
-  InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
+  InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
    lowerAssign(Assign);
  }
  
diff --git a/tests_lit/llvm2ice_tests/8bit.pnacl.ll b/tests_lit/llvm2ice_tests/8bit.pnacl.ll

index 4987a1d..81db96d 100644 (file)
--- a/tests_lit/llvm2ice_tests/8bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
@@ -335,7 +335,8 @@ define i32 @load_i8(i32 %addr_arg) {
  entry:
    %addr = inttoptr i32 %addr_arg to i8*
    %ret = load i8* %addr, align 1
-  %ret_ext = zext i8 %ret to i32
+  %ret2 = sub i8 %ret, 0
+  %ret_ext = zext i8 %ret2 to i32
    ret i32 %ret_ext
  }
  ; CHECK-LABEL: load_i8
@@ -345,7 +346,8 @@ define i32 @load_i8_global(i32 %addr_arg) {
  entry:
    %addr = bitcast [1 x i8]* @global8 to i8*
    %ret = load i8* %addr, align 1
-  %ret_ext = zext i8 %ret to i32
+  %ret2 = sub i8 %ret, 0
+  %ret_ext = zext i8 %ret2 to i32
    ret i32 %ret_ext
  }
  ; CHECK-LABEL: load_i8_global
diff --git a/tests_lit/llvm2ice_tests/load_cast.ll b/tests_lit/llvm2ice_tests/load_cast.ll

new file mode 100644 (file)

index 0000000..5395d04
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/load_cast.ll
@@ -0,0 +1,266 @@
+; Tests desired and undesired folding of load instructions into cast
+; instructions.  The folding is only done when liveness analysis is performed,
+; so only O2 is tested.
+
+; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s
+
+; Not testing trunc, or 32-bit bitcast, because the lowered code uses pretty
+; much the same mov instructions regardless of whether folding is done.
+
+define internal i32 @zext_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i8*
+  %load = load i8* %addr, align 1
+  %result = zext i8 %load to i32
+  ret i32 %result
+}
+; CHECK-LABEL: zext_fold
+; CHECK: movzx {{.*}},BYTE PTR [{{.*}}+0xc8]
+
+define internal i32 @zext_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i8*
+  %load = load i8* %addr, align 1
+  %tmp1 = zext i8 %load to i32
+  %tmp2 = zext i8 %load to i32
+  %result = add i32 %tmp1, %tmp2
+  ret i32 %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: zext_nofold
+; CHECK-NOT: movzx {{.*}},BYTE PTR [{{.*}}+0xc8]
+
+define internal i32 @sext_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i8*
+  %load = load i8* %addr, align 1
+  %result = sext i8 %load to i32
+  ret i32 %result
+}
+; CHECK-LABEL: sext_fold
+; CHECK: movsx {{.*}},BYTE PTR [{{.*}}+0xc8]
+
+define internal i32 @sext_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i8*
+  %load = load i8* %addr, align 1
+  %tmp1 = sext i8 %load to i32
+  %tmp2 = sext i8 %load to i32
+  %result = add i32 %tmp1, %tmp2
+  ret i32 %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: sext_nofold
+; CHECK-NOT: movsx {{.*}},BYTE PTR [{{.*}}+0xc8]
+
+define internal float @fptrunc_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to double*
+  %load = load double* %addr, align 8
+  %result = fptrunc double %load to float
+  ret float %result
+}
+; CHECK-LABEL: fptrunc_fold
+; CHECK: cvtsd2ss {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal float @fptrunc_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to double*
+  %load = load double* %addr, align 8
+  %tmp1 = fptrunc double %load to float
+  %tmp2 = fptrunc double %load to float
+  %result = fadd float %tmp1, %tmp2
+  ret float %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: fptrunc_nofold
+; CHECK-NOT: cvtsd2ss {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal double @fpext_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to float*
+  %load = load float* %addr, align 4
+  %result = fpext float %load to double
+  ret double %result
+}
+; CHECK-LABEL: fpext_fold
+; CHECK: cvtss2sd {{.*}},DWORD PTR [{{.*}}+0xc8]
+
+define internal double @fpext_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to float*
+  %load = load float* %addr, align 4
+  %tmp1 = fpext float %load to double
+  %tmp2 = fpext float %load to double
+  %result = fadd double %tmp1, %tmp2
+  ret double %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: fpext_nofold
+; CHECK-NOT: cvtss2sd {{.*}},DWORD PTR [{{.*}}+0xc8]
+
+define internal i32 @fptoui_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to double*
+  %load = load double* %addr, align 8
+  %result = fptoui double %load to i16
+  %result2 = zext i16 %result to i32
+  ret i32 %result2
+}
+; CHECK-LABEL: fptoui_fold
+; CHECK: cvttsd2si {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal i32 @fptoui_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to double*
+  %load = load double* %addr, align 8
+  %tmp1 = fptoui double %load to i16
+  %tmp2 = fptoui double %load to i16
+  %result = add i16 %tmp1, %tmp2
+  %result2 = zext i16 %result to i32
+  ret i32 %result2
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: fptoui_nofold
+; CHECK-NOT: cvttsd2si {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal i32 @fptosi_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to double*
+  %load = load double* %addr, align 8
+  %result = fptosi double %load to i16
+  %result2 = zext i16 %result to i32
+  ret i32 %result2
+}
+; CHECK-LABEL: fptosi_fold
+; CHECK: cvttsd2si {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal i32 @fptosi_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to double*
+  %load = load double* %addr, align 8
+  %tmp1 = fptosi double %load to i16
+  %tmp2 = fptosi double %load to i16
+  %result = add i16 %tmp1, %tmp2
+  %result2 = zext i16 %result to i32
+  ret i32 %result2
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: fptosi_nofold
+; CHECK-NOT: cvttsd2si {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal double @uitofp_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i16*
+  %load = load i16* %addr, align 1
+  %result = uitofp i16 %load to double
+  ret double %result
+}
+; CHECK-LABEL: uitofp_fold
+; CHECK: movzx {{.*}},WORD PTR [{{.*}}+0xc8]
+
+define internal double @uitofp_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i16*
+  %load = load i16* %addr, align 1
+  %tmp1 = uitofp i16 %load to double
+  %tmp2 = uitofp i16 %load to double
+  %result = fadd double %tmp1, %tmp2
+  ret double %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: uitofp_nofold
+; CHECK-NOT: movzx {{.*}},WORD PTR [{{.*}}+0xc8]
+
+define internal double @sitofp_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i16*
+  %load = load i16* %addr, align 1
+  %result = sitofp i16 %load to double
+  ret double %result
+}
+; CHECK-LABEL: sitofp_fold
+; CHECK: movsx {{.*}},WORD PTR [{{.*}}+0xc8]
+
+define internal double @sitofp_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i16*
+  %load = load i16* %addr, align 1
+  %tmp1 = sitofp i16 %load to double
+  %tmp2 = sitofp i16 %load to double
+  %result = fadd double %tmp1, %tmp2
+  ret double %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: sitofp_nofold
+; CHECK-NOT: movsx {{.*}},WORD PTR [{{.*}}+0xc8]
+
+define internal double @bitcast_i64_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i64*
+  %load = load i64* %addr, align 1
+  %result = bitcast i64 %load to double
+  ret double %result
+}
+; CHECK-LABEL: bitcast_i64_fold
+; CHECK: movq {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal double @bitcast_i64_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to i64*
+  %load = load i64* %addr, align 1
+  %tmp1 = bitcast i64 %load to double
+  %tmp2 = bitcast i64 %load to double
+  %result = fadd double %tmp1, %tmp2
+  ret double %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: bitcast_i64_nofold
+; CHECK-NOT: movq {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal i64 @bitcast_double_fold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to double*
+  %load = load double* %addr, align 8
+  %result = bitcast double %load to i64
+  ret i64 %result
+}
+; CHECK-LABEL: bitcast_double_fold
+; CHECK-NOT: QWORD PTR
+; CHECK: mov {{.*}},DWORD PTR [{{.*}}+0xc8]
+; CHECK: mov {{.*}},DWORD PTR [{{.*}}+0xcc]
+; CHECK-NOT: QWORD PTR
+
+define internal i64 @bitcast_double_nofold(i32 %arg) {
+entry:
+  %ptr = add i32 %arg, 200
+  %addr = inttoptr i32 %ptr to double*
+  %load = load double* %addr, align 8
+  %tmp1 = bitcast double %load to i64
+  %tmp2 = bitcast double %load to i64
+  %result = add i64 %tmp1, %tmp2
+  ret i64 %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: bitcast_double_nofold
+; CHECK: QWORD PTR
+; CHECK: QWORD PTR
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll

index a0b8b6c..1d24497 100644 (file)
--- a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
@@ -48,7 +48,8 @@ entry:
    %ptr = inttoptr i32 %iptr to i8*
    ; parameter value "6" is for the sequential consistency memory order.
    %i = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6)
-  %r = zext i8 %i to i32
+  %i2 = sub i8 %i, 0
+  %r = zext i8 %i2 to i32
    ret i32 %r
  }
  ; CHECK-LABEL: test_atomic_load_8
@@ -59,7 +60,8 @@ define i32 @test_atomic_load_16(i32 %iptr) {
  entry:
    %ptr = inttoptr i32 %iptr to i16*
    %i = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6)
-  %r = zext i16 %i to i32
+  %i2 = sub i16 %i, 0
+  %r = zext i16 %i2 to i32
    ret i32 %r
  }
  ; CHECK-LABEL: test_atomic_load_16
author	Jim Stichnoth <stichnot@chromium.org>
	Mon, 1 Jun 2015 06:34:44 +0000 (23:34 -0700)
committer	Jim Stichnoth <stichnot@chromium.org>
	Mon, 1 Jun 2015 06:34:44 +0000 (23:34 -0700)
src/IceTargetLoweringX8632.cpp		patch | blob | history
tests_lit/llvm2ice_tests/8bit.pnacl.ll		patch | blob | history
tests_lit/llvm2ice_tests/load_cast.ll	[new file with mode: 0644]	patch | blob
tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll		patch | blob | history