Use vabs/fabs on arm/arm64 for intrinsic abs().

author Vladimir Marko <vmarko@google.com>

Thu, 17 Jul 2014 09:43:08 +0000 (10:43 +0100)

committer Vladimir Marko <vmarko@google.com>

Mon, 21 Jul 2014 18:32:44 +0000 (19:32 +0100)
author Vladimir Marko <vmarko@google.com>
Thu, 17 Jul 2014 09:43:08 +0000 (10:43 +0100)
committer Vladimir Marko <vmarko@google.com>
Mon, 21 Jul 2014 18:32:44 +0000 (19:32 +0100)
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h

index 8f60390..fa252a1 100644 (file)
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -110,6 +110,8 @@ class ArmMir2Lir FINAL : public Mir2Lir {
      void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
      void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
+    bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
      bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
      bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
      bool GenInlinedSqrt(CallInfo* info);
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc

index e06d814..dcb8857 100644 (file)
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -338,6 +338,60 @@ void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
    StoreValueWide(rl_dest, rl_result);
  }
  
+// Picks the register class used to evaluate an inlined fp abs(), based on where the source
+// value and the destination currently live.
+static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
+  const bool src_in_reg = (rl_src.location == kLocPhysReg);
+  const bool dest_in_reg = (rl_dest.location == kLocPhysReg);
+  if (!src_in_reg && !dest_in_reg) {
+    // Both src and dest are in the stack frame, so we have to perform load+abs+store.
+    // Whether that is faster in a core reg or an fp reg depends on the particular CPU.
+    // Without further investigation and testing we prefer core register. (If the result is
+    // subsequently used in another fp operation, the dalvik reg will probably get promoted
+    // and then one of the checks below applies instead.)
+    return kCoreReg;
+  }
+  // Src is in a core reg: use core reg.
+  if (src_in_reg && !rl_src.reg.IsFloat()) {
+    return kCoreReg;
+  }
+  // Unlikely, but dest has been promoted to a core reg: use core reg.
+  if (dest_in_reg && !rl_dest.reg.IsFloat()) {
+    return kCoreReg;
+  }
+  // Src is in an fp reg or dest has been promoted to an fp reg: use fp reg.
+  return kFPReg;
+}
+
+// Generates inline code for the intrinsic abs() on a float value.
+// Always returns true: either code was emitted or the result is unused and none is needed.
+bool ArmMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  RegLocation rl_dest = info->result;
+  if (rl_dest.location == kLocInvalid) {
+    // Result is unused: inlining successful, no code generated.
+    return true;
+  }
+  RegLocation rl_src = UpdateLoc(info->args[0]);
+  // Work in whichever register class the operands already favor.
+  const RegisterClass abs_class = RegClassForAbsFP(rl_src, rl_dest);
+  rl_src = LoadValue(rl_src, abs_class);
+  RegLocation rl_result = EvalLoc(rl_dest, abs_class, true);
+  if (abs_class != kFPReg) {
+    // Core reg: mask off bit 31, the float's sign bit.
+    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
+  } else {
+    // Fp reg: single-precision vabs.
+    NewLIR2(kThumb2Vabss, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  }
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+// Generates inline code for the intrinsic abs() on a double value.
+// Always returns true: either code was emitted or the result is unused and none is needed.
+bool ArmMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+  if (info->result.location == kLocInvalid) {
+    return true;  // Result is unused: inlining successful, no code generated.
+  }
+  RegLocation rl_dest = info->result;
+  RegLocation rl_src = UpdateLocWide(info->args[0]);
+  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
+  rl_src = LoadValueWide(rl_src, reg_class);
+  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
+  if (reg_class == kFPReg) {
+    NewLIR2(kThumb2Vabsd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  } else {
+    // The result register pair is not necessarily the source pair, so the whole value has to
+    // be copied over first. Without the copy the low word of the result is never written and
+    // the mask below is applied to stale register contents.
+    OpRegCopyWide(rl_result.reg, rl_src.reg);
+    // Clear bit 31 of the high word, i.e. the sign bit of the double.
+    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
+  }
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
  bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) {
    DCHECK_EQ(cu_->instruction_set, kThumb2);
    RegLocation rl_src = info->args[0];
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h

index 4551e2a..8d15326 100644 (file)
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -175,6 +175,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir {
                    RegLocation rl_src2);
      void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
      bool GenInlinedReverseBits(CallInfo* info, OpSize size);
+    bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
      bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
      bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
      bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc

index 6594c4b..175cef1 100644 (file)
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -323,12 +323,57 @@ void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
    StoreValueWide(rl_dest, rl_result);
  }
  
+// Picks the register class used to evaluate an inlined fp abs(), based on where the source
+// value and the destination currently live.
+static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
+  const bool src_in_reg = (rl_src.location == kLocPhysReg);
+  const bool dest_in_reg = (rl_dest.location == kLocPhysReg);
+  if (!src_in_reg && !dest_in_reg) {
+    // Both src and dest are in the stack frame, so we have to perform load+abs+store.
+    // Whether that is faster in a core reg or an fp reg depends on the particular CPU. For
+    // example, on A53 it's faster using core reg while on A57 it's faster with fp reg, the
+    // difference being bigger on the A53. Without further investigation and testing we
+    // prefer core register. (If the result is subsequently used in another fp operation, the
+    // dalvik reg will probably get promoted and then one of the checks below applies.)
+    return kCoreReg;
+  }
+  // Src is in a core reg: use core reg.
+  if (src_in_reg && !rl_src.reg.IsFloat()) {
+    return kCoreReg;
+  }
+  // Unlikely, but dest has been promoted to a core reg: use core reg.
+  if (dest_in_reg && !rl_dest.reg.IsFloat()) {
+    return kCoreReg;
+  }
+  // Src is in an fp reg or dest has been promoted to an fp reg: use fp reg.
+  return kFPReg;
+}
+
+// Generates inline code for the intrinsic abs() on a float value.
+// Always returns true: either code was emitted or the result is unused and none is needed.
+bool Arm64Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  RegLocation rl_dest = info->result;
+  if (rl_dest.location == kLocInvalid) {
+    // Result is unused: inlining successful, no code generated.
+    return true;
+  }
+  RegLocation rl_src = UpdateLoc(info->args[0]);
+  // Work in whichever register class the operands already favor.
+  const RegisterClass abs_class = RegClassForAbsFP(rl_src, rl_dest);
+  rl_src = LoadValue(rl_src, abs_class);
+  RegLocation rl_result = EvalLoc(rl_dest, abs_class, true);
+  if (abs_class != kFPReg) {
+    // Core reg: bitfield move of bits 0..30, which leaves out bit 31 (the float's sign bit).
+    NewLIR4(kA64Ubfm4rrdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 30);
+  } else {
+    // Fp reg: single-precision fabs.
+    NewLIR2(kA64Fabs2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  }
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
  bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_dest = InlineTargetWide(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62);
+  if (info->result.location == kLocInvalid) {
+    return true;  // Result is unused: inlining successful, no code generated.
+  }
+  RegLocation rl_dest = info->result;
+  RegLocation rl_src = UpdateLocWide(info->args[0]);
+  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);  // Core or fp, by operand location.
+  rl_src = LoadValueWide(rl_src, reg_class);
+  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
+  if (reg_class == kFPReg) {  // Fp reg: wide (double) variant of fabs.
+    NewLIR2(FWIDE(kA64Fabs2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  } else {  // Core reg: bitfield move of bits 0..62, leaving out bit 63 (the sign bit).
+    NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62);
+  }
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc

index fdb3d83..2c69593 100755 (executable)
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1553,20 +1553,6 @@ bool Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
    return true;
  }
  
-bool Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
-    return false;
-  }
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValue(rl_src, kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
  bool Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
    // Currently implemented only for ARM64
    return false;
@@ -1577,22 +1563,6 @@ bool Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
    return false;
  }
  
-bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
-    return false;
-  }
-  RegLocation rl_src = info->args[0];
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_dest = InlineTargetWide(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
-  OpRegCopyWide(rl_result.reg, rl_src.reg);
-  OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
-  StoreValueWide(rl_dest, rl_result);
-  return true;
-}
-
  bool Mir2Lir::GenInlinedFloatCvt(CallInfo* info) {
    if (cu_->instruction_set == kMips) {
      // TODO - add Mips implementation
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h

index 6a186b1..bb18ad2 100644 (file)
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -110,6 +110,8 @@ class MipsMir2Lir FINAL : public Mir2Lir {
      void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
      void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
+    bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
      bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
      bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
      bool GenInlinedSqrt(CallInfo* info);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc

index 1f8f9ec..054514e 100644 (file)
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -291,6 +291,16 @@ bool MipsMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
    return false;
  }
  
+bool MipsMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  // TODO: add a Mips implementation of float abs(). Until then nothing is inlined here.
+  return false;
+}
+
+bool MipsMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+  // TODO: add a Mips implementation of double abs(). Until then nothing is inlined here.
+  return false;
+}
+
  bool MipsMir2Lir::GenInlinedSqrt(CallInfo* info) {
    DCHECK_NE(cu_->instruction_set, kThumb2);
    return false;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h

index 512849f..1795931 100644 (file)
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -990,8 +990,8 @@ class Mir2Lir : public Backend {
      bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
      bool GenInlinedAbsInt(CallInfo* info);
      virtual bool GenInlinedAbsLong(CallInfo* info);
-    virtual bool GenInlinedAbsFloat(CallInfo* info);
-    virtual bool GenInlinedAbsDouble(CallInfo* info);
+    virtual bool GenInlinedAbsFloat(CallInfo* info) = 0;
+    virtual bool GenInlinedAbsDouble(CallInfo* info) = 0;
      bool GenInlinedFloatCvt(CallInfo* info);
      bool GenInlinedDoubleCvt(CallInfo* info);
      virtual bool GenInlinedArrayCopyCharArray(CallInfo* info);
author	Vladimir Marko <vmarko@google.com>
	Thu, 17 Jul 2014 09:43:08 +0000 (10:43 +0100)
committer	Vladimir Marko <vmarko@google.com>
	Mon, 21 Jul 2014 18:32:44 +0000 (19:32 +0100)
compiler/dex/quick/arm/codegen_arm.h		patch \| blob \| history
compiler/dex/quick/arm/fp_arm.cc		patch \| blob \| history
compiler/dex/quick/arm64/codegen_arm64.h		patch \| blob \| history
compiler/dex/quick/arm64/fp_arm64.cc		patch \| blob \| history
compiler/dex/quick/gen_invoke.cc		patch \| blob \| history
compiler/dex/quick/mips/codegen_mips.h		patch \| blob \| history
compiler/dex/quick/mips/int_mips.cc		patch \| blob \| history
compiler/dex/quick/mir_to_lir.h		patch \| blob \| history