From 0b7ac98ecdb891797a282c508b41238451d6c770 Mon Sep 17 00:00:00 2001
From: Chris Larsen <chris.larsen@imgtec.com>
Date: Fri, 4 Sep 2015 12:54:28 -0700
Subject: [PATCH] MIPS64: Implement intrinsics from java.lang.Math:

- abs(double)           - abs(float)            - abs(int)
- abs(long)             - ceil(double)          - floor(double)
- max(double, double)   - max(float, float)     - max(int, int)
- max(long, long)       - min(double, double)   - min(float, float)
- min(int, int)         - min(long, long)       - rint(double)
- sqrt(double)

The math intrinsics:

- round(double)         - round(float)

aren't implemented because there is no simple mapping from these Java
functions into MIPS assembly code.

Change-Id: I37ee5224d3d64f7162ed98521ba566fbc58d7905
---
 compiler/optimizing/code_generator_mips64.cc |   1 -
 compiler/optimizing/intrinsics_mips64.cc     | 379 +++++++++++++++++++++++++--
 runtime/arch/mips64/registers_mips64.h       |   1 +
 3 files changed, 364 insertions(+), 17 deletions(-)
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 4095ba4ec..25ef3880b 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -38,7 +38,6 @@ static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr GpuRegister kMethodRegisterArgument = A0;
 
 // We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr GpuRegister TMP = T8;
 static constexpr FpuRegister FTMP = F8;
 
 // ART Thread Register.
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 2f6a87dd6..52e2cbec3 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -251,6 +251,368 @@ void IntrinsicCodeGeneratorMIPS64::VisitLongReverse(HInvoke* invoke) {
   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
 }
 
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  if (is64bit) {
+    __ AbsD(out, in);
+  } else {
+    __ AbsS(out, in);
+  }
+}
+
+// double java.lang.Math.abs(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+// float java.lang.Math.abs(float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (is64bit) {
+    __ Dsra32(AT, in, 31);
+    __ Xor(out, in, AT);
+    __ Dsubu(out, out, AT);
+  } else {
+    __ Sra(AT, in, 31);
+    __ Xor(out, in, AT);
+    __ Subu(out, out, AT);
+  }
+}
+
+// int java.lang.Math.abs(int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+// long java.lang.Math.abs(long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations,
+                        bool is_min,
+                        bool is_double,
+                        Mips64Assembler* assembler) {
+  FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  if (is_double) {
+    if (is_min) {
+      __ MinD(out, lhs, rhs);
+    } else {
+      __ MaxD(out, lhs, rhs);
+    }
+  } else {
+    if (is_min) {
+      __ MinS(out, lhs, rhs);
+    } else {
+      __ MaxS(out, lhs, rhs);
+    }
+  }
+}
+
+static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+// double java.lang.Math.min(double, double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+// float java.lang.Math.min(float, float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+// double java.lang.Math.max(double, double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+// float java.lang.Math.max(float, float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations,
+                      bool is_min,
+                      Mips64Assembler* assembler) {
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (out == lhs) {
+    __ Slt(AT, rhs, lhs);
+    if (is_min) {
+      __ Seleqz(out, lhs, AT);
+      __ Selnez(AT, rhs, AT);
+    } else {
+      __ Selnez(out, lhs, AT);
+      __ Seleqz(AT, rhs, AT);
+    }
+  } else {
+    __ Slt(AT, lhs, rhs);
+    if (is_min) {
+      __ Seleqz(out, rhs, AT);
+      __ Selnez(AT, lhs, AT);
+    } else {
+      __ Selnez(out, rhs, AT);
+      __ Seleqz(AT, lhs, AT);
+    }
+  }
+  __ Or(out, out, AT);
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+// int java.lang.Math.min(int, int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+}
+
+// long java.lang.Math.min(long, long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+}
+
+// int java.lang.Math.max(int, int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+}
+
+// long java.lang.Math.max(long, long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+}
+
+// double java.lang.Math.sqrt(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  __ SqrtD(out, in);
+}
+
+static void CreateFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+// double java.lang.Math.rint(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  __ RintD(out, in);
+}
+
+// double java.lang.Math.floor(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathFloor(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+// 0x200 - +zero
+// 0x040 - +infinity
+// 0x020 - -zero
+// 0x004 - -infinity
+// 0x002 - quiet NaN
+// 0x001 - signaling NaN
+const constexpr uint16_t CLASS_MASK = 0x267;
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  Label done;
+
+  // double floor(double in) {
+  //     if in.isNaN || in.isInfinite || in.isZero {
+  //         return in;
+  //     }
+  __ ClassD(out, in);
+  __ Dmfc1(AT, out);
+  __ Andi(AT, AT, CLASS_MASK);       // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
+  __ MovD(out, in);
+  __ Bnezc(AT, &done);
+
+  //     Long outLong = floor(in);
+  //     if outLong == Long.MAX_VALUE {
+  //         // floor() has almost certainly returned a value which
+  //         // can't be successfully represented as a signed 64-bit
+  //         // number.  Java expects that the input value will be
+  //         // returned in these cases.
+  //         // There is also a small probability that floor(in)
+  //         // correctly truncates the input value to Long.MAX_VALUE.  In
+  //         // that case, this exception handling code still does the
+  //         // correct thing.
+  //         return in;
+  //     }
+  __ FloorLD(out, in);
+  __ Dmfc1(AT, out);
+  __ MovD(out, in);
+  __ LoadConst64(TMP, kPrimLongMax);
+  __ Beqc(AT, TMP, &done);
+
+  //     double out = outLong;
+  //     return out;
+  __ Dmtc1(AT, out);
+  __ Cvtdl(out, out);
+  __ Bind(&done);
+  // }
+}
+
+// double java.lang.Math.ceil(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  Label done;
+
+  // double ceil(double in) {
+  //     if in.isNaN || in.isInfinite || in.isZero {
+  //         return in;
+  //     }
+  __ ClassD(out, in);
+  __ Dmfc1(AT, out);
+  __ Andi(AT, AT, CLASS_MASK);       // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
+  __ MovD(out, in);
+  __ Bnezc(AT, &done);
+
+  //     Long outLong = ceil(in);
+  //     if outLong == Long.MAX_VALUE {
+  //         // ceil() has almost certainly returned a value which
+  //         // can't be successfully represented as a signed 64-bit
+  //         // number.  Java expects that the input value will be
+  //         // returned in these cases.
+  //         // There is also a small probability that ceil(in)
+  //         // correctly rounds up the input value to Long.MAX_VALUE.  In
+  //         // that case, this exception handling code still does the
+  //         // correct thing.
+  //         return in;
+  //     }
+  __ CeilLD(out, in);
+  __ Dmfc1(AT, out);
+  __ MovD(out, in);
+  __ LoadConst64(TMP, kPrimLongMax);
+  __ Beqc(AT, TMP, &done);
+
+  //     double out = outLong;
+  //     return out;
+  __ Dmtc1(AT, out);
+  __ Cvtdl(out, out);
+  __ Bind(&done);
+  // }
+}
+
 // byte libcore.io.Memory.peekByte(long address)
 void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
@@ -371,24 +733,9 @@ void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN
 void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
-UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
-UNIMPLEMENTED_INTRINSIC(MathAbsInt)
-UNIMPLEMENTED_INTRINSIC(MathAbsLong)
-UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
-UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
-UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble)
-UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat)
-UNIMPLEMENTED_INTRINSIC(MathMinIntInt)
-UNIMPLEMENTED_INTRINSIC(MathMinLongLong)
-UNIMPLEMENTED_INTRINSIC(MathMaxIntInt)
-UNIMPLEMENTED_INTRINSIC(MathMaxLongLong)
-UNIMPLEMENTED_INTRINSIC(MathSqrt)
-UNIMPLEMENTED_INTRINSIC(MathCeil)
-UNIMPLEMENTED_INTRINSIC(MathFloor)
-UNIMPLEMENTED_INTRINSIC(MathRint)
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+
 UNIMPLEMENTED_INTRINSIC(ThreadCurrentThread)
 UNIMPLEMENTED_INTRINSIC(UnsafeGet)
 UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile)
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 38bc8f268..cd94d5ec6 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -52,6 +52,7 @@ enum GpuRegister {
   S6   = 22,
   S7   = 23,
   T8   = 24,  // More temporaries.
+  TMP  = T8,  // scratch register (in addition to AT)
   T9   = 25,
   K0   = 26,  // Reserved for trap handler.
   K1   = 27,
-- 
2.11.0