OSDN Git Service

Optimizing: Use more X86 3 operand multiplies
author Mark Mendell <mark.p.mendell@intel.com>
Mon, 27 Jul 2015 20:13:10 +0000 (16:13 -0400)
committer Mark Mendell <mark.p.mendell@intel.com>
Tue, 28 Jul 2015 19:54:05 +0000 (15:54 -0400)
The X86_64 code generator generated 3-operand multiplies for long
multiplication only.  Add support for 3-operand multiplication for
int as well, on both X86 and X86_64.

Note that the RHS operand must be a 32-bit constant, and that it is
possible for the constant to end up in a register (!) due to a previous
use by another instruction.  Handle this case by checking whether the
instruction's input is a constant, rather than checking the location
assigned to the operand; otherwise the 2-operand form would be selected
even though the first input might not be the same as the output, due to
the use of Any().

Also allow stack operands for long multiplication on X86_64, and use the
constant area for long constants that do not fit in 32 bits.

Change-Id: I8f3d14cc01e9a91210f418258aa18065ee87979d
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
compiler/optimizing/code_generator_x86.cc
compiler/optimizing/code_generator_x86_64.cc
compiler/utils/x86/assembler_x86.cc
compiler/utils/x86/assembler_x86.h
compiler/utils/x86_64/assembler_x86_64.cc
compiler/utils/x86_64/assembler_x86_64.h

index e15eff9..850a2a6 100644 (file)
@@ -2348,7 +2348,12 @@ void LocationsBuilderX86::VisitMul(HMul* mul) {
     case Primitive::kPrimInt:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
-      locations->SetOut(Location::SameAsFirstInput());
+      if (mul->InputAt(1)->IsIntConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -2376,21 +2381,24 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
+  Location out = locations->Out();
 
   switch (mul->GetResultType()) {
-    case Primitive::kPrimInt: {
-      if (second.IsRegister()) {
+    case Primitive::kPrimInt:
+      // The constant may have ended up in a register, so test explicitly to avoid
+      // problems where the output may not be the same as the first operand.
+      if (mul->InputAt(1)->IsIntConstant()) {
+        Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
+        __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
+      } else if (second.IsRegister()) {
+        DCHECK(first.Equals(out));
         __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
-      } else if (second.IsConstant()) {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ imull(first.AsRegister<Register>(), imm);
       } else {
         DCHECK(second.IsStackSlot());
+        DCHECK(first.Equals(out));
         __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       }
       break;
-    }
 
     case Primitive::kPrimLong: {
       Register in1_hi = first.AsRegisterPairHigh<Register>();
index a95ce68..287737b 100644 (file)
@@ -2535,13 +2535,19 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
-      locations->SetOut(Location::SameAsFirstInput());
+      if (mul->InputAt(1)->IsIntConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
-      if (locations->InAt(1).IsConstant()) {
+      locations->SetInAt(1, Location::Any());
+      if (mul->InputAt(1)->IsLongConstant() &&
+          IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
         // Can use 3 operand multiply.
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       } else {
@@ -2566,37 +2572,51 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
+  Location out = locations->Out();
   switch (mul->GetResultType()) {
-    case Primitive::kPrimInt: {
-      DCHECK(first.Equals(locations->Out()));
-      if (second.IsRegister()) {
+    case Primitive::kPrimInt:
+      // The constant may have ended up in a register, so test explicitly to avoid
+      // problems where the output may not be the same as the first operand.
+      if (mul->InputAt(1)->IsIntConstant()) {
+        Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
+        __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
+      } else if (second.IsRegister()) {
+        DCHECK(first.Equals(out));
         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
-      } else if (second.IsConstant()) {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ imull(first.AsRegister<CpuRegister>(), imm);
       } else {
+        DCHECK(first.Equals(out));
         DCHECK(second.IsStackSlot());
         __ imull(first.AsRegister<CpuRegister>(),
                  Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
-    }
     case Primitive::kPrimLong: {
-      if (second.IsConstant()) {
-        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(value));
-        __ imulq(locations->Out().AsRegister<CpuRegister>(),
-                 first.AsRegister<CpuRegister>(),
-                 Immediate(static_cast<int32_t>(value)));
-      } else {
-        DCHECK(first.Equals(locations->Out()));
+      // The constant may have ended up in a register, so test explicitly to avoid
+      // problems where the output may not be the same as the first operand.
+      if (mul->InputAt(1)->IsLongConstant()) {
+        int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
+        if (IsInt<32>(value)) {
+          __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
+                   Immediate(static_cast<int32_t>(value)));
+        } else {
+          // Have to use the constant area.
+          DCHECK(first.Equals(out));
+          __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsRegister()) {
+        DCHECK(first.Equals(out));
         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        DCHECK(first.Equals(out));
+        __ imulq(first.AsRegister<CpuRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
-      DCHECK(first.Equals(locations->Out()));
+      DCHECK(first.Equals(out));
       if (second.IsFpuRegister()) {
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
@@ -2611,7 +2631,7 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
     }
 
     case Primitive::kPrimDouble: {
-      DCHECK(first.Equals(locations->Out()));
+      DCHECK(first.Equals(out));
       if (second.IsFpuRegister()) {
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
index 44efc65..17fef0f 100644 (file)
@@ -1194,11 +1194,26 @@ void X86Assembler::imull(Register dst, Register src) {
 }
 
 
+void X86Assembler::imull(Register dst, Register src, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // See whether imm can be represented as a sign-extended 8bit value.
+  int32_t v32 = static_cast<int32_t>(imm.value());
+  if (IsInt<8>(v32)) {
+    // Sign-extension works.
+    EmitUint8(0x6B);
+    EmitOperand(dst, Operand(src));
+    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
+  } else {
+    // Not representable, use full immediate.
+    EmitUint8(0x69);
+    EmitOperand(dst, Operand(src));
+    EmitImmediate(imm);
+  }
+}
+
+
 void X86Assembler::imull(Register reg, const Immediate& imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x69);
-  EmitOperand(reg, Operand(reg));
-  EmitImmediate(imm);
+  imull(reg, reg, imm);
 }
 
 
index e2abcde..71c24b2 100644 (file)
@@ -409,6 +409,7 @@ class X86Assembler FINAL : public Assembler {
 
   void imull(Register dst, Register src);
   void imull(Register reg, const Immediate& imm);
+  void imull(Register dst, Register src, const Immediate& imm);
   void imull(Register reg, const Address& address);
 
   void imull(Register reg);
index 93c90db..f4d833d 100644 (file)
@@ -1672,28 +1672,33 @@ void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
   EmitOperand(dst.LowBits(), Operand(src));
 }
 
-void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
+void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // imull only supports 32b immediate.
 
-  EmitOptionalRex32(reg, reg);
+  EmitOptionalRex32(dst, src);
 
   // See whether imm can be represented as a sign-extended 8bit value.
   int32_t v32 = static_cast<int32_t>(imm.value());
   if (IsInt<8>(v32)) {
     // Sign-extension works.
     EmitUint8(0x6B);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(src));
     EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
   } else {
     // Not representable, use full immediate.
     EmitUint8(0x69);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(src));
     EmitImmediate(imm);
   }
 }
 
 
+void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
+  imull(reg, reg, imm);
+}
+
+
 void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg, address);
index 0cd3197..47a317e 100644 (file)
@@ -539,6 +539,7 @@ class X86_64Assembler FINAL : public Assembler {
 
   void imull(CpuRegister dst, CpuRegister src);
   void imull(CpuRegister reg, const Immediate& imm);
+  void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
   void imull(CpuRegister reg, const Address& address);
 
   void imulq(CpuRegister src);