From d5897678eb555a797d4e84e07814d79f0e0bb465 Mon Sep 17 00:00:00 2001
From: Mark Mendell
Date: Wed, 12 Aug 2015 21:16:41 -0400
Subject: [PATCH] Implement CountLeadingZeros for x86

Generate Long and Integer numberOfLeadingZeros for x86 and x86_64.
Uses 'bsr' instruction to find the first one bit, and then corrects the
result.

Added some more tests with constant values to test constant folding.
Also add a runtime test with 0 as the input.

Change-Id: I920b21bb00069bccf5f921f8f87a77e334114926
Signed-off-by: Mark Mendell
---
 compiler/optimizing/intrinsics_x86.cc    | 112 ++++++++++++++++++++++++++++++-
 compiler/optimizing/intrinsics_x86_64.cc |  81 +++++++++++++++++++++-
 test/082-inline-execute/src/Main.java    |  23 +++++++
 3 files changed, 212 insertions(+), 4 deletions(-)

diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 4471d713e..b7126b24e 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -20,6 +20,7 @@
 
 #include "arch/x86/instruction_set_features_x86.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
 #include "code_generator_x86.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
@@ -1835,6 +1836,115 @@ void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
 }
 
+static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  if (is_long) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(0, Location::Any());
+  }
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    if (value == 0) {
+      value = is_long ? 64 : 32;
+    } else {
+      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
+    }
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (!is_long) {
+    if (src.IsRegister()) {
+      __ bsrl(out, src.AsRegister<Register>());
+    } else {
+      DCHECK(src.IsStackSlot());
+      __ bsrl(out, Address(ESP, src.GetStackIndex()));
+    }
+
+    // BSR sets ZF if the input was zero, and the output is undefined.
+    Label all_zeroes, done;
+    __ j(kEqual, &all_zeroes);
+
+    // Correct the result from BSR to get the final CLZ result.
+    __ xorl(out, Immediate(31));
+    __ jmp(&done);
+
+    // Fix the zero case with the expected result.
+    __ Bind(&all_zeroes);
+    __ movl(out, Immediate(32));
+
+    __ Bind(&done);
+    return;
+  }
+
+  // 64 bit case needs to worry about both parts of the register.
+  DCHECK(src.IsRegisterPair());
+  Register src_lo = src.AsRegisterPairLow<Register>();
+  Register src_hi = src.AsRegisterPairHigh<Register>();
+  Label handle_low, done, all_zeroes;
+
+  // Is the high word zero?
+  __ testl(src_hi, src_hi);
+  __ j(kEqual, &handle_low);
+
+  // High word is not zero. We know that the BSR result is defined in this case.
+  __ bsrl(out, src_hi);
+
+  // Correct the result from BSR to get the final CLZ result.
+  __ xorl(out, Immediate(31));
+  __ jmp(&done);
+
+  // High word was zero.  We have to compute the low word count and add 32.
+  __ Bind(&handle_low);
+  __ bsrl(out, src_lo);
+  __ j(kEqual, &all_zeroes);
+
+  // We had a valid result.  Use an XOR to both correct the result and add 32.
+  __ xorl(out, Immediate(63));
+  __ jmp(&done);
+
+  // All zero case.
+  __ Bind(&all_zeroes);
+  __ movl(out, Immediate(64));
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  GenLeadingZeros(assembler, invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
+  GenLeadingZeros(assembler, invoke, /* is_long */ true);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -1847,8 +1957,6 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9ea68ec07..15fbac1c6 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -20,6 +20,7 @@
 
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "art_method-inl.h"
+#include "base/bit_utils.h"
 #include "code_generator_x86_64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
@@ -1685,6 +1686,84 @@ void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
 }
 
+static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::Any());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenLeadingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  int zero_value_result = is_long ? 64 : 32;
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    if (value == 0) {
+      value = zero_value_result;
+    } else {
+      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
+    }
+    if (value == 0) {
+      __ xorl(out, out);
+    } else {
+      __ movl(out, Immediate(value));
+    }
+    return;
+  }
+
+  // Handle the non-constant cases.
+  if (src.IsRegister()) {
+    if (is_long) {
+      __ bsrq(out, src.AsRegister<CpuRegister>());
+    } else {
+      __ bsrl(out, src.AsRegister<CpuRegister>());
+    }
+  } else if (is_long) {
+    DCHECK(src.IsDoubleStackSlot());
+    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  } else {
+    DCHECK(src.IsStackSlot());
+    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+  }
+
+  // BSR sets ZF if the input was zero, and the output is undefined.
+  Label is_zero, done;
+  __ j(kEqual, &is_zero);
+
+  // Correct the result from BSR to get the CLZ result.
+  __ xorl(out, Immediate(zero_value_result - 1));
+  __ jmp(&done);
+
+  // Fix the zero case with the expected result.
+  __ Bind(&is_zero);
+  __ movl(out, Immediate(zero_value_result));
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateLeadingZeroLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  GenLeadingZeros(assembler, invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateLeadingZeroLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  GenLeadingZeros(assembler, invoke, /* is_long */ true);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -1696,8 +1775,6 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 77c1a9972..bd606a6d4 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -1043,8 +1043,20 @@ public class Main {
     return (r1 / i1) + (r2 / i2) + i3 + i4 + i5 + i6 + i7 + i8;
   }
 
+  public static boolean doThrow = false;
+
+  public static int $noinline$return_int_zero() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 0;
+  }
+
   public static void test_Integer_numberOfLeadingZeros() {
     Assert.assertEquals(Integer.numberOfLeadingZeros(0), Integer.SIZE);
+    Assert.assertEquals(Integer.numberOfLeadingZeros(1), Integer.SIZE - 1);
+    Assert.assertEquals(Integer.numberOfLeadingZeros(1 << (Integer.SIZE-1)), 0);
+    Assert.assertEquals(Integer.numberOfLeadingZeros($noinline$return_int_zero()), Integer.SIZE);
     for (int i = 0; i < Integer.SIZE; i++) {
       Assert.assertEquals(Integer.numberOfLeadingZeros(1 << i), Integer.SIZE - 1 - i);
       Assert.assertEquals(Integer.numberOfLeadingZeros((1 << i) | 1), Integer.SIZE - 1 - i);
@@ -1052,8 +1064,19 @@ public class Main {
     }
   }
 
+  public static long $noinline$return_long_zero() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 0;
+  }
+
   public static void test_Long_numberOfLeadingZeros() {
     Assert.assertEquals(Long.numberOfLeadingZeros(0L), Long.SIZE);
+    Assert.assertEquals(Long.numberOfLeadingZeros(1L), Long.SIZE - 1);
+    Assert.assertEquals(Long.numberOfLeadingZeros(1L << ((Long.SIZE/2)-1)), Long.SIZE/2);
+    Assert.assertEquals(Long.numberOfLeadingZeros(1L << (Long.SIZE-1)), 0);
+    Assert.assertEquals(Long.numberOfLeadingZeros($noinline$return_long_zero()), Long.SIZE);
     for (int i = 0; i < Long.SIZE; i++) {
       Assert.assertEquals(Long.numberOfLeadingZeros(1L << i), Long.SIZE - 1 - i);
       Assert.assertEquals(Long.numberOfLeadingZeros((1L << i) | 1L), Long.SIZE - 1 - i);
-- 
2.11.0
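
Illustration only, not part of the patch: a minimal portable C++ sketch of the BSR-based
correction the commit message describes. bsr() below is a hypothetical stand-in for the x86
BSR instruction (index of the highest set bit, undefined for a zero input); for an index n in
[0, 31], n ^ 31 equals 31 - n, and on 32-bit x86 the long path XORs with 63 so one instruction
both corrects the index and adds 32 when only the low word is non-zero.

#include <cstdint>

// Hypothetical model of the x86 BSR instruction: returns the index of the
// highest set bit. The real instruction leaves the result undefined for 0.
static int bsr(uint32_t x) {
  int index = -1;
  while (x != 0) {
    x >>= 1;
    ++index;
  }
  return index;
}

// 32-bit path: special-case zero, otherwise XOR the BSR index with 31.
int clz32_via_bsr(uint32_t x) {
  if (x == 0) {
    return 32;  // BSR output is undefined for zero, so handle it separately.
  }
  return bsr(x) ^ 31;  // Same as 31 - index for index in [0, 31].
}

// 64-bit path as emitted on 32-bit x86, where the value lives in a register pair.
int clz64_via_bsr(uint32_t lo, uint32_t hi) {
  if (hi != 0) {
    return bsr(hi) ^ 31;  // All leading zeros come from the high word.
  }
  if (lo == 0) {
    return 64;            // Both halves zero.
  }
  return bsr(lo) ^ 63;    // XOR with 63 corrects the index and adds 32 in one step.
}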