From: xueliang.zhong
Date: Tue, 5 Jul 2016 14:28:19 +0000 (+0100)
Subject: Integer.bitCount and Long.bitCount intrinsics for ARM
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=f1073c81a88bd545a45639865c38c43c83f89419;p=android-x86%2Fart.git

Integer.bitCount and Long.bitCount intrinsics for ARM

Change-Id: I4ed3e779415be026c7d090b61a3e356b37c418e5
---

diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 579fb9d3b..d25f439b0 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1979,6 +1979,51 @@ void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) {
   __ revsh(out, in);
 }
 
+static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmAssembler* assembler) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+  bool is_long = type == Primitive::kPrimLong;
+  LocationSummary* locations = instr->GetLocations();
+  Location in = locations->InAt(0);
+  Register src_0 = is_long ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>();
+  Register src_1 = is_long ? in.AsRegisterPairHigh<Register>() : src_0;
+  SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+  DRegister tmp_d = FromLowSToD(tmp_s);
+  Register out_r = locations->Out().AsRegister<Register>();
+
+  // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
+  // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency,
+  // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
+  __ vmovdrr(tmp_d, src_1, src_0);                         // Temp DReg |--src_1|--src_0|
+  __ vcntd(tmp_d, tmp_d);                                  // Temp DReg |c|c|c|c|c|c|c|c|
+  __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true);     // Temp DReg |--c|--c|--c|--c|
+  __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true);    // Temp DReg |------c|------c|
+  if (is_long) {
+    __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true);  // Temp DReg |--------------c|
+  }
+  __ vmovrs(out_r, tmp_s);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) {
+  VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
 void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
@@ -2119,8 +2164,6 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
   __ Lsr(out, out, 5);
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)