From: Nicolas Geoffray
Date: Thu, 8 Jan 2015 14:52:29 +0000 (+0000)
Subject: Fix handling of long argument spanning register/memory.
X-Git-Tag: android-x86-7.1-r1~889^2~2238^2
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=425f239c291d435f519a1cf4bdd9ccc9a2c0c070;p=android-x86%2Fart.git

Fix handling of long argument spanning register/memory.

The comment in arm_lir.h says:
 * If a 64-bit argument would span the register/memory argument
 * boundary, it will instead be fully passed in the frame.

This change implements that logic for all platforms. We still need to
pass the low part in a register as well, because I haven't ported the
jni compilers (x86 and mips) to it yet. Once the jni compilers are
updated, we can remove the register assignment.

Note that this greatly simplifies the optimizing compiler's register
allocator, which no longer has to understand a long spanning register
and memory.

Change-Id: I59706ca5d47269fc46e5489ac99bd6576e87e7f3
---
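Aside, not part of the patch: a minimal, self-contained sketch of the
boundary rule quoted above, assuming four core argument registers as on
32-bit ARM. Every name below is invented for illustration; none of it is
ART API.

#include <cstdio>

// A 64-bit argument needs two consecutive core registers. If only one
// register remains, the argument would span the register/memory boundary,
// so it goes fully to the frame and the leftover register is skipped.
constexpr int kNumGprArgs = 4;  // r0-r3; r0 carries the ArtMethod*.

void AssignLong(int& gpr_index, int& stack_slot) {
  if (gpr_index + 1 < kNumGprArgs) {
    std::printf("long -> register pair (r%d, r%d)\n", gpr_index, gpr_index + 1);
    gpr_index += 2;
  } else {
    if (gpr_index == kNumGprArgs - 1) {
      gpr_index++;  // Skip the last register rather than half-using it.
    }
    std::printf("long -> stack slots %d and %d\n", stack_slot, stack_slot + 1);
    stack_slot += 2;
  }
}

int main() {
  int gpr_index = 1;  // Arguments start at r1; r0 holds the method.
  int stack_slot = 0;
  AssignLong(gpr_index, stack_slot);  // Fits: (r1, r2).
  AssignLong(gpr_index, stack_slot);  // Only r3 left: fully in the frame.
  return 0;
}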
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index c99be641a..4139b51a6 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -410,7 +410,7 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
       // If the wide input appeared as single, flush it and go
       // as it comes from memory.
       if (t_loc->wide && reg.Valid() && !reg.Is64Bit()) {
-        StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, k32, kNotVolatile);
+        // The memory already holds the half. Don't do anything.
         reg = RegStorage::InvalidReg();
       }
@@ -881,21 +881,23 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
       if (rl_arg.wide) {
         // if reg is not 64-bit (it is half of 64-bit) then handle it separately.
         if (!reg.Is64Bit()) {
-          // TODO: REVISIT: This adds a spill of low part while we could just copy it.
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
           if (rl_arg.location == kLocPhysReg) {
             int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-            // Dump it to memory and then load only low part
+            // Dump it to memory.
             StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
             LoadBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
           } else {
-            int out_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set);
+            int high_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set);
             // First, use target reg for high part.
             LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + 1), reg, k32, kNotVolatile);
-            StoreBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
-            // Now load target reg with low part.
+            StoreBaseDisp(TargetPtrReg(kSp), high_offset, reg, k32, kNotVolatile);
+            // Now, use target reg for low part.
             LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low), reg, k32, kNotVolatile);
+            int low_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+            // And store it to the expected memory location.
+            StoreBaseDisp(TargetPtrReg(kSp), low_offset, reg, k32, kNotVolatile);
           }
         } else {
           LoadValueDirectWideFixed(rl_arg, reg);
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index a3323e133..fd207150f 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -168,9 +168,13 @@ const ManagedRegisterEntrySpills& ArmManagedRuntimeCallingConvention::EntrySpill
     } else {
       // FIXME: Pointer this returns as both reference and long.
       if (IsCurrentParamALong() && !IsCurrentParamAReference()) {  // Long.
-        if (gpr_index < arraysize(kHFCoreArgumentRegisters)) {
+        // If it spans register and memory, we must use the value in memory.
+        if (gpr_index < arraysize(kHFCoreArgumentRegisters) - 1) {
           entry_spills_.push_back(
               ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index++]));
+        } else if (gpr_index == arraysize(kHFCoreArgumentRegisters) - 1) {
+          gpr_index++;
+          entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
         } else {
           entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
         }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d0a72bb42..1cc2dcc9b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -593,8 +593,6 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
       ArmManagedRegister pair = ArmManagedRegister::FromRegisterPair(
           calling_convention.GetRegisterPairAt(index));
       return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
-      return Location::QuickParameter(index, stack_index);
     } else {
       return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
     }
@@ -711,16 +709,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
           Location::RegisterLocation(destination.AsRegisterPairLow()));
     } else if (source.IsFpuRegister()) {
       UNIMPLEMENTED(FATAL);
-    } else if (source.IsQuickParameter()) {
-      uint16_t register_index = source.GetQuickParameterRegisterIndex();
-      uint16_t stack_index = source.GetQuickParameterStackIndex();
-      InvokeDexCallingConvention calling_convention;
-      EmitParallelMoves(
-          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
-          Location::RegisterLocation(destination.AsRegisterPairLow()),
-          Location::StackSlot(
-              calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
-          Location::RegisterLocation(destination.AsRegisterPairHigh()));
     } else {
       // No conflict possible, so just do the moves.
       DCHECK(source.IsDoubleStackSlot());
@@ -741,22 +729,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
     } else {
       UNIMPLEMENTED(FATAL);
     }
-  } else if (destination.IsQuickParameter()) {
-    InvokeDexCallingConvention calling_convention;
-    uint16_t register_index = destination.GetQuickParameterRegisterIndex();
-    uint16_t stack_index = destination.GetQuickParameterStackIndex();
-    if (source.IsRegisterPair()) {
-      UNIMPLEMENTED(FATAL);
-    } else if (source.IsFpuRegister()) {
-      UNIMPLEMENTED(FATAL);
-    } else {
-      DCHECK(source.IsDoubleStackSlot());
-      EmitParallelMoves(
-          Location::StackSlot(source.GetStackIndex()),
-          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
-          Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
-          Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
-    }
   } else {
     DCHECK(destination.IsDoubleStackSlot());
     if (source.IsRegisterPair()) {
@@ -769,17 +741,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
         __ StoreToOffset(kStoreWordPair, source.AsRegisterPairLow(),
                          SP, destination.GetStackIndex());
       }
-    } else if (source.IsQuickParameter()) {
-      InvokeDexCallingConvention calling_convention;
-      uint16_t register_index = source.GetQuickParameterRegisterIndex();
-      uint16_t stack_index = source.GetQuickParameterStackIndex();
-      // Just move the low part. The only time a source is a quick parameter is
-      // when moving the parameter to its stack locations. And the (Java) caller
-      // of this method has already done that.
-      __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(register_index),
-                       SP, destination.GetStackIndex());
-      DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
-                static_cast<size_t>(destination.GetHighStackIndex(kArmWordSize)));
     } else if (source.IsFpuRegisterPair()) {
       __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow()),
                         SP,
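With the QuickParameter location gone, a long in the Optimizing code
generator is always either a full register pair or a full double stack
slot. A toy restatement of the surviving decision, not part of the patch
(the register count is illustrative; on ARM the managed convention passes
arguments in r1-r3 while r0 holds the method):

#include <cstdio>

// After the QuickParameter branch in GetNextLocation() is deleted, the
// long case degenerates to a two-way choice.
constexpr size_t kNumberOfArgRegisters = 3;

void NextLongLocation(size_t reg_index, size_t stack_index) {
  if (reg_index + 1 < kNumberOfArgRegisters) {
    // Both halves fit in registers: a register pair.
    std::printf("long -> register pair starting at arg-reg %zu\n", reg_index);
  } else {
    // Everything else, including the old spanning case, is a double stack slot.
    std::printf("long -> double stack slot at out-slot %zu\n", stack_index);
  }
}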
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d377cb57c..04e36cc58 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -643,9 +643,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
       DCHECK(source.IsDoubleStackSlot());
       EmitParallelMoves(
           Location::StackSlot(source.GetStackIndex()),
-          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)),
           Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
           Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
+      __ movl(calling_convention.GetRegisterAt(register_index), Address(ESP, source.GetStackIndex()));
     }
   } else if (destination.IsFpuRegister()) {
     if (source.IsDoubleStackSlot()) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 9db1646f5..4bec70acd 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -304,14 +304,9 @@ class QuickArgumentVisitor {
   }

   uint64_t ReadSplitLongParam() const {
-    DCHECK(IsSplitLongOrDouble());
-    // Read low half from register.
-    uint64_t low_half = *reinterpret_cast<uint32_t*>(GetParamAddress());
-    // Read high half from the stack. As current stack_index_ indexes the argument, the high part
-    // index should be (stack_index_ + 1).
-    uint64_t high_half = *reinterpret_cast<uint32_t*>(stack_args_
-        + (stack_index_ + 1) * kBytesStackArgLocation);
-    return (low_half & 0xffffffffULL) | (high_half << 32);
+    // The splitted long is always available through the stack.
+    return *reinterpret_cast<uint64_t*>(stack_args_
+        + stack_index_ * kBytesStackArgLocation);
   }

   void VisitArguments() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
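On the runtime side, the rule turns reading a formerly split long into a
single 64-bit load. A standalone toy of the new ReadSplitLongParam() shape,
not part of the patch (the frame layout is invented; kBytesStackArgLocation
is 4 in the real file):

#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Both halves of the long now sit contiguously in the frame at the
// argument's own stack index, so one 64-bit read replaces the old
// register-half/stack-half stitching.
constexpr size_t kBytesStackArgLocation = 4;

uint64_t ReadLongAt(const uint8_t* stack_args, size_t stack_index) {
  uint64_t value;
  // memcpy sidesteps the alignment assumption the real reinterpret_cast makes.
  std::memcpy(&value, stack_args + stack_index * kBytesStackArgLocation,
              sizeof(value));
  return value;
}

int main() {
  uint8_t frame[16] = {};
  const uint64_t arg = UINT64_C(0x1122334455667788);
  std::memcpy(frame + 1 * kBytesStackArgLocation, &arg, sizeof(arg));
  std::printf("0x%" PRIx64 "\n", ReadLongAt(frame, 1));  // 0x1122334455667788
  return 0;
}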