From 53f1262773516a247e7bfad50de3cd94a4dcf4df Mon Sep 17 00:00:00 2001 From: Nicolas Geoffray Date: Tue, 13 Jan 2015 18:04:41 +0000 Subject: [PATCH] Implement ParallelMoveResolver::Swap for doubles on arm. Currently reserve a global register DTMP for these operations. Change-Id: Ie88b4696af51834492fd062082335bc2e1137be2 --- compiler/optimizing/code_generator_arm.cc | 40 +++++++++++++++++++++++---- test/439-swap-double/expected.txt | 4 +++ test/439-swap-double/info.txt | 2 ++ test/439-swap-double/src/Main.java | 46 +++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 5 deletions(-) create mode 100644 test/439-swap-double/expected.txt create mode 100644 test/439-swap-double/info.txt create mode 100644 test/439-swap-double/src/Main.java diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1cc2dcc9b..af385ebe3 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -49,6 +49,9 @@ static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +static constexpr DRegister DTMP = D7; +static constexpr SRegister STMP = S14; + class InvokeRuntimeCallingConvention : public CallingConvention { public: InvokeRuntimeCallingConvention() @@ -472,6 +475,11 @@ void CodeGeneratorARM::SetupBlockedRegisters() const { blocked_core_registers_[R10] = true; blocked_core_registers_[R11] = true; + // Don't allocate our temporary double register. + blocked_fpu_registers_[STMP] = true; + blocked_fpu_registers_[STMP + 1] = true; + DCHECK_EQ(FromLowSToD(STMP), DTMP); + blocked_fpu_registers_[S16] = true; blocked_fpu_registers_[S17] = true; blocked_fpu_registers_[S18] = true; @@ -3364,9 +3372,9 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { } else if (source.IsStackSlot() && destination.IsStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { - __ vmovrs(IP, source.AsFpuRegister()); + __ vmovs(STMP, source.AsFpuRegister()); __ vmovs(source.AsFpuRegister(), destination.AsFpuRegister()); - __ vmovsr(destination.AsFpuRegister(), IP); + __ vmovs(destination.AsFpuRegister(), STMP); } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister() : destination.AsFpuRegister(); @@ -3374,11 +3382,33 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { ? destination.GetStackIndex() : source.GetStackIndex(); - __ vmovrs(IP, reg); + __ vmovs(STMP, reg); __ LoadSFromOffset(reg, SP, mem); - __ StoreToOffset(kStoreWord, IP, SP, mem); + __ StoreSToOffset(STMP, SP, mem); + } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) { + __ vmovd(DTMP, FromLowSToD(source.AsFpuRegisterPairLow())); + __ vmovd(FromLowSToD(source.AsFpuRegisterPairLow()), + FromLowSToD(destination.AsFpuRegisterPairLow())); + __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow()), DTMP); + } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) { + DRegister reg = source.IsFpuRegisterPair() + ? FromLowSToD(source.AsFpuRegisterPairLow()) + : FromLowSToD(destination.AsFpuRegisterPairLow()); + int mem = source.IsFpuRegisterPair() + ? destination.GetStackIndex() + : source.GetStackIndex(); + + __ vmovd(DTMP, reg); + __ LoadDFromOffset(reg, SP, mem); + __ StoreDToOffset(DTMP, SP, mem); + } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { + // TODO: We could use DTMP and ask for a pair scratch register (float or core). + // This would save four instructions if two scratch registers are available, and + // two instructions if not. + Exchange(source.GetStackIndex(), destination.GetStackIndex()); + Exchange(source.GetHighStackIndex(kArmWordSize), destination.GetHighStackIndex(kArmWordSize)); } else { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unimplemented" << source << " <-> " << destination; } } diff --git a/test/439-swap-double/expected.txt b/test/439-swap-double/expected.txt new file mode 100644 index 000000000..019c90149 --- /dev/null +++ b/test/439-swap-double/expected.txt @@ -0,0 +1,4 @@ +-26.0 +-24.0 +-22.0 +-20.0 diff --git a/test/439-swap-double/info.txt b/test/439-swap-double/info.txt new file mode 100644 index 000000000..23447d2b1 --- /dev/null +++ b/test/439-swap-double/info.txt @@ -0,0 +1,2 @@ +Test for the optimizing compiler's parallel swap support in +the presence of register pairs (in this case, doubles on ARM). diff --git a/test/439-swap-double/src/Main.java b/test/439-swap-double/src/Main.java new file mode 100644 index 000000000..da11577e2 --- /dev/null +++ b/test/439-swap-double/src/Main.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Test for the optimizing compiler's parallel swap support in +// the presence of register pairs (in this case, doubles on ARM). +public class Main { + public static void main(String[] args) { + new Main().foo(); + } + + public void foo() { + // Do multiple calls to force swapping of registers. Note that + // this depends on the calling convention, as a stack-only convention + // may not need the swapping. + callWithDoubles(a, b, c, d, e, f, g); + callWithDoubles(b, c, d, e, f, g, a); + callWithDoubles(c, d, e, f, g, a, b); + callWithDoubles(d, e, f, g, a, b, c); + } + + public static void callWithDoubles( + double a, double b, double c, double d, double e, double f, double g) { + System.out.println(a - b - c - d - e - f - g); + } + + double a = 1.0; + double b = 2.0; + double c = 3.0; + double d = 4.0; + double e = 5.0; + double f = 6.0; + double g = 7.0; +} -- 2.11.0