From 3416601a9e9be81bb7494864287fd3602d18ef13 Mon Sep 17 00:00:00 2001 From: Calin Juravle Date: Fri, 19 Dec 2014 17:22:29 +0000 Subject: [PATCH] Look at instruction set features when generating volatiles code Change-Id: Ia882405719fdd60b63e4102af7e085f7cbe0bb2a --- compiler/optimizing/code_generator.cc | 6 ++++-- compiler/optimizing/code_generator.h | 4 +++- compiler/optimizing/code_generator_arm.cc | 33 ++++++++++++++++++------------ compiler/optimizing/code_generator_arm.h | 7 ++++++- compiler/optimizing/codegen_test.cc | 5 +++-- compiler/optimizing/optimizing_compiler.cc | 15 +++++++------- 6 files changed, 44 insertions(+), 26 deletions(-) diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 91426f347..4d8154e6a 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -327,11 +327,13 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set) { + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features) { switch (instruction_set) { case kArm: case kThumb2: { - return new (allocator) arm::CodeGeneratorARM(graph); + return new (allocator) arm::CodeGeneratorARM(graph, + isa_features.AsArmInstructionSetFeatures()); } case kArm64: { return new (allocator) arm64::CodeGeneratorARM64(graph); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2e7eca2ea..4205ebebf 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ #include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "base/bit_field.h" #include "globals.h" #include "locations.h" @@ -84,7 +85,8 @@ class CodeGenerator : public ArenaObject { void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set); + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features); HGraph* GetGraph() const { return graph_; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 063dc7caf..2f49107bc 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "mirror/array-inl.h" @@ -372,13 +373,15 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kArmWordSize; } -CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) +CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures* isa_features) : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(true) {} + assembler_(true), + isa_features_(isa_features) {} size_t CodeGeneratorARM::FrameEntrySpillSize() const { return kNumberOfPushedRegistersAtEntry * kArmWordSize; @@ -2615,16 +2618,18 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; - + bool generate_volatile = field_info.IsVolatile() + && is_wide + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); // Temporary registers for the write barrier. // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - } else if (is_volatile && is_wide) { + } else if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. @@ -2651,6 +2656,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, Location value = locations->InAt(1); bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2684,10 +2690,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, } case Primitive::kPrimLong: { - if (is_volatile) { - // TODO: We could use ldrd and strd that are atomic with Large Physical Address Extension - // support. This info is stored in the compiler driver (HasAtomicLdrdAndStrd) and we should - // pass it around to be able to optimize. + if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicStore(base, offset, value.AsRegisterPairLow(), value.AsRegisterPairHigh(), @@ -2706,7 +2709,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimDouble: { DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow()); - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { Register value_reg_lo = locations->GetTemp(0).AsRegister(); Register value_reg_hi = locations->GetTemp(1).AsRegister(); @@ -2740,7 +2743,10 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble)) { + bool generate_volatile = field_info.IsVolatile() + && (field_info.GetFieldType() == Primitive::kPrimDouble) + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. @@ -2760,6 +2766,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, Register base = locations->InAt(0).AsRegister(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2791,7 +2798,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, } case Primitive::kPrimLong: { - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, out.AsRegisterPairLow(), out.AsRegisterPairHigh()); @@ -2808,7 +2815,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimDouble: { DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow()); - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { Register lo = locations->GetTemp(0).AsRegister(); Register hi = locations->GetTemp(1).AsRegister(); GenerateWideAtomicLoad(base, offset, lo, hi); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index b86670d93..40f4edc4e 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -159,7 +159,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { class CodeGeneratorARM : public CodeGenerator { public: - explicit CodeGeneratorARM(HGraph* graph); + CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features); virtual ~CodeGeneratorARM() {} void GenerateFrameEntry() OVERRIDE; @@ -233,6 +233,10 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + const ArmInstructionSetFeatures* GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray