From 6557bce3ec8d5a82b2ea299a18cb51677b299633 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 22 Feb 2011 19:53:14 +0000 Subject: [PATCH] VFP single precision arith instructions can go down to NEON pipeline, but on Cortex-A8 only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126238 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.h | 3 +- lib/Target/ARM/ARMHazardRecognizer.cpp | 17 ++-- lib/Target/ARM/ARMInstrFormats.td | 7 +- lib/Target/ARM/ARMInstrVFP.td | 162 ++++++++++++++++----------------- lib/Target/ARM/MLxExpansionPass.cpp | 20 ++-- lib/Target/ARM/NEONMoveFix.cpp | 14 ++- 6 files changed, 114 insertions(+), 109 deletions(-) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 1fb88726d0d..7e2183d7cd5 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -155,10 +155,11 @@ namespace ARMII { //===------------------------------------------------------------------===// // Code domain. DomainShift = 18, - DomainMask = 3 << DomainShift, + DomainMask = 7 << DomainShift, DomainGeneral = 0 << DomainShift, DomainVFP = 1 << DomainShift, DomainNEON = 2 << DomainShift, + DomainNEONA8 = 4 << DomainShift, //===------------------------------------------------------------------===// // Field shifts - such shifts are used to set field while generating diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 676b01e91c5..e97ce50bc42 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, // FIXME: Detect integer instructions properly. const TargetInstrDesc &TID = MI->getDesc(); unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (Domain == ARMII::DomainVFP) { - unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD || - Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) - return false; - } else if (Domain == ARMII::DomainNEON) { - if (MI->getDesc().mayStore() || MI->getDesc().mayLoad()) - return false; - } else + if (TID.mayStore()) return false; - return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI); + unsigned Opcode = TID.getOpcode(); + if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) + return false; + if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) + return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI); + return false; } ScheduleHazardRecognizer::HazardType diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 765cba42d0b..b0cec4b1c37 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>; def IndexModeUpd : IndexMode<3>; // Instruction execution domain. -class Domain val> { - bits<2> Value = val; +class Domain val> { + bits<3> Value = val; } def GenericDomain : Domain<0>; def VFPDomain : Domain<1>; // Instructions in VFP domain only def NeonDomain : Domain<2>; // Instructions in Neon domain only def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains +def VFPNeonA8Domain : Domain<7>; // Instructions in VFP & Neon under A8 //===----------------------------------------------------------------------===// // ARM special operands. @@ -249,7 +250,7 @@ class InstTemplate { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSUBD : ADbI<0b11100, 0b11, 1, 0, @@ -211,9 +211,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VDIVD : ADbI<0b11101, 0b00, 0, 0, @@ -235,9 +235,9 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VNMULD : ADbI<0b11100, 0b10, 1, 0, @@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // Match reassociated forms only if not sign dependent rounding. @@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FIXME: Verify encoding after integrated assembler is working. @@ -286,9 +286,9 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // Defs = [FPSCR] @@ -305,9 +305,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fabs SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } let Defs = [FPSCR] in { @@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, let Inst{3-0} = 0b0000; let Inst{5} = 0; - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FIXME: Verify encoding after integrated assembler is working. @@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, let Inst{3-0} = 0b0000; let Inst{5} = 0; - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // Defs = [FPSCR] @@ -423,9 +423,9 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fneg SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, @@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { let Inst{7} = 1; // s32 - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, @@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { let Inst{7} = 0; // u32 - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FP -> Int: @@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { let Inst{7} = 1; // Z bit - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, @@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { let Inst{7} = 1; // Z bit - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. @@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, @@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, @@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), @@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), @@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), @@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), @@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, RegConstraint<"$Sn = $Sd"> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // neverHasSideEffects diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index f9e86eb36e0..9a27e2f4706 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { } bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); // FIXME: Detect integer instructions properly. + const TargetInstrDesc &TID = MI->getDesc(); unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (Domain == ARMII::DomainVFP) { - unsigned Opcode = TID.getOpcode(); - if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD || - Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) - return false; - } else if (Domain == ARMII::DomainNEON) { - if (TID.mayStore() || TID.mayLoad()) - return false; - } else { + if (TID.mayStore()) return false; - } - - return MI->readsRegister(Reg, TRI); + unsigned Opcode = TID.getOpcode(); + if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) + return false; + if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) + return MI->readsRegister(Reg, TRI); return false; } diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 97e54bfaed9..e8393c53dd8 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -35,6 +35,7 @@ namespace { private: const TargetRegisterInfo *TRI; const ARMBaseInstrInfo *TII; + bool isA8; typedef DenseMap RegMap; @@ -43,6 +44,16 @@ namespace { char NEONMoveFixPass::ID = 0; } +static bool inNEONDomain(unsigned Domain, bool isA8) { + if (Domain & ARMII::DomainNEON) { + // Some instructions only go down NEON pipeline when executed on CortexA8. + if (Domain & ARMII::DomainNEONA8) + return isA8; + return true; + } + return false; +} + bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { RegMap Defs; bool Modified = false; @@ -70,7 +81,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { Domain = ARMII::DomainNEON; } - if (Domain & ARMII::DomainNEON) { + if (inNEONDomain(Domain, isA8)) { // Convert VMOVD to VMOVDneon unsigned DestReg = MI->getOperand(0).getReg(); @@ -123,6 +134,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { TRI = TM.getRegisterInfo(); TII = static_cast(TM.getInstrInfo()); + isA8 = TM.getSubtarget().isCortexA8(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; -- 2.11.0