From d0c6bc220433fab06bc1507f963ea5883fdc4f69 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Tue, 2 Nov 2010 21:18:25 +0000 Subject: [PATCH] Add NEON VST1-lane instructions. Partial fix for Radar 8599955. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118069 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 19 ++++++++++++++++--- lib/Target/ARM/ARMInstrNEON.td | 33 ++++++++++++++++++++++++++++++++- lib/Target/ARM/ARMSchedule.td | 2 ++ lib/Target/ARM/ARMScheduleA8.td | 12 ++++++++++++ lib/Target/ARM/ARMScheduleA9.td | 18 ++++++++++++++++++ 5 files changed, 80 insertions(+), 4 deletions(-) diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 774324b4528..4c556479d18 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -111,11 +111,11 @@ namespace { static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 }, -{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, false, EvenDblSpc, 1, 4 }, +{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 }, { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 }, -{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, false, EvenDblSpc, 1, 2 }, +{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 }, { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 }, -{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, false, EvenDblSpc, 1, 8 }, +{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 }, { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 }, { ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 }, @@ -206,6 +206,13 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 }, { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 }, +{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 }, +{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 }, +{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 }, +{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 }, +{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 }, +{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 }, + { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 }, { ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 }, { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 }, @@ -989,6 +996,12 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VLD4LNd32Pseudo_UPD: case ARM::VLD4LNq16Pseudo_UPD: case ARM::VLD4LNq32Pseudo_UPD: + case ARM::VST1LNq8Pseudo: + case ARM::VST1LNq16Pseudo: + case ARM::VST1LNq32Pseudo: + case ARM::VST1LNq8Pseudo_UPD: + case ARM::VST1LNq16Pseudo_UPD: + case ARM::VST1LNq32Pseudo_UPD: case ARM::VST2LNd8Pseudo: case ARM::VST2LNd16Pseudo: case ARM::VST2LNd32Pseudo: diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8927789567a..f17ce836e4f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1087,6 +1087,8 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 + // Classes for VST*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. class VSTQLNPseudo @@ -1112,7 +1114,36 @@ class VSTQQQQLNWBPseudo nohash_imm:$lane), itin, "$addr.addr = $wb">; // VST1LN : Vector Store (single element from one lane) -// FIXME: Not yet implemented. +class VST1LN op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src, nohash_imm:$lane), + IIC_VST1ln, "vst1", Dt, "\\{$src[$lane]\\}, $addr", "", []>; + +def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8">; +def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16">; +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32">; + +def VST1LNq8Pseudo : VSTQLNPseudo; +def VST1LNq16Pseudo : VSTQLNPseudo; +def VST1LNq32Pseudo : VSTQLNPseudo; + +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { + +// ...with address register writeback: +class VST1LNWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, + "\\{$src[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8">; +def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16">; +def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32">; + +def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo; +def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo; +def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo; // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN op11_8, bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 7f4d2bbd44a..c35cadb12ca 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -158,6 +158,8 @@ def IIC_VST1u : InstrItinClass; def IIC_VST1x2u : InstrItinClass; def IIC_VST1x3u : InstrItinClass; def IIC_VST1x4u : InstrItinClass; +def IIC_VST1ln : InstrItinClass; +def IIC_VST1lnu : InstrItinClass; def IIC_VST2 : InstrItinClass; def IIC_VST2x2 : InstrItinClass; def IIC_VST2u : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index b5c8def42f9..25bdaa2753b 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -601,6 +601,18 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // + // VST1ln + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1]>, + // + // VST1lnu + InstrItinData, + InstrStage<2, [A8_NLSPipe], 1>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1]>, + // // VST2 InstrItinData, InstrStage<2, [A8_NLSPipe], 1>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index c78f59383f2..fb2c24d1610 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1005,6 +1005,24 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<3, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // + // VST1ln + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 1>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1]>, + // + // VST1lnu + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 1>, + InstrStage<3, [A9_LSUnit]>], + [2, 1, 1, 1, 1]>, + // // VST2 InstrItinData, InstrStage<1, [A9_MUX0], 0>, -- 2.11.0