From 1314c168894e4ef19adc810b67c92b788ee2d9e9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 28 Jul 2016 15:28:56 +0000 Subject: [PATCH] [X86] Remove CustomInserter for FMA3 instructions. Looks like since we got full commuting support for FMAs after this was added, the coalescer can now get this right on its own. Differential Revision: https://reviews.llvm.org/D22799 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276987 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 191 --------------------------------- lib/Target/X86/X86InstrFMA.td | 3 - test/CodeGen/X86/fma-do-not-commute.ll | 4 +- 3 files changed, 2 insertions(+), 196 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cd3b31eb9db..5aea18986dd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -24236,164 +24236,6 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, return BB; } -// Replace 213-type (isel default) FMA3 instructions with 231-type for -// accumulator loops. Writing back to the accumulator allows the coalescer -// to remove extra copies in the loop. -// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937). -MachineBasicBlock * -X86TargetLowering::emitFMA3Instr(MachineInstr &MI, - MachineBasicBlock *MBB) const { - MachineOperand &AddendOp = MI.getOperand(3); - - // Bail out early if the addend isn't a register - we can't switch these. - if (!AddendOp.isReg()) - return MBB; - - MachineFunction &MF = *MBB->getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - // Check whether the addend is defined by a PHI: - assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?"); - MachineInstr &AddendDef = *MRI.def_instr_begin(AddendOp.getReg()); - if (!AddendDef.isPHI()) - return MBB; - - // Look for the following pattern: - // loop: - // %addend = phi [%entry, 0], [%loop, %result] - // ... - // %result = FMA213 %m2, %m1, %addend - - // Replace with: - // loop: - // %addend = phi [%entry, 0], [%loop, %result] - // ... - // %result = FMA231 %addend, %m1, %m2 - - for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) { - assert(AddendDef.getOperand(i).isReg()); - MachineOperand PHISrcOp = AddendDef.getOperand(i); - MachineInstr &PHISrcInst = *MRI.def_instr_begin(PHISrcOp.getReg()); - if (&PHISrcInst == &MI) { - // Found a matching instruction. - unsigned NewFMAOpc = 0; - switch (MI.getOpcode()) { - case X86::VFMADD213PDr: - NewFMAOpc = X86::VFMADD231PDr; - break; - case X86::VFMADD213PSr: - NewFMAOpc = X86::VFMADD231PSr; - break; - case X86::VFMADD213SDr: - NewFMAOpc = X86::VFMADD231SDr; - break; - case X86::VFMADD213SSr: - NewFMAOpc = X86::VFMADD231SSr; - break; - case X86::VFMSUB213PDr: - NewFMAOpc = X86::VFMSUB231PDr; - break; - case X86::VFMSUB213PSr: - NewFMAOpc = X86::VFMSUB231PSr; - break; - case X86::VFMSUB213SDr: - NewFMAOpc = X86::VFMSUB231SDr; - break; - case X86::VFMSUB213SSr: - NewFMAOpc = X86::VFMSUB231SSr; - break; - case X86::VFNMADD213PDr: - NewFMAOpc = X86::VFNMADD231PDr; - break; - case X86::VFNMADD213PSr: - NewFMAOpc = X86::VFNMADD231PSr; - break; - case X86::VFNMADD213SDr: - NewFMAOpc = X86::VFNMADD231SDr; - break; - case X86::VFNMADD213SSr: - NewFMAOpc = X86::VFNMADD231SSr; - break; - case X86::VFNMSUB213PDr: - NewFMAOpc = X86::VFNMSUB231PDr; - break; - case X86::VFNMSUB213PSr: - NewFMAOpc = X86::VFNMSUB231PSr; - break; - case X86::VFNMSUB213SDr: - NewFMAOpc = X86::VFNMSUB231SDr; - break; - case X86::VFNMSUB213SSr: - NewFMAOpc = X86::VFNMSUB231SSr; - break; - case X86::VFMADDSUB213PDr: - NewFMAOpc = X86::VFMADDSUB231PDr; - break; - case X86::VFMADDSUB213PSr: - NewFMAOpc = X86::VFMADDSUB231PSr; - break; - case X86::VFMSUBADD213PDr: - NewFMAOpc = X86::VFMSUBADD231PDr; - break; - case X86::VFMSUBADD213PSr: - NewFMAOpc = X86::VFMSUBADD231PSr; - break; - - case X86::VFMADD213PDYr: - NewFMAOpc = X86::VFMADD231PDYr; - break; - case X86::VFMADD213PSYr: - NewFMAOpc = X86::VFMADD231PSYr; - break; - case X86::VFMSUB213PDYr: - NewFMAOpc = X86::VFMSUB231PDYr; - break; - case X86::VFMSUB213PSYr: - NewFMAOpc = X86::VFMSUB231PSYr; - break; - case X86::VFNMADD213PDYr: - NewFMAOpc = X86::VFNMADD231PDYr; - break; - case X86::VFNMADD213PSYr: - NewFMAOpc = X86::VFNMADD231PSYr; - break; - case X86::VFNMSUB213PDYr: - NewFMAOpc = X86::VFNMSUB231PDYr; - break; - case X86::VFNMSUB213PSYr: - NewFMAOpc = X86::VFNMSUB231PSYr; - break; - case X86::VFMADDSUB213PDYr: - NewFMAOpc = X86::VFMADDSUB231PDYr; - break; - case X86::VFMADDSUB213PSYr: - NewFMAOpc = X86::VFMADDSUB231PSYr; - break; - case X86::VFMSUBADD213PDYr: - NewFMAOpc = X86::VFMSUBADD231PDYr; - break; - case X86::VFMSUBADD213PSYr: - NewFMAOpc = X86::VFMSUBADD231PSYr; - break; - default: - llvm_unreachable("Unrecognized FMA variant."); - } - - const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - MachineInstrBuilder MIB = - BuildMI(MF, MI.getDebugLoc(), TII.get(NewFMAOpc)) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(3)) - .addOperand(MI.getOperand(2)) - .addOperand(MI.getOperand(1)); - MBB->insert(MachineBasicBlock::iterator(MI), MIB); - MI.eraseFromParent(); - } - } - - return MBB; -} - MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { @@ -24616,39 +24458,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); - case X86::VFMADD213PDr: - case X86::VFMADD213PSr: - case X86::VFMADD213SDr: - case X86::VFMADD213SSr: - case X86::VFMSUB213PDr: - case X86::VFMSUB213PSr: - case X86::VFMSUB213SDr: - case X86::VFMSUB213SSr: - case X86::VFNMADD213PDr: - case X86::VFNMADD213PSr: - case X86::VFNMADD213SDr: - case X86::VFNMADD213SSr: - case X86::VFNMSUB213PDr: - case X86::VFNMSUB213PSr: - case X86::VFNMSUB213SDr: - case X86::VFNMSUB213SSr: - case X86::VFMADDSUB213PDr: - case X86::VFMADDSUB213PSr: - case X86::VFMSUBADD213PDr: - case X86::VFMSUBADD213PSr: - case X86::VFMADD213PDYr: - case X86::VFMADD213PSYr: - case X86::VFMSUB213PDYr: - case X86::VFMSUB213PSYr: - case X86::VFNMADD213PDYr: - case X86::VFNMADD213PSYr: - case X86::VFNMSUB213PDYr: - case X86::VFNMSUB213PSYr: - case X86::VFMADDSUB213PDYr: - case X86::VFMADDSUB213PSYr: - case X86::VFMSUBADD213PDYr: - case X86::VFMSUBADD213PSYr: - return emitFMA3Instr(MI, BB); case X86::LCMPXCHG8B_SAVE_EBX: case X86::LCMPXCHG16B_SAVE_RBX: { unsigned BasePtr = diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 8b3dbbcd5e6..49cf6dc06d5 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -39,7 +39,6 @@ multiclass fma3p_rm opc, string OpcodeStr, PatFrag MemFrag128, PatFrag MemFrag256, ValueType OpVT128, ValueType OpVT256, SDPatternOperator Op = null_frag> { - let usesCustomInserter = 1 in def r : FMA3 opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, (MemFrag128 addr:$src3))))]>; - let usesCustomInserter = 1 in def Yr : FMA3 opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, SDPatternOperator OpNode = null_frag> { - let usesCustomInserter = 1 in def r : FMA3