MLxExpansionPass.cpp revision 221345
1218885Sdim//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=// 2218885Sdim// 3218885Sdim// The LLVM Compiler Infrastructure 4218885Sdim// 5218885Sdim// This file is distributed under the University of Illinois Open Source 6218885Sdim// License. See LICENSE.TXT for details. 7218885Sdim// 8218885Sdim//===----------------------------------------------------------------------===// 9218885Sdim// 10218885Sdim// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of 11218885Sdim// multiple and add / sub instructions) when special VMLx hazards are detected. 12218885Sdim// 13218885Sdim//===----------------------------------------------------------------------===// 14218885Sdim 15218885Sdim#define DEBUG_TYPE "mlx-expansion" 16218885Sdim#include "ARM.h" 17218885Sdim#include "ARMBaseInstrInfo.h" 18221345Sdim#include "ARMSubtarget.h" 19218885Sdim#include "llvm/CodeGen/MachineInstr.h" 20218885Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 21218885Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 22218885Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 23218885Sdim#include "llvm/Target/TargetRegisterInfo.h" 24221345Sdim#include "llvm/ADT/SmallPtrSet.h" 25218885Sdim#include "llvm/ADT/Statistic.h" 26218885Sdim#include "llvm/Support/CommandLine.h" 27218885Sdim#include "llvm/Support/Debug.h" 28218885Sdim#include "llvm/Support/raw_ostream.h" 29218885Sdimusing namespace llvm; 30218885Sdim 31218885Sdimstatic cl::opt<bool> 32218885SdimForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden); 33218885Sdimstatic cl::opt<unsigned> 34218885SdimExpandLimit("expand-limit", cl::init(~0U), cl::Hidden); 35218885Sdim 36218885SdimSTATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded"); 37218885Sdim 38218885Sdimnamespace { 39218885Sdim struct MLxExpansion : public MachineFunctionPass { 40218885Sdim static char ID; 41218885Sdim MLxExpansion() : MachineFunctionPass(ID) {} 42218885Sdim 43218885Sdim virtual bool runOnMachineFunction(MachineFunction &Fn); 44218885Sdim 45218885Sdim virtual const char *getPassName() const { 46218885Sdim return "ARM MLA / MLS expansion pass"; 47218885Sdim } 48218885Sdim 49218885Sdim private: 50218885Sdim const ARMBaseInstrInfo *TII; 51218885Sdim const TargetRegisterInfo *TRI; 52218885Sdim MachineRegisterInfo *MRI; 53218885Sdim 54221345Sdim bool isA9; 55218885Sdim unsigned MIIdx; 56218885Sdim MachineInstr* LastMIs[4]; 57221345Sdim SmallPtrSet<MachineInstr*, 4> IgnoreStall; 58218885Sdim 59218885Sdim void clearStack(); 60218885Sdim void pushStack(MachineInstr *MI); 61218885Sdim MachineInstr *getAccDefMI(MachineInstr *MI) const; 62218885Sdim unsigned getDefReg(MachineInstr *MI) const; 63218885Sdim bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; 64221345Sdim bool FindMLxHazard(MachineInstr *MI); 65218885Sdim void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, 66218885Sdim unsigned MulOpc, unsigned AddSubOpc, 67218885Sdim bool NegAcc, bool HasLane); 68218885Sdim bool ExpandFPMLxInstructions(MachineBasicBlock &MBB); 69218885Sdim }; 70218885Sdim char MLxExpansion::ID = 0; 71218885Sdim} 72218885Sdim 73218885Sdimvoid MLxExpansion::clearStack() { 74218885Sdim std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0); 75218885Sdim MIIdx = 0; 76218885Sdim} 77218885Sdim 78218885Sdimvoid MLxExpansion::pushStack(MachineInstr *MI) { 79218885Sdim LastMIs[MIIdx] = MI; 80218885Sdim if (++MIIdx == 4) 81218885Sdim MIIdx = 0; 82218885Sdim} 83218885Sdim 84218885SdimMachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { 85218885Sdim // Look past COPY and INSERT_SUBREG instructions to find the 86218885Sdim // real definition MI. This is important for _sfp instructions. 87218885Sdim unsigned Reg = MI->getOperand(1).getReg(); 88218885Sdim if (TargetRegisterInfo::isPhysicalRegister(Reg)) 89218885Sdim return 0; 90218885Sdim 91218885Sdim MachineBasicBlock *MBB = MI->getParent(); 92218885Sdim MachineInstr *DefMI = MRI->getVRegDef(Reg); 93218885Sdim while (true) { 94218885Sdim if (DefMI->getParent() != MBB) 95218885Sdim break; 96218885Sdim if (DefMI->isCopyLike()) { 97218885Sdim Reg = DefMI->getOperand(1).getReg(); 98218885Sdim if (TargetRegisterInfo::isVirtualRegister(Reg)) { 99218885Sdim DefMI = MRI->getVRegDef(Reg); 100218885Sdim continue; 101218885Sdim } 102218885Sdim } else if (DefMI->isInsertSubreg()) { 103218885Sdim Reg = DefMI->getOperand(2).getReg(); 104218885Sdim if (TargetRegisterInfo::isVirtualRegister(Reg)) { 105218885Sdim DefMI = MRI->getVRegDef(Reg); 106218885Sdim continue; 107218885Sdim } 108218885Sdim } 109218885Sdim break; 110218885Sdim } 111218885Sdim return DefMI; 112218885Sdim} 113218885Sdim 114218885Sdimunsigned MLxExpansion::getDefReg(MachineInstr *MI) const { 115218885Sdim unsigned Reg = MI->getOperand(0).getReg(); 116218885Sdim if (TargetRegisterInfo::isPhysicalRegister(Reg) || 117218885Sdim !MRI->hasOneNonDBGUse(Reg)) 118218885Sdim return Reg; 119218885Sdim 120218885Sdim MachineBasicBlock *MBB = MI->getParent(); 121218885Sdim MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg); 122218885Sdim if (UseMI->getParent() != MBB) 123218885Sdim return Reg; 124218885Sdim 125218885Sdim while (UseMI->isCopy() || UseMI->isInsertSubreg()) { 126218885Sdim Reg = UseMI->getOperand(0).getReg(); 127218885Sdim if (TargetRegisterInfo::isPhysicalRegister(Reg) || 128218885Sdim !MRI->hasOneNonDBGUse(Reg)) 129218885Sdim return Reg; 130218885Sdim UseMI = &*MRI->use_nodbg_begin(Reg); 131218885Sdim if (UseMI->getParent() != MBB) 132218885Sdim return Reg; 133218885Sdim } 134218885Sdim 135218885Sdim return Reg; 136218885Sdim} 137218885Sdim 138218885Sdimbool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { 139219077Sdim // FIXME: Detect integer instructions properly. 140218885Sdim const TargetInstrDesc &TID = MI->getDesc(); 141218885Sdim unsigned Domain = TID.TSFlags & ARMII::DomainMask; 142219077Sdim if (TID.mayStore()) 143218885Sdim return false; 144219077Sdim unsigned Opcode = TID.getOpcode(); 145219077Sdim if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 146219077Sdim return false; 147219077Sdim if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) 148219077Sdim return MI->readsRegister(Reg, TRI); 149218885Sdim return false; 150218885Sdim} 151218885Sdim 152218885Sdim 153221345Sdimbool MLxExpansion::FindMLxHazard(MachineInstr *MI) { 154218885Sdim if (NumExpand >= ExpandLimit) 155218885Sdim return false; 156218885Sdim 157218885Sdim if (ForceExapnd) 158218885Sdim return true; 159218885Sdim 160218885Sdim MachineInstr *DefMI = getAccDefMI(MI); 161221345Sdim if (TII->isFpMLxInstruction(DefMI->getOpcode())) { 162218885Sdim // r0 = vmla 163218885Sdim // r3 = vmla r0, r1, r2 164218885Sdim // takes 16 - 17 cycles 165218885Sdim // 166218885Sdim // r0 = vmla 167218885Sdim // r4 = vmul r1, r2 168218885Sdim // r3 = vadd r0, r4 169218885Sdim // takes about 14 - 15 cycles even with vmul stalling for 4 cycles. 170221345Sdim IgnoreStall.insert(DefMI); 171218885Sdim return true; 172221345Sdim } 173218885Sdim 174221345Sdim if (IgnoreStall.count(MI)) 175221345Sdim return false; 176221345Sdim 177218885Sdim // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the 178218885Sdim // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall 179218885Sdim // preserves the in-order retirement of the instructions. 180218885Sdim // Look at the next few instructions, if *most* of them can cause hazards, 181218885Sdim // then the scheduler can't *fix* this, we'd better break up the VMLA. 182221345Sdim unsigned Limit1 = isA9 ? 1 : 4; 183221345Sdim unsigned Limit2 = isA9 ? 1 : 4; 184218885Sdim for (unsigned i = 1; i <= 4; ++i) { 185218885Sdim int Idx = ((int)MIIdx - i + 4) % 4; 186218885Sdim MachineInstr *NextMI = LastMIs[Idx]; 187218885Sdim if (!NextMI) 188218885Sdim continue; 189218885Sdim 190221345Sdim if (TII->canCauseFpMLxStall(NextMI->getOpcode())) { 191221345Sdim if (i <= Limit1) 192221345Sdim return true; 193221345Sdim } 194218885Sdim 195218885Sdim // Look for VMLx RAW hazard. 196221345Sdim if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI)) 197218885Sdim return true; 198218885Sdim } 199218885Sdim 200218885Sdim return false; 201218885Sdim} 202218885Sdim 203218885Sdim/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair 204218885Sdim/// of MUL + ADD / SUB instructions. 205218885Sdimvoid 206218885SdimMLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, 207218885Sdim unsigned MulOpc, unsigned AddSubOpc, 208218885Sdim bool NegAcc, bool HasLane) { 209218885Sdim unsigned DstReg = MI->getOperand(0).getReg(); 210218885Sdim bool DstDead = MI->getOperand(0).isDead(); 211218885Sdim unsigned AccReg = MI->getOperand(1).getReg(); 212218885Sdim unsigned Src1Reg = MI->getOperand(2).getReg(); 213218885Sdim unsigned Src2Reg = MI->getOperand(3).getReg(); 214218885Sdim bool Src1Kill = MI->getOperand(2).isKill(); 215218885Sdim bool Src2Kill = MI->getOperand(3).isKill(); 216218885Sdim unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0; 217218885Sdim unsigned NextOp = HasLane ? 5 : 4; 218218885Sdim ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); 219218885Sdim unsigned PredReg = MI->getOperand(++NextOp).getReg(); 220218885Sdim 221218885Sdim const TargetInstrDesc &TID1 = TII->get(MulOpc); 222218885Sdim const TargetInstrDesc &TID2 = TII->get(AddSubOpc); 223218885Sdim unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI)); 224218885Sdim 225218885Sdim MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg) 226218885Sdim .addReg(Src1Reg, getKillRegState(Src1Kill)) 227218885Sdim .addReg(Src2Reg, getKillRegState(Src2Kill)); 228218885Sdim if (HasLane) 229218885Sdim MIB.addImm(LaneImm); 230218885Sdim MIB.addImm(Pred).addReg(PredReg); 231218885Sdim 232218885Sdim MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2) 233218885Sdim .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); 234218885Sdim 235218885Sdim if (NegAcc) { 236218885Sdim bool AccKill = MRI->hasOneNonDBGUse(AccReg); 237218885Sdim MIB.addReg(TmpReg, getKillRegState(true)) 238218885Sdim .addReg(AccReg, getKillRegState(AccKill)); 239218885Sdim } else { 240218885Sdim MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true)); 241218885Sdim } 242218885Sdim MIB.addImm(Pred).addReg(PredReg); 243218885Sdim 244218885Sdim DEBUG({ 245218885Sdim dbgs() << "Expanding: " << *MI; 246218885Sdim dbgs() << " to:\n"; 247218885Sdim MachineBasicBlock::iterator MII = MI; 248218885Sdim MII = llvm::prior(MII); 249218885Sdim MachineInstr &MI2 = *MII; 250218885Sdim MII = llvm::prior(MII); 251218885Sdim MachineInstr &MI1 = *MII; 252218885Sdim dbgs() << " " << MI1; 253218885Sdim dbgs() << " " << MI2; 254218885Sdim }); 255218885Sdim 256218885Sdim MI->eraseFromParent(); 257218885Sdim ++NumExpand; 258218885Sdim} 259218885Sdim 260218885Sdimbool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { 261218885Sdim bool Changed = false; 262218885Sdim 263218885Sdim clearStack(); 264221345Sdim IgnoreStall.clear(); 265218885Sdim 266218885Sdim unsigned Skip = 0; 267218885Sdim MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); 268218885Sdim while (MII != E) { 269218885Sdim MachineInstr *MI = &*MII; 270218885Sdim 271218885Sdim if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) { 272218885Sdim ++MII; 273218885Sdim continue; 274218885Sdim } 275218885Sdim 276218885Sdim const TargetInstrDesc &TID = MI->getDesc(); 277218885Sdim if (TID.isBarrier()) { 278218885Sdim clearStack(); 279218885Sdim Skip = 0; 280218885Sdim ++MII; 281218885Sdim continue; 282218885Sdim } 283218885Sdim 284218885Sdim unsigned Domain = TID.TSFlags & ARMII::DomainMask; 285218885Sdim if (Domain == ARMII::DomainGeneral) { 286218885Sdim if (++Skip == 2) 287218885Sdim // Assume dual issues of non-VFP / NEON instructions. 288218885Sdim pushStack(0); 289218885Sdim } else { 290218885Sdim Skip = 0; 291218885Sdim 292218885Sdim unsigned MulOpc, AddSubOpc; 293218885Sdim bool NegAcc, HasLane; 294218885Sdim if (!TII->isFpMLxInstruction(TID.getOpcode(), 295218885Sdim MulOpc, AddSubOpc, NegAcc, HasLane) || 296218885Sdim !FindMLxHazard(MI)) 297218885Sdim pushStack(MI); 298218885Sdim else { 299218885Sdim ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane); 300218885Sdim E = MBB.rend(); // May have changed if MI was the 1st instruction. 301218885Sdim Changed = true; 302218885Sdim continue; 303218885Sdim } 304218885Sdim } 305218885Sdim 306218885Sdim ++MII; 307218885Sdim } 308218885Sdim 309218885Sdim return Changed; 310218885Sdim} 311218885Sdim 312218885Sdimbool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { 313218885Sdim TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); 314218885Sdim TRI = Fn.getTarget().getRegisterInfo(); 315218885Sdim MRI = &Fn.getRegInfo(); 316221345Sdim const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); 317221345Sdim isA9 = STI->isCortexA9(); 318218885Sdim 319218885Sdim bool Modified = false; 320218885Sdim for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 321218885Sdim ++MFI) { 322218885Sdim MachineBasicBlock &MBB = *MFI; 323218885Sdim Modified |= ExpandFPMLxInstructions(MBB); 324218885Sdim } 325218885Sdim 326218885Sdim return Modified; 327218885Sdim} 328218885Sdim 329218885SdimFunctionPass *llvm::createMLxExpansionPass() { 330218885Sdim return new MLxExpansion(); 331218885Sdim} 332