MLxExpansionPass.cpp revision 239462
1234353Sdim//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
2218885Sdim//
3218885Sdim//                     The LLVM Compiler Infrastructure
4218885Sdim//
5218885Sdim// This file is distributed under the University of Illinois Open Source
6218885Sdim// License. See LICENSE.TXT for details.
7218885Sdim//
8218885Sdim//===----------------------------------------------------------------------===//
9218885Sdim//
// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
// multiply and add / sub instructions) when special VMLx hazards are detected.
12218885Sdim//
13218885Sdim//===----------------------------------------------------------------------===//
14218885Sdim
15218885Sdim#define DEBUG_TYPE "mlx-expansion"
16218885Sdim#include "ARM.h"
17218885Sdim#include "ARMBaseInstrInfo.h"
18221345Sdim#include "ARMSubtarget.h"
19218885Sdim#include "llvm/CodeGen/MachineInstr.h"
20218885Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
21218885Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
22218885Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
23218885Sdim#include "llvm/Target/TargetRegisterInfo.h"
24221345Sdim#include "llvm/ADT/SmallPtrSet.h"
25218885Sdim#include "llvm/ADT/Statistic.h"
26218885Sdim#include "llvm/Support/CommandLine.h"
27218885Sdim#include "llvm/Support/Debug.h"
28218885Sdim#include "llvm/Support/raw_ostream.h"
29218885Sdimusing namespace llvm;
30218885Sdim
31218885Sdimstatic cl::opt<bool>
32218885SdimForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
33218885Sdimstatic cl::opt<unsigned>
34218885SdimExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
35218885Sdim
36218885SdimSTATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
37218885Sdim
38218885Sdimnamespace {
39218885Sdim  struct MLxExpansion : public MachineFunctionPass {
40218885Sdim    static char ID;
41218885Sdim    MLxExpansion() : MachineFunctionPass(ID) {}
42218885Sdim
43218885Sdim    virtual bool runOnMachineFunction(MachineFunction &Fn);
44218885Sdim
45218885Sdim    virtual const char *getPassName() const {
46218885Sdim      return "ARM MLA / MLS expansion pass";
47218885Sdim    }
48218885Sdim
49218885Sdim  private:
50218885Sdim    const ARMBaseInstrInfo *TII;
51218885Sdim    const TargetRegisterInfo *TRI;
52218885Sdim    MachineRegisterInfo *MRI;
53218885Sdim
54221345Sdim    bool isA9;
55218885Sdim    unsigned MIIdx;
56218885Sdim    MachineInstr* LastMIs[4];
57221345Sdim    SmallPtrSet<MachineInstr*, 4> IgnoreStall;
58218885Sdim
59218885Sdim    void clearStack();
60218885Sdim    void pushStack(MachineInstr *MI);
61218885Sdim    MachineInstr *getAccDefMI(MachineInstr *MI) const;
62218885Sdim    unsigned getDefReg(MachineInstr *MI) const;
63218885Sdim    bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
64221345Sdim    bool FindMLxHazard(MachineInstr *MI);
65218885Sdim    void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
66218885Sdim                                unsigned MulOpc, unsigned AddSubOpc,
67218885Sdim                                bool NegAcc, bool HasLane);
68218885Sdim    bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
69218885Sdim  };
70218885Sdim  char MLxExpansion::ID = 0;
71218885Sdim}
72218885Sdim
73218885Sdimvoid MLxExpansion::clearStack() {
74218885Sdim  std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
75218885Sdim  MIIdx = 0;
76218885Sdim}
77218885Sdim
78218885Sdimvoid MLxExpansion::pushStack(MachineInstr *MI) {
79218885Sdim  LastMIs[MIIdx] = MI;
80218885Sdim  if (++MIIdx == 4)
81218885Sdim    MIIdx = 0;
82218885Sdim}
83218885Sdim
84218885SdimMachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
85218885Sdim  // Look past COPY and INSERT_SUBREG instructions to find the
86218885Sdim  // real definition MI. This is important for _sfp instructions.
87218885Sdim  unsigned Reg = MI->getOperand(1).getReg();
88218885Sdim  if (TargetRegisterInfo::isPhysicalRegister(Reg))
89218885Sdim    return 0;
90218885Sdim
91218885Sdim  MachineBasicBlock *MBB = MI->getParent();
92218885Sdim  MachineInstr *DefMI = MRI->getVRegDef(Reg);
93218885Sdim  while (true) {
94218885Sdim    if (DefMI->getParent() != MBB)
95218885Sdim      break;
96218885Sdim    if (DefMI->isCopyLike()) {
97218885Sdim      Reg = DefMI->getOperand(1).getReg();
98218885Sdim      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
99218885Sdim        DefMI = MRI->getVRegDef(Reg);
100218885Sdim        continue;
101218885Sdim      }
102218885Sdim    } else if (DefMI->isInsertSubreg()) {
103218885Sdim      Reg = DefMI->getOperand(2).getReg();
104218885Sdim      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
105218885Sdim        DefMI = MRI->getVRegDef(Reg);
106218885Sdim        continue;
107218885Sdim      }
108218885Sdim    }
109218885Sdim    break;
110218885Sdim  }
111218885Sdim  return DefMI;
112218885Sdim}
113218885Sdim
114218885Sdimunsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
115218885Sdim  unsigned Reg = MI->getOperand(0).getReg();
116218885Sdim  if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
117218885Sdim      !MRI->hasOneNonDBGUse(Reg))
118218885Sdim    return Reg;
119218885Sdim
120218885Sdim  MachineBasicBlock *MBB = MI->getParent();
121218885Sdim  MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
122218885Sdim  if (UseMI->getParent() != MBB)
123218885Sdim    return Reg;
124218885Sdim
125218885Sdim  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
126218885Sdim    Reg = UseMI->getOperand(0).getReg();
127218885Sdim    if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
128218885Sdim        !MRI->hasOneNonDBGUse(Reg))
129218885Sdim      return Reg;
130218885Sdim    UseMI = &*MRI->use_nodbg_begin(Reg);
131218885Sdim    if (UseMI->getParent() != MBB)
132218885Sdim      return Reg;
133218885Sdim  }
134218885Sdim
135218885Sdim  return Reg;
136218885Sdim}
137218885Sdim
138218885Sdimbool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
139219077Sdim  // FIXME: Detect integer instructions properly.
140224145Sdim  const MCInstrDesc &MCID = MI->getDesc();
141224145Sdim  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
142234353Sdim  if (MI->mayStore())
143218885Sdim    return false;
144224145Sdim  unsigned Opcode = MCID.getOpcode();
145219077Sdim  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
146219077Sdim    return false;
147219077Sdim  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
148219077Sdim    return MI->readsRegister(Reg, TRI);
149218885Sdim  return false;
150218885Sdim}
151218885Sdim
152218885Sdim
153221345Sdimbool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
154218885Sdim  if (NumExpand >= ExpandLimit)
155218885Sdim    return false;
156218885Sdim
157218885Sdim  if (ForceExapnd)
158218885Sdim    return true;
159218885Sdim
160218885Sdim  MachineInstr *DefMI = getAccDefMI(MI);
161221345Sdim  if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
162218885Sdim    // r0 = vmla
163218885Sdim    // r3 = vmla r0, r1, r2
164218885Sdim    // takes 16 - 17 cycles
165218885Sdim    //
166218885Sdim    // r0 = vmla
167218885Sdim    // r4 = vmul r1, r2
168218885Sdim    // r3 = vadd r0, r4
169218885Sdim    // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
170221345Sdim    IgnoreStall.insert(DefMI);
171218885Sdim    return true;
172221345Sdim  }
173218885Sdim
174221345Sdim  if (IgnoreStall.count(MI))
175221345Sdim    return false;
176221345Sdim
177218885Sdim  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
178218885Sdim  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
179218885Sdim  // preserves the in-order retirement of the instructions.
180218885Sdim  // Look at the next few instructions, if *most* of them can cause hazards,
181218885Sdim  // then the scheduler can't *fix* this, we'd better break up the VMLA.
182221345Sdim  unsigned Limit1 = isA9 ? 1 : 4;
183221345Sdim  unsigned Limit2 = isA9 ? 1 : 4;
184218885Sdim  for (unsigned i = 1; i <= 4; ++i) {
185218885Sdim    int Idx = ((int)MIIdx - i + 4) % 4;
186218885Sdim    MachineInstr *NextMI = LastMIs[Idx];
187218885Sdim    if (!NextMI)
188218885Sdim      continue;
189218885Sdim
190221345Sdim    if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
191221345Sdim      if (i <= Limit1)
192221345Sdim        return true;
193221345Sdim    }
194218885Sdim
195218885Sdim    // Look for VMLx RAW hazard.
196221345Sdim    if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
197218885Sdim      return true;
198218885Sdim  }
199218885Sdim
200218885Sdim  return false;
201218885Sdim}
202218885Sdim
203218885Sdim/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
204218885Sdim/// of MUL + ADD / SUB instructions.
205218885Sdimvoid
206218885SdimMLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
207218885Sdim                                     unsigned MulOpc, unsigned AddSubOpc,
208218885Sdim                                     bool NegAcc, bool HasLane) {
209218885Sdim  unsigned DstReg = MI->getOperand(0).getReg();
210218885Sdim  bool DstDead = MI->getOperand(0).isDead();
211218885Sdim  unsigned AccReg = MI->getOperand(1).getReg();
212218885Sdim  unsigned Src1Reg = MI->getOperand(2).getReg();
213218885Sdim  unsigned Src2Reg = MI->getOperand(3).getReg();
214218885Sdim  bool Src1Kill = MI->getOperand(2).isKill();
215218885Sdim  bool Src2Kill = MI->getOperand(3).isKill();
216218885Sdim  unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
217218885Sdim  unsigned NextOp = HasLane ? 5 : 4;
218218885Sdim  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
219218885Sdim  unsigned PredReg = MI->getOperand(++NextOp).getReg();
220218885Sdim
221224145Sdim  const MCInstrDesc &MCID1 = TII->get(MulOpc);
222224145Sdim  const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
223239462Sdim  const MachineFunction &MF = *MI->getParent()->getParent();
224239462Sdim  unsigned TmpReg = MRI->createVirtualRegister(
225239462Sdim                      TII->getRegClass(MCID1, 0, TRI, MF));
226218885Sdim
227234353Sdim  MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
228218885Sdim    .addReg(Src1Reg, getKillRegState(Src1Kill))
229218885Sdim    .addReg(Src2Reg, getKillRegState(Src2Kill));
230218885Sdim  if (HasLane)
231218885Sdim    MIB.addImm(LaneImm);
232218885Sdim  MIB.addImm(Pred).addReg(PredReg);
233218885Sdim
234234353Sdim  MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
235218885Sdim    .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
236218885Sdim
237218885Sdim  if (NegAcc) {
238218885Sdim    bool AccKill = MRI->hasOneNonDBGUse(AccReg);
239218885Sdim    MIB.addReg(TmpReg, getKillRegState(true))
240218885Sdim       .addReg(AccReg, getKillRegState(AccKill));
241218885Sdim  } else {
242218885Sdim    MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
243218885Sdim  }
244218885Sdim  MIB.addImm(Pred).addReg(PredReg);
245218885Sdim
246218885Sdim  DEBUG({
247218885Sdim      dbgs() << "Expanding: " << *MI;
248218885Sdim      dbgs() << "  to:\n";
249218885Sdim      MachineBasicBlock::iterator MII = MI;
250218885Sdim      MII = llvm::prior(MII);
251218885Sdim      MachineInstr &MI2 = *MII;
252218885Sdim      MII = llvm::prior(MII);
253218885Sdim      MachineInstr &MI1 = *MII;
254218885Sdim      dbgs() << "    " << MI1;
255218885Sdim      dbgs() << "    " << MI2;
256218885Sdim   });
257218885Sdim
258218885Sdim  MI->eraseFromParent();
259218885Sdim  ++NumExpand;
260218885Sdim}
261218885Sdim
262218885Sdimbool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
263218885Sdim  bool Changed = false;
264218885Sdim
265218885Sdim  clearStack();
266221345Sdim  IgnoreStall.clear();
267218885Sdim
268218885Sdim  unsigned Skip = 0;
269218885Sdim  MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
270218885Sdim  while (MII != E) {
271218885Sdim    MachineInstr *MI = &*MII;
272218885Sdim
273218885Sdim    if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
274218885Sdim      ++MII;
275218885Sdim      continue;
276218885Sdim    }
277218885Sdim
278224145Sdim    const MCInstrDesc &MCID = MI->getDesc();
279234353Sdim    if (MI->isBarrier()) {
280218885Sdim      clearStack();
281218885Sdim      Skip = 0;
282218885Sdim      ++MII;
283218885Sdim      continue;
284218885Sdim    }
285218885Sdim
286224145Sdim    unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
287218885Sdim    if (Domain == ARMII::DomainGeneral) {
288218885Sdim      if (++Skip == 2)
289218885Sdim        // Assume dual issues of non-VFP / NEON instructions.
290218885Sdim        pushStack(0);
291218885Sdim    } else {
292218885Sdim      Skip = 0;
293218885Sdim
294218885Sdim      unsigned MulOpc, AddSubOpc;
295218885Sdim      bool NegAcc, HasLane;
296224145Sdim      if (!TII->isFpMLxInstruction(MCID.getOpcode(),
297218885Sdim                                   MulOpc, AddSubOpc, NegAcc, HasLane) ||
298218885Sdim          !FindMLxHazard(MI))
299218885Sdim        pushStack(MI);
300218885Sdim      else {
301218885Sdim        ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
302218885Sdim        E = MBB.rend(); // May have changed if MI was the 1st instruction.
303218885Sdim        Changed = true;
304218885Sdim        continue;
305218885Sdim      }
306218885Sdim    }
307218885Sdim
308218885Sdim    ++MII;
309218885Sdim  }
310218885Sdim
311218885Sdim  return Changed;
312218885Sdim}
313218885Sdim
314218885Sdimbool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
315218885Sdim  TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
316218885Sdim  TRI = Fn.getTarget().getRegisterInfo();
317218885Sdim  MRI = &Fn.getRegInfo();
318221345Sdim  const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
319221345Sdim  isA9 = STI->isCortexA9();
320218885Sdim
321218885Sdim  bool Modified = false;
322218885Sdim  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
323218885Sdim       ++MFI) {
324218885Sdim    MachineBasicBlock &MBB = *MFI;
325218885Sdim    Modified |= ExpandFPMLxInstructions(MBB);
326218885Sdim  }
327218885Sdim
328218885Sdim  return Modified;
329218885Sdim}
330218885Sdim
331218885SdimFunctionPass *llvm::createMLxExpansionPass() {
332218885Sdim  return new MLxExpansion();
333218885Sdim}
334