MLxExpansionPass.cpp revision 221345
1218885Sdim//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
2218885Sdim//
3218885Sdim//                     The LLVM Compiler Infrastructure
4218885Sdim//
5218885Sdim// This file is distributed under the University of Illinois Open Source
6218885Sdim// License. See LICENSE.TXT for details.
7218885Sdim//
8218885Sdim//===----------------------------------------------------------------------===//
9218885Sdim//
10218885Sdim// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
11218885Sdim// multiply and add / sub instructions) when special VMLx hazards are detected.
12218885Sdim//
13218885Sdim//===----------------------------------------------------------------------===//
14218885Sdim
15218885Sdim#define DEBUG_TYPE "mlx-expansion"
16218885Sdim#include "ARM.h"
17218885Sdim#include "ARMBaseInstrInfo.h"
18221345Sdim#include "ARMSubtarget.h"
19218885Sdim#include "llvm/CodeGen/MachineInstr.h"
20218885Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
21218885Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
22218885Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
23218885Sdim#include "llvm/Target/TargetRegisterInfo.h"
24221345Sdim#include "llvm/ADT/SmallPtrSet.h"
25218885Sdim#include "llvm/ADT/Statistic.h"
26218885Sdim#include "llvm/Support/CommandLine.h"
27218885Sdim#include "llvm/Support/Debug.h"
28218885Sdim#include "llvm/Support/raw_ostream.h"
29218885Sdimusing namespace llvm;
30218885Sdim
31218885Sdimstatic cl::opt<bool>
32218885SdimForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
33218885Sdimstatic cl::opt<unsigned>
34218885SdimExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
35218885Sdim
36218885SdimSTATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
37218885Sdim
38218885Sdimnamespace {
39218885Sdim  struct MLxExpansion : public MachineFunctionPass {
40218885Sdim    static char ID;
41218885Sdim    MLxExpansion() : MachineFunctionPass(ID) {}
42218885Sdim
43218885Sdim    virtual bool runOnMachineFunction(MachineFunction &Fn);
44218885Sdim
45218885Sdim    virtual const char *getPassName() const {
46218885Sdim      return "ARM MLA / MLS expansion pass";
47218885Sdim    }
48218885Sdim
49218885Sdim  private:
50218885Sdim    const ARMBaseInstrInfo *TII;
51218885Sdim    const TargetRegisterInfo *TRI;
52218885Sdim    MachineRegisterInfo *MRI;
53218885Sdim
54221345Sdim    bool isA9;
55218885Sdim    unsigned MIIdx;
56218885Sdim    MachineInstr* LastMIs[4];
57221345Sdim    SmallPtrSet<MachineInstr*, 4> IgnoreStall;
58218885Sdim
59218885Sdim    void clearStack();
60218885Sdim    void pushStack(MachineInstr *MI);
61218885Sdim    MachineInstr *getAccDefMI(MachineInstr *MI) const;
62218885Sdim    unsigned getDefReg(MachineInstr *MI) const;
63218885Sdim    bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
64221345Sdim    bool FindMLxHazard(MachineInstr *MI);
65218885Sdim    void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
66218885Sdim                                unsigned MulOpc, unsigned AddSubOpc,
67218885Sdim                                bool NegAcc, bool HasLane);
68218885Sdim    bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
69218885Sdim  };
70218885Sdim  char MLxExpansion::ID = 0;
71218885Sdim}
72218885Sdim
73218885Sdimvoid MLxExpansion::clearStack() {
74218885Sdim  std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
75218885Sdim  MIIdx = 0;
76218885Sdim}
77218885Sdim
78218885Sdimvoid MLxExpansion::pushStack(MachineInstr *MI) {
79218885Sdim  LastMIs[MIIdx] = MI;
80218885Sdim  if (++MIIdx == 4)
81218885Sdim    MIIdx = 0;
82218885Sdim}
83218885Sdim
84218885SdimMachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
85218885Sdim  // Look past COPY and INSERT_SUBREG instructions to find the
86218885Sdim  // real definition MI. This is important for _sfp instructions.
87218885Sdim  unsigned Reg = MI->getOperand(1).getReg();
88218885Sdim  if (TargetRegisterInfo::isPhysicalRegister(Reg))
89218885Sdim    return 0;
90218885Sdim
91218885Sdim  MachineBasicBlock *MBB = MI->getParent();
92218885Sdim  MachineInstr *DefMI = MRI->getVRegDef(Reg);
93218885Sdim  while (true) {
94218885Sdim    if (DefMI->getParent() != MBB)
95218885Sdim      break;
96218885Sdim    if (DefMI->isCopyLike()) {
97218885Sdim      Reg = DefMI->getOperand(1).getReg();
98218885Sdim      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
99218885Sdim        DefMI = MRI->getVRegDef(Reg);
100218885Sdim        continue;
101218885Sdim      }
102218885Sdim    } else if (DefMI->isInsertSubreg()) {
103218885Sdim      Reg = DefMI->getOperand(2).getReg();
104218885Sdim      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
105218885Sdim        DefMI = MRI->getVRegDef(Reg);
106218885Sdim        continue;
107218885Sdim      }
108218885Sdim    }
109218885Sdim    break;
110218885Sdim  }
111218885Sdim  return DefMI;
112218885Sdim}
113218885Sdim
114218885Sdimunsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
115218885Sdim  unsigned Reg = MI->getOperand(0).getReg();
116218885Sdim  if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
117218885Sdim      !MRI->hasOneNonDBGUse(Reg))
118218885Sdim    return Reg;
119218885Sdim
120218885Sdim  MachineBasicBlock *MBB = MI->getParent();
121218885Sdim  MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
122218885Sdim  if (UseMI->getParent() != MBB)
123218885Sdim    return Reg;
124218885Sdim
125218885Sdim  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
126218885Sdim    Reg = UseMI->getOperand(0).getReg();
127218885Sdim    if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
128218885Sdim        !MRI->hasOneNonDBGUse(Reg))
129218885Sdim      return Reg;
130218885Sdim    UseMI = &*MRI->use_nodbg_begin(Reg);
131218885Sdim    if (UseMI->getParent() != MBB)
132218885Sdim      return Reg;
133218885Sdim  }
134218885Sdim
135218885Sdim  return Reg;
136218885Sdim}
137218885Sdim
138218885Sdimbool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
139219077Sdim  // FIXME: Detect integer instructions properly.
140218885Sdim  const TargetInstrDesc &TID = MI->getDesc();
141218885Sdim  unsigned Domain = TID.TSFlags & ARMII::DomainMask;
142219077Sdim  if (TID.mayStore())
143218885Sdim    return false;
144219077Sdim  unsigned Opcode = TID.getOpcode();
145219077Sdim  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
146219077Sdim    return false;
147219077Sdim  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
148219077Sdim    return MI->readsRegister(Reg, TRI);
149218885Sdim  return false;
150218885Sdim}
151218885Sdim
152218885Sdim
153221345Sdimbool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
154218885Sdim  if (NumExpand >= ExpandLimit)
155218885Sdim    return false;
156218885Sdim
157218885Sdim  if (ForceExapnd)
158218885Sdim    return true;
159218885Sdim
160218885Sdim  MachineInstr *DefMI = getAccDefMI(MI);
161221345Sdim  if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
162218885Sdim    // r0 = vmla
163218885Sdim    // r3 = vmla r0, r1, r2
164218885Sdim    // takes 16 - 17 cycles
165218885Sdim    //
166218885Sdim    // r0 = vmla
167218885Sdim    // r4 = vmul r1, r2
168218885Sdim    // r3 = vadd r0, r4
169218885Sdim    // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
170221345Sdim    IgnoreStall.insert(DefMI);
171218885Sdim    return true;
172221345Sdim  }
173218885Sdim
174221345Sdim  if (IgnoreStall.count(MI))
175221345Sdim    return false;
176221345Sdim
177218885Sdim  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
178218885Sdim  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
179218885Sdim  // preserves the in-order retirement of the instructions.
180218885Sdim  // Look at the next few instructions, if *most* of them can cause hazards,
181218885Sdim  // then the scheduler can't *fix* this, we'd better break up the VMLA.
182221345Sdim  unsigned Limit1 = isA9 ? 1 : 4;
183221345Sdim  unsigned Limit2 = isA9 ? 1 : 4;
184218885Sdim  for (unsigned i = 1; i <= 4; ++i) {
185218885Sdim    int Idx = ((int)MIIdx - i + 4) % 4;
186218885Sdim    MachineInstr *NextMI = LastMIs[Idx];
187218885Sdim    if (!NextMI)
188218885Sdim      continue;
189218885Sdim
190221345Sdim    if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
191221345Sdim      if (i <= Limit1)
192221345Sdim        return true;
193221345Sdim    }
194218885Sdim
195218885Sdim    // Look for VMLx RAW hazard.
196221345Sdim    if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
197218885Sdim      return true;
198218885Sdim  }
199218885Sdim
200218885Sdim  return false;
201218885Sdim}
202218885Sdim
203218885Sdim/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
204218885Sdim/// of MUL + ADD / SUB instructions.
205218885Sdimvoid
206218885SdimMLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
207218885Sdim                                     unsigned MulOpc, unsigned AddSubOpc,
208218885Sdim                                     bool NegAcc, bool HasLane) {
209218885Sdim  unsigned DstReg = MI->getOperand(0).getReg();
210218885Sdim  bool DstDead = MI->getOperand(0).isDead();
211218885Sdim  unsigned AccReg = MI->getOperand(1).getReg();
212218885Sdim  unsigned Src1Reg = MI->getOperand(2).getReg();
213218885Sdim  unsigned Src2Reg = MI->getOperand(3).getReg();
214218885Sdim  bool Src1Kill = MI->getOperand(2).isKill();
215218885Sdim  bool Src2Kill = MI->getOperand(3).isKill();
216218885Sdim  unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
217218885Sdim  unsigned NextOp = HasLane ? 5 : 4;
218218885Sdim  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
219218885Sdim  unsigned PredReg = MI->getOperand(++NextOp).getReg();
220218885Sdim
221218885Sdim  const TargetInstrDesc &TID1 = TII->get(MulOpc);
222218885Sdim  const TargetInstrDesc &TID2 = TII->get(AddSubOpc);
223218885Sdim  unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI));
224218885Sdim
225218885Sdim  MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg)
226218885Sdim    .addReg(Src1Reg, getKillRegState(Src1Kill))
227218885Sdim    .addReg(Src2Reg, getKillRegState(Src2Kill));
228218885Sdim  if (HasLane)
229218885Sdim    MIB.addImm(LaneImm);
230218885Sdim  MIB.addImm(Pred).addReg(PredReg);
231218885Sdim
232218885Sdim  MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2)
233218885Sdim    .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
234218885Sdim
235218885Sdim  if (NegAcc) {
236218885Sdim    bool AccKill = MRI->hasOneNonDBGUse(AccReg);
237218885Sdim    MIB.addReg(TmpReg, getKillRegState(true))
238218885Sdim       .addReg(AccReg, getKillRegState(AccKill));
239218885Sdim  } else {
240218885Sdim    MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
241218885Sdim  }
242218885Sdim  MIB.addImm(Pred).addReg(PredReg);
243218885Sdim
244218885Sdim  DEBUG({
245218885Sdim      dbgs() << "Expanding: " << *MI;
246218885Sdim      dbgs() << "  to:\n";
247218885Sdim      MachineBasicBlock::iterator MII = MI;
248218885Sdim      MII = llvm::prior(MII);
249218885Sdim      MachineInstr &MI2 = *MII;
250218885Sdim      MII = llvm::prior(MII);
251218885Sdim      MachineInstr &MI1 = *MII;
252218885Sdim      dbgs() << "    " << MI1;
253218885Sdim      dbgs() << "    " << MI2;
254218885Sdim   });
255218885Sdim
256218885Sdim  MI->eraseFromParent();
257218885Sdim  ++NumExpand;
258218885Sdim}
259218885Sdim
260218885Sdimbool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
261218885Sdim  bool Changed = false;
262218885Sdim
263218885Sdim  clearStack();
264221345Sdim  IgnoreStall.clear();
265218885Sdim
266218885Sdim  unsigned Skip = 0;
267218885Sdim  MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
268218885Sdim  while (MII != E) {
269218885Sdim    MachineInstr *MI = &*MII;
270218885Sdim
271218885Sdim    if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
272218885Sdim      ++MII;
273218885Sdim      continue;
274218885Sdim    }
275218885Sdim
276218885Sdim    const TargetInstrDesc &TID = MI->getDesc();
277218885Sdim    if (TID.isBarrier()) {
278218885Sdim      clearStack();
279218885Sdim      Skip = 0;
280218885Sdim      ++MII;
281218885Sdim      continue;
282218885Sdim    }
283218885Sdim
284218885Sdim    unsigned Domain = TID.TSFlags & ARMII::DomainMask;
285218885Sdim    if (Domain == ARMII::DomainGeneral) {
286218885Sdim      if (++Skip == 2)
287218885Sdim        // Assume dual issues of non-VFP / NEON instructions.
288218885Sdim        pushStack(0);
289218885Sdim    } else {
290218885Sdim      Skip = 0;
291218885Sdim
292218885Sdim      unsigned MulOpc, AddSubOpc;
293218885Sdim      bool NegAcc, HasLane;
294218885Sdim      if (!TII->isFpMLxInstruction(TID.getOpcode(),
295218885Sdim                                   MulOpc, AddSubOpc, NegAcc, HasLane) ||
296218885Sdim          !FindMLxHazard(MI))
297218885Sdim        pushStack(MI);
298218885Sdim      else {
299218885Sdim        ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
300218885Sdim        E = MBB.rend(); // May have changed if MI was the 1st instruction.
301218885Sdim        Changed = true;
302218885Sdim        continue;
303218885Sdim      }
304218885Sdim    }
305218885Sdim
306218885Sdim    ++MII;
307218885Sdim  }
308218885Sdim
309218885Sdim  return Changed;
310218885Sdim}
311218885Sdim
312218885Sdimbool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
313218885Sdim  TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
314218885Sdim  TRI = Fn.getTarget().getRegisterInfo();
315218885Sdim  MRI = &Fn.getRegInfo();
316221345Sdim  const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
317221345Sdim  isA9 = STI->isCortexA9();
318218885Sdim
319218885Sdim  bool Modified = false;
320218885Sdim  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
321218885Sdim       ++MFI) {
322218885Sdim    MachineBasicBlock &MBB = *MFI;
323218885Sdim    Modified |= ExpandFPMLxInstructions(MBB);
324218885Sdim  }
325218885Sdim
326218885Sdim  return Modified;
327218885Sdim}
328218885Sdim
329218885SdimFunctionPass *llvm::createMLxExpansionPass() {
330218885Sdim  return new MLxExpansion();
331218885Sdim}
332