1193323Sed//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2193323Sed//
3193323Sed// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4193323Sed// See https://llvm.org/LICENSE.txt for license information.
5193323Sed// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6193323Sed//
7193323Sed//===----------------------------------------------------------------------===//
8193323Sed//
9193323Sed// This file implements the AArch64ConditionalCompares pass which reduces
10193323Sed// branching and code size by using the conditional compare instructions CCMP,
11193323Sed// CCMN, and FCMP.
12193323Sed//
13193323Sed// The CFG transformations for forming conditional compares are very similar to
14193323Sed// if-conversion, and this pass should run immediately before the early
15193323Sed// if-conversion pass.
16193323Sed//
17193323Sed//===----------------------------------------------------------------------===//
18193323Sed
19193323Sed#include "AArch64.h"
20193323Sed#include "llvm/ADT/DepthFirstIterator.h"
21193323Sed#include "llvm/ADT/Statistic.h"
22193323Sed#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
23193323Sed#include "llvm/CodeGen/MachineDominators.h"
24193323Sed#include "llvm/CodeGen/MachineFunction.h"
25193323Sed#include "llvm/CodeGen/MachineFunctionPass.h"
26193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h"
27193323Sed#include "llvm/CodeGen/MachineLoopInfo.h"
28193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h"
29193323Sed#include "llvm/CodeGen/MachineTraceMetrics.h"
30193323Sed#include "llvm/CodeGen/Passes.h"
31193323Sed#include "llvm/CodeGen/TargetInstrInfo.h"
32193323Sed#include "llvm/CodeGen/TargetRegisterInfo.h"
33193323Sed#include "llvm/CodeGen/TargetSubtargetInfo.h"
34193323Sed#include "llvm/InitializePasses.h"
35193323Sed#include "llvm/Support/CommandLine.h"
36193323Sed#include "llvm/Support/Debug.h"
37193323Sed#include "llvm/Support/raw_ostream.h"
38193323Sed
39193323Sedusing namespace llvm;
40193323Sed
41193323Sed#define DEBUG_TYPE "aarch64-ccmp"
42193323Sed
43193323Sed// Absolute maximum number of instructions allowed per speculated block.
44193323Sed// This bypasses all other heuristics, so it should be set fairly high.
45193323Sedstatic cl::opt<unsigned> BlockInstrLimit(
46193323Sed    "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
47193323Sed    cl::desc("Maximum number of instructions per speculated block."));
48193323Sed
49193323Sed// Stress testing mode - disable heuristics.
50193323Sedstatic cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
51193323Sed                            cl::desc("Turn all knobs to 11"));
52193323Sed
53193323SedSTATISTIC(NumConsidered, "Number of ccmps considered");
54193323SedSTATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
55193323SedSTATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
56193323SedSTATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
57193323SedSTATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
58193323SedSTATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
59193323SedSTATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
60193323SedSTATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
61193323SedSTATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
62193323SedSTATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
63193323SedSTATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
64193323Sed
65193323SedSTATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
66193323Sed
67193323SedSTATISTIC(NumConverted, "Number of ccmp instructions created");
68193323SedSTATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
69193323Sed
70193323Sed//===----------------------------------------------------------------------===//
71193323Sed//                                 SSACCmpConv
72193323Sed//===----------------------------------------------------------------------===//
73193323Sed//
74193323Sed// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
75193323Sed// after determining if it is possible. The class contains no heuristics;
76193323Sed// external code should be used to determine when ccmp-conversion is a good
77193323Sed// idea.
78193323Sed//
79193323Sed// CCmp-formation works on a CFG representing chained conditions, typically
80193323Sed// from C's short-circuit || and && operators:
81193323Sed//
82193323Sed//   From:         Head            To:         Head
83193323Sed//                 / |                         CmpBB
84193323Sed//                /  |                         / |
85193323Sed//               |  CmpBB                     /  |
86193323Sed//               |  / |                    Tail  |
87193323Sed//               | /  |                      |   |
88193323Sed//              Tail  |                      |   |
89193323Sed//                |   |                      |   |
90//               ... ...                    ... ...
91//
92// The Head block is terminated by a br.cond instruction, and the CmpBB block
93// contains compare + br.cond. Tail must be a successor of both.
94//
95// The cmp-conversion turns the compare instruction in CmpBB into a conditional
96// compare, and merges CmpBB into Head, speculatively executing its
97// instructions. The AArch64 conditional compare instructions have an immediate
98// operand that specifies the NZCV flag values when the condition is false and
99// the compare isn't executed. This makes it possible to chain compares with
100// different condition codes.
101//
102// Example:
103//
104//    if (a == 5 || b == 17)
105//      foo();
106//
107//    Head:
108//       cmp  w0, #5
109//       b.eq Tail
110//    CmpBB:
111//       cmp  w1, #17
112//       b.eq Tail
113//    ...
114//    Tail:
115//      bl _foo
116//
117//  Becomes:
118//
119//    Head:
120//       cmp  w0, #5
121//       ccmp w1, #17, 4, ne  ; 4 = nZcv
122//       b.eq Tail
123//    ...
124//    Tail:
125//      bl _foo
126//
127// The ccmp condition code is the one that would cause the Head terminator to
128// branch to CmpBB.
129//
130// FIXME: It should also be possible to speculate a block on the critical edge
131// between Head and Tail, just like if-converting a diamond.
132//
133// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
134
135namespace {
136class SSACCmpConv {
137  MachineFunction *MF;
138  const TargetInstrInfo *TII;
139  const TargetRegisterInfo *TRI;
140  MachineRegisterInfo *MRI;
141  const MachineBranchProbabilityInfo *MBPI;
142
143public:
144  /// The first block containing a conditional branch, dominating everything
145  /// else.
146  MachineBasicBlock *Head;
147
148  /// The block containing cmp+br.cond with a successor shared with Head.
149  MachineBasicBlock *CmpBB;
150
151  /// The common successor for Head and CmpBB.
152  MachineBasicBlock *Tail;
153
154  /// The compare instruction in CmpBB that can be converted to a ccmp.
155  MachineInstr *CmpMI;
156
157private:
158  /// The branch condition in Head as determined by analyzeBranch.
159  SmallVector<MachineOperand, 4> HeadCond;
160
161  /// The condition code that makes Head branch to CmpBB.
162  AArch64CC::CondCode HeadCmpBBCC;
163
164  /// The branch condition in CmpBB.
165  SmallVector<MachineOperand, 4> CmpBBCond;
166
167  /// The condition code that makes CmpBB branch to Tail.
168  AArch64CC::CondCode CmpBBTailCC;
169
170  /// Check if the Tail PHIs are trivially convertible.
171  bool trivialTailPHIs();
172
173  /// Remove CmpBB from the Tail PHIs.
174  void updateTailPHIs();
175
176  /// Check if an operand defining DstReg is dead.
177  bool isDeadDef(unsigned DstReg);
178
179  /// Find the compare instruction in MBB that controls the conditional branch.
180  /// Return NULL if a convertible instruction can't be found.
181  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
182
183  /// Return true if all non-terminator instructions in MBB can be safely
184  /// speculated.
185  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
186
187public:
188  /// runOnMachineFunction - Initialize per-function data structures.
189  void runOnMachineFunction(MachineFunction &MF,
190                            const MachineBranchProbabilityInfo *MBPI) {
191    this->MF = &MF;
192    this->MBPI = MBPI;
193    TII = MF.getSubtarget().getInstrInfo();
194    TRI = MF.getSubtarget().getRegisterInfo();
195    MRI = &MF.getRegInfo();
196  }
197
198  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
199  /// internal state, and return true.
200  bool canConvert(MachineBasicBlock *MBB);
201
202  /// Cmo-convert the last block passed to canConvertCmp(), assuming
203  /// it is possible. Add any erased blocks to RemovedBlocks.
204  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
205
206  /// Return the expected code size delta if the conversion into a
207  /// conditional compare is performed.
208  int expectedCodeSizeDelta() const;
209};
210} // end anonymous namespace
211
212// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
213// This means that no if-conversion is required when merging CmpBB into Head.
214bool SSACCmpConv::trivialTailPHIs() {
215  for (auto &I : *Tail) {
216    if (!I.isPHI())
217      break;
218    unsigned HeadReg = 0, CmpBBReg = 0;
219    // PHI operands come in (VReg, MBB) pairs.
220    for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
221      MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
222      Register Reg = I.getOperand(oi).getReg();
223      if (MBB == Head) {
224        assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
225        HeadReg = Reg;
226      }
227      if (MBB == CmpBB) {
228        assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
229        CmpBBReg = Reg;
230      }
231    }
232    if (HeadReg != CmpBBReg)
233      return false;
234  }
235  return true;
236}
237
238// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
239// removing the CmpBB operands. The Head operands will be identical.
240void SSACCmpConv::updateTailPHIs() {
241  for (auto &I : *Tail) {
242    if (!I.isPHI())
243      break;
244    // I is a PHI. It can have multiple entries for CmpBB.
245    for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
246      // PHI operands are (Reg, MBB) at (oi-2, oi-1).
247      if (I.getOperand(oi - 1).getMBB() == CmpBB) {
248        I.removeOperand(oi - 1);
249        I.removeOperand(oi - 2);
250      }
251    }
252  }
253}
254
255// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
256// are still writing virtual registers without any uses.
257bool SSACCmpConv::isDeadDef(unsigned DstReg) {
258  // Writes to the zero register are dead.
259  if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
260    return true;
261  if (!Register::isVirtualRegister(DstReg))
262    return false;
263  // A virtual register def without any uses will be marked dead later, and
264  // eventually replaced by the zero register.
265  return MRI->use_nodbg_empty(DstReg);
266}
267
268// Parse a condition code returned by analyzeBranch, and compute the CondCode
269// corresponding to TBB.
270// Return
271static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
272  // A normal br.cond simply has the condition code.
273  if (Cond[0].getImm() != -1) {
274    assert(Cond.size() == 1 && "Unknown Cond array format");
275    CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
276    return true;
277  }
278  // For tbz and cbz instruction, the opcode is next.
279  switch (Cond[1].getImm()) {
280  default:
281    // This includes tbz / tbnz branches which can't be converted to
282    // ccmp + br.cond.
283    return false;
284  case AArch64::CBZW:
285  case AArch64::CBZX:
286    assert(Cond.size() == 3 && "Unknown Cond array format");
287    CC = AArch64CC::EQ;
288    return true;
289  case AArch64::CBNZW:
290  case AArch64::CBNZX:
291    assert(Cond.size() == 3 && "Unknown Cond array format");
292    CC = AArch64CC::NE;
293    return true;
294  }
295}
296
297MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
298  MachineBasicBlock::iterator I = MBB->getFirstTerminator();
299  if (I == MBB->end())
300    return nullptr;
301  // The terminator must be controlled by the flags.
302  if (!I->readsRegister(AArch64::NZCV)) {
303    switch (I->getOpcode()) {
304    case AArch64::CBZW:
305    case AArch64::CBZX:
306    case AArch64::CBNZW:
307    case AArch64::CBNZX:
308      // These can be converted into a ccmp against #0.
309      return &*I;
310    }
311    ++NumCmpTermRejs;
312    LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
313    return nullptr;
314  }
315
316  // Now find the instruction controlling the terminator.
317  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
318    I = prev_nodbg(I, MBB->begin());
319    assert(!I->isTerminator() && "Spurious terminator");
320    switch (I->getOpcode()) {
321    // cmp is an alias for subs with a dead destination register.
322    case AArch64::SUBSWri:
323    case AArch64::SUBSXri:
324    // cmn is an alias for adds with a dead destination register.
325    case AArch64::ADDSWri:
326    case AArch64::ADDSXri:
327      // Check that the immediate operand is within range, ccmp wants a uimm5.
328      // Rd = SUBSri Rn, imm, shift
329      if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
330        LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
331        ++NumImmRangeRejs;
332        return nullptr;
333      }
334      [[fallthrough]];
335    case AArch64::SUBSWrr:
336    case AArch64::SUBSXrr:
337    case AArch64::ADDSWrr:
338    case AArch64::ADDSXrr:
339      if (isDeadDef(I->getOperand(0).getReg()))
340        return &*I;
341      LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
342                        << *I);
343      ++NumLiveDstRejs;
344      return nullptr;
345    case AArch64::FCMPSrr:
346    case AArch64::FCMPDrr:
347    case AArch64::FCMPESrr:
348    case AArch64::FCMPEDrr:
349      return &*I;
350    }
351
352    // Check for flag reads and clobbers.
353    PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI);
354
355    if (PRI.Read) {
356      // The ccmp doesn't produce exactly the same flags as the original
357      // compare, so reject the transform if there are uses of the flags
358      // besides the terminators.
359      LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
360      ++NumMultNZCVUses;
361      return nullptr;
362    }
363
364    if (PRI.Defined || PRI.Clobbered) {
365      LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
366      ++NumUnknNZCVDefs;
367      return nullptr;
368    }
369  }
370  LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
371                    << '\n');
372  return nullptr;
373}
374
375/// Determine if all the instructions in MBB can safely
376/// be speculated. The terminators are not considered.
377///
378/// Only CmpMI is allowed to clobber the flags.
379///
380bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
381                                     const MachineInstr *CmpMI) {
382  // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
383  // get right.
384  if (!MBB->livein_empty()) {
385    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
386    return false;
387  }
388
389  unsigned InstrCount = 0;
390
391  // Check all instructions, except the terminators. It is assumed that
392  // terminators never have side effects or define any used register values.
393  for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
394    if (I.isDebugInstr())
395      continue;
396
397    if (++InstrCount > BlockInstrLimit && !Stress) {
398      LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
399                        << BlockInstrLimit << " instructions.\n");
400      return false;
401    }
402
403    // There shouldn't normally be any phis in a single-predecessor block.
404    if (I.isPHI()) {
405      LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
406      return false;
407    }
408
409    // Don't speculate loads. Note that it may be possible and desirable to
410    // speculate GOT or constant pool loads that are guaranteed not to trap,
411    // but we don't support that for now.
412    if (I.mayLoad()) {
413      LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
414      return false;
415    }
416
417    // We never speculate stores, so an AA pointer isn't necessary.
418    bool DontMoveAcrossStore = true;
419    if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
420      LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
421      return false;
422    }
423
424    // Only CmpMI is allowed to clobber the flags.
425    if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
426      LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
427      return false;
428    }
429  }
430  return true;
431}
432
433/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
434/// candidate for cmp-conversion. Fill out the internal state.
435///
436bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
437  Head = MBB;
438  Tail = CmpBB = nullptr;
439
440  if (Head->succ_size() != 2)
441    return false;
442  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
443  MachineBasicBlock *Succ1 = Head->succ_begin()[1];
444
445  // CmpBB can only have a single predecessor. Tail is allowed many.
446  if (Succ0->pred_size() != 1)
447    std::swap(Succ0, Succ1);
448
449  // Succ0 is our candidate for CmpBB.
450  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
451    return false;
452
453  CmpBB = Succ0;
454  Tail = Succ1;
455
456  if (!CmpBB->isSuccessor(Tail))
457    return false;
458
459  // The CFG topology checks out.
460  LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
461                    << printMBBReference(*CmpBB) << " -> "
462                    << printMBBReference(*Tail) << '\n');
463  ++NumConsidered;
464
465  // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
466  //
467  // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
468  // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
469  // always be safe to sink the ccmp down to immediately before the CmpBB
470  // terminators.
471  if (!trivialTailPHIs()) {
472    LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
473    ++NumPhiRejs;
474    return false;
475  }
476
477  if (!Tail->livein_empty()) {
478    LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
479    ++NumPhysRejs;
480    return false;
481  }
482
483  // CmpBB should never have PHIs since Head is its only predecessor.
484  // FIXME: Clean them up if it happens.
485  if (!CmpBB->empty() && CmpBB->front().isPHI()) {
486    LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
487    ++NumPhi2Rejs;
488    return false;
489  }
490
491  if (!CmpBB->livein_empty()) {
492    LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
493    ++NumPhysRejs;
494    return false;
495  }
496
497  // The branch we're looking to eliminate must be analyzable.
498  HeadCond.clear();
499  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
500  if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
501    LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
502    ++NumHeadBranchRejs;
503    return false;
504  }
505
506  // This is weird, probably some sort of degenerate CFG, or an edge to a
507  // landing pad.
508  if (!TBB || HeadCond.empty()) {
509    LLVM_DEBUG(
510        dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
511    ++NumHeadBranchRejs;
512    return false;
513  }
514
515  if (!parseCond(HeadCond, HeadCmpBBCC)) {
516    LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
517    ++NumHeadBranchRejs;
518    return false;
519  }
520
521  // Make sure the branch direction is right.
522  if (TBB != CmpBB) {
523    assert(TBB == Tail && "Unexpected TBB");
524    HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
525  }
526
527  CmpBBCond.clear();
528  TBB = FBB = nullptr;
529  if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
530    LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
531    ++NumCmpBranchRejs;
532    return false;
533  }
534
535  if (!TBB || CmpBBCond.empty()) {
536    LLVM_DEBUG(
537        dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
538    ++NumCmpBranchRejs;
539    return false;
540  }
541
542  if (!parseCond(CmpBBCond, CmpBBTailCC)) {
543    LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
544    ++NumCmpBranchRejs;
545    return false;
546  }
547
548  if (TBB != Tail)
549    CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
550
551  LLVM_DEBUG(dbgs() << "Head->CmpBB on "
552                    << AArch64CC::getCondCodeName(HeadCmpBBCC)
553                    << ", CmpBB->Tail on "
554                    << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
555
556  CmpMI = findConvertibleCompare(CmpBB);
557  if (!CmpMI)
558    return false;
559
560  if (!canSpeculateInstrs(CmpBB, CmpMI)) {
561    ++NumSpeculateRejs;
562    return false;
563  }
564  return true;
565}
566
567void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
568  LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
569                    << printMBBReference(*Head) << ":\n"
570                    << *CmpBB);
571
572  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
573  // Update the CFG first.
574  updateTailPHIs();
575
576  // Save successor probabilties before removing CmpBB and Tail from their
577  // parents.
578  BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
579  BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
580
581  Head->removeSuccessor(CmpBB);
582  CmpBB->removeSuccessor(Tail);
583
584  // If Head and CmpBB had successor probabilties, udpate the probabilities to
585  // reflect the ccmp-conversion.
586  if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
587
588    // Head is allowed two successors. We've removed CmpBB, so the remaining
589    // successor is Tail. We need to increase the successor probability for
590    // Tail to account for the CmpBB path we removed.
591    //
592    // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
593    assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
594    BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
595    Head->setSuccProbability(Head->succ_begin(),
596                             Head2Tail + Head2CmpBB * CmpBB2Tail);
597
598    // We will transfer successors of CmpBB to Head in a moment without
599    // normalizing the successor probabilities. Set the successor probabilites
600    // before doing so.
601    //
602    // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
603    for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
604      BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
605      CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
606    }
607  }
608
609  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
610  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
611  TII->removeBranch(*Head);
612
613  // If the Head terminator was one of the cbz / tbz branches with built-in
614  // compare, we need to insert an explicit compare instruction in its place.
615  if (HeadCond[0].getImm() == -1) {
616    ++NumCompBranches;
617    unsigned Opc = 0;
618    switch (HeadCond[1].getImm()) {
619    case AArch64::CBZW:
620    case AArch64::CBNZW:
621      Opc = AArch64::SUBSWri;
622      break;
623    case AArch64::CBZX:
624    case AArch64::CBNZX:
625      Opc = AArch64::SUBSXri;
626      break;
627    default:
628      llvm_unreachable("Cannot convert Head branch");
629    }
630    const MCInstrDesc &MCID = TII->get(Opc);
631    // Create a dummy virtual register for the SUBS def.
632    Register DestReg =
633        MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
634    // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
635    BuildMI(*Head, Head->end(), TermDL, MCID)
636        .addReg(DestReg, RegState::Define | RegState::Dead)
637        .add(HeadCond[2])
638        .addImm(0)
639        .addImm(0);
640    // SUBS uses the GPR*sp register classes.
641    MRI->constrainRegClass(HeadCond[2].getReg(),
642                           TII->getRegClass(MCID, 1, TRI, *MF));
643  }
644
645  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
646
647  // Now replace CmpMI with a ccmp instruction that also considers the incoming
648  // flags.
649  unsigned Opc = 0;
650  unsigned FirstOp = 1;   // First CmpMI operand to copy.
651  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
652  switch (CmpMI->getOpcode()) {
653  default:
654    llvm_unreachable("Unknown compare opcode");
655  case AArch64::SUBSWri:    Opc = AArch64::CCMPWi; break;
656  case AArch64::SUBSWrr:    Opc = AArch64::CCMPWr; break;
657  case AArch64::SUBSXri:    Opc = AArch64::CCMPXi; break;
658  case AArch64::SUBSXrr:    Opc = AArch64::CCMPXr; break;
659  case AArch64::ADDSWri:    Opc = AArch64::CCMNWi; break;
660  case AArch64::ADDSWrr:    Opc = AArch64::CCMNWr; break;
661  case AArch64::ADDSXri:    Opc = AArch64::CCMNXi; break;
662  case AArch64::ADDSXrr:    Opc = AArch64::CCMNXr; break;
663  case AArch64::FCMPSrr:    Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
664  case AArch64::FCMPDrr:    Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
665  case AArch64::FCMPESrr:   Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
666  case AArch64::FCMPEDrr:   Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
667  case AArch64::CBZW:
668  case AArch64::CBNZW:
669    Opc = AArch64::CCMPWi;
670    FirstOp = 0;
671    isZBranch = true;
672    break;
673  case AArch64::CBZX:
674  case AArch64::CBNZX:
675    Opc = AArch64::CCMPXi;
676    FirstOp = 0;
677    isZBranch = true;
678    break;
679  }
680
681  // The ccmp instruction should set the flags according to the comparison when
682  // Head would have branched to CmpBB.
683  // The NZCV immediate operand should provide flags for the case where Head
684  // would have branched to Tail. These flags should cause the new Head
685  // terminator to branch to tail.
686  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
687  const MCInstrDesc &MCID = TII->get(Opc);
688  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
689                         TII->getRegClass(MCID, 0, TRI, *MF));
690  if (CmpMI->getOperand(FirstOp + 1).isReg())
691    MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
692                           TII->getRegClass(MCID, 1, TRI, *MF));
693  MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
694                                .add(CmpMI->getOperand(FirstOp)); // Register Rn
695  if (isZBranch)
696    MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
697  else
698    MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
699  MIB.addImm(NZCV).addImm(HeadCmpBBCC);
700
701  // If CmpMI was a terminator, we need a new conditional branch to replace it.
702  // This now becomes a Head terminator.
703  if (isZBranch) {
704    bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
705                CmpMI->getOpcode() == AArch64::CBNZX;
706    BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
707        .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
708        .add(CmpMI->getOperand(1)); // Branch target.
709  }
710  CmpMI->eraseFromParent();
711  Head->updateTerminator(CmpBB->getNextNode());
712
713  RemovedBlocks.push_back(CmpBB);
714  CmpBB->eraseFromParent();
715  LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
716  ++NumConverted;
717}
718
719int SSACCmpConv::expectedCodeSizeDelta() const {
720  int delta = 0;
721  // If the Head terminator was one of the cbz / tbz branches with built-in
722  // compare, we need to insert an explicit compare instruction in its place
723  // plus a branch instruction.
724  if (HeadCond[0].getImm() == -1) {
725    switch (HeadCond[1].getImm()) {
726    case AArch64::CBZW:
727    case AArch64::CBNZW:
728    case AArch64::CBZX:
729    case AArch64::CBNZX:
730      // Therefore delta += 1
731      delta = 1;
732      break;
733    default:
734      llvm_unreachable("Cannot convert Head branch");
735    }
736  }
737  // If the Cmp terminator was one of the cbz / tbz branches with
738  // built-in compare, it will be turned into a compare instruction
739  // into Head, but we do not save any instruction.
740  // Otherwise, we save the branch instruction.
741  switch (CmpMI->getOpcode()) {
742  default:
743    --delta;
744    break;
745  case AArch64::CBZW:
746  case AArch64::CBNZW:
747  case AArch64::CBZX:
748  case AArch64::CBNZX:
749    break;
750  }
751  return delta;
752}
753
754//===----------------------------------------------------------------------===//
755//                       AArch64ConditionalCompares Pass
756//===----------------------------------------------------------------------===//
757
758namespace {
759class AArch64ConditionalCompares : public MachineFunctionPass {
760  const MachineBranchProbabilityInfo *MBPI;
761  const TargetInstrInfo *TII;
762  const TargetRegisterInfo *TRI;
763  MCSchedModel SchedModel;
764  // Does the proceeded function has Oz attribute.
765  bool MinSize;
766  MachineRegisterInfo *MRI;
767  MachineDominatorTree *DomTree;
768  MachineLoopInfo *Loops;
769  MachineTraceMetrics *Traces;
770  MachineTraceMetrics::Ensemble *MinInstr;
771  SSACCmpConv CmpConv;
772
773public:
774  static char ID;
775  AArch64ConditionalCompares() : MachineFunctionPass(ID) {
776    initializeAArch64ConditionalComparesPass(*PassRegistry::getPassRegistry());
777  }
778  void getAnalysisUsage(AnalysisUsage &AU) const override;
779  bool runOnMachineFunction(MachineFunction &MF) override;
780  StringRef getPassName() const override {
781    return "AArch64 Conditional Compares";
782  }
783
784private:
785  bool tryConvert(MachineBasicBlock *);
786  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
787  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
788  void invalidateTraces();
789  bool shouldConvert();
790};
791} // end anonymous namespace
792
793char AArch64ConditionalCompares::ID = 0;
794
795INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
796                      "AArch64 CCMP Pass", false, false)
797INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
798INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
799INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
800INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
801                    "AArch64 CCMP Pass", false, false)
802
803FunctionPass *llvm::createAArch64ConditionalCompares() {
804  return new AArch64ConditionalCompares();
805}
806
807void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
808  AU.addRequired<MachineBranchProbabilityInfo>();
809  AU.addRequired<MachineDominatorTree>();
810  AU.addPreserved<MachineDominatorTree>();
811  AU.addRequired<MachineLoopInfo>();
812  AU.addPreserved<MachineLoopInfo>();
813  AU.addRequired<MachineTraceMetrics>();
814  AU.addPreserved<MachineTraceMetrics>();
815  MachineFunctionPass::getAnalysisUsage(AU);
816}
817
818/// Update the dominator tree after if-conversion erased some blocks.
819void AArch64ConditionalCompares::updateDomTree(
820    ArrayRef<MachineBasicBlock *> Removed) {
821  // convert() removes CmpBB which was previously dominated by Head.
822  // CmpBB children should be transferred to Head.
823  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
824  for (MachineBasicBlock *RemovedMBB : Removed) {
825    MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
826    assert(Node != HeadNode && "Cannot erase the head node");
827    assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
828    while (Node->getNumChildren())
829      DomTree->changeImmediateDominator(Node->back(), HeadNode);
830    DomTree->eraseNode(RemovedMBB);
831  }
832}
833
834/// Update LoopInfo after if-conversion.
835void
836AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
837  if (!Loops)
838    return;
839  for (MachineBasicBlock *RemovedMBB : Removed)
840    Loops->removeBlock(RemovedMBB);
841}
842
843/// Invalidate MachineTraceMetrics before if-conversion.
844void AArch64ConditionalCompares::invalidateTraces() {
845  Traces->invalidate(CmpConv.Head);
846  Traces->invalidate(CmpConv.CmpBB);
847}
848
849/// Apply cost model and heuristics to the if-conversion in IfConv.
850/// Return true if the conversion is a good idea.
851///
852bool AArch64ConditionalCompares::shouldConvert() {
853  // Stress testing mode disables all cost considerations.
854  if (Stress)
855    return true;
856  if (!MinInstr)
857    MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
858
859  // Head dominates CmpBB, so it is always included in its trace.
860  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
861
862  // If code size is the main concern
863  if (MinSize) {
864    int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
865    LLVM_DEBUG(dbgs() << "Code size delta:  " << CodeSizeDelta << '\n');
866    // If we are minimizing the code size, do the conversion whatever
867    // the cost is.
868    if (CodeSizeDelta < 0)
869      return true;
870    if (CodeSizeDelta > 0) {
871      LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
872      return false;
873    }
874    // CodeSizeDelta == 0, continue with the regular heuristics
875  }
876
877  // Heuristic: The compare conversion delays the execution of the branch
878  // instruction because we must wait for the inputs to the second compare as
879  // well. The branch has no dependent instructions, but delaying it increases
880  // the cost of a misprediction.
881  //
882  // Set a limit on the delay we will accept.
883  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
884
885  // Instruction depths can be computed for all trace instructions above CmpBB.
886  unsigned HeadDepth =
887      Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
888  unsigned CmpBBDepth =
889      Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
890  LLVM_DEBUG(dbgs() << "Head depth:  " << HeadDepth
891                    << "\nCmpBB depth: " << CmpBBDepth << '\n');
892  if (CmpBBDepth > HeadDepth + DelayLimit) {
893    LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
894                      << " cycles.\n");
895    return false;
896  }
897
898  // Check the resource depth at the bottom of CmpBB - these instructions will
899  // be speculated.
900  unsigned ResDepth = Trace.getResourceDepth(true);
901  LLVM_DEBUG(dbgs() << "Resources:   " << ResDepth << '\n');
902
903  // Heuristic: The speculatively executed instructions must all be able to
904  // merge into the Head block. The Head critical path should dominate the
905  // resource cost of the speculated instructions.
906  if (ResDepth > HeadDepth) {
907    LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
908    return false;
909  }
910  return true;
911}
912
913bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
914  bool Changed = false;
915  while (CmpConv.canConvert(MBB) && shouldConvert()) {
916    invalidateTraces();
917    SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
918    CmpConv.convert(RemovedBlocks);
919    Changed = true;
920    updateDomTree(RemovedBlocks);
921    updateLoops(RemovedBlocks);
922  }
923  return Changed;
924}
925
926bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
927  LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
928                    << "********** Function: " << MF.getName() << '\n');
929  if (skipFunction(MF.getFunction()))
930    return false;
931
932  TII = MF.getSubtarget().getInstrInfo();
933  TRI = MF.getSubtarget().getRegisterInfo();
934  SchedModel = MF.getSubtarget().getSchedModel();
935  MRI = &MF.getRegInfo();
936  DomTree = &getAnalysis<MachineDominatorTree>();
937  Loops = &getAnalysis<MachineLoopInfo>();
938  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
939  Traces = &getAnalysis<MachineTraceMetrics>();
940  MinInstr = nullptr;
941  MinSize = MF.getFunction().hasMinSize();
942
943  bool Changed = false;
944  CmpConv.runOnMachineFunction(MF, MBPI);
945
946  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
947  // cmp-conversions from the same head block.
948  // Note that updateDomTree() modifies the children of the DomTree node
949  // currently being visited. The df_iterator supports that; it doesn't look at
950  // child_begin() / child_end() until after a node has been visited.
951  for (auto *I : depth_first(DomTree))
952    if (tryConvert(I->getBlock()))
953      Changed = true;
954
955  return Changed;
956}
957