1259698Sdim//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
2259698Sdim//
3259698Sdim//                     The LLVM Compiler Infrastructure
4259698Sdim//
5259698Sdim// This file is distributed under the University of Illinois Open Source
6259698Sdim// License. See LICENSE.TXT for details.
7259698Sdim//
8259698Sdim//===----------------------------------------------------------------------===//
9259698Sdim//
10259698Sdim// This pass makes sure that all branches are in range.  There are several ways
11259698Sdim// in which this could be done.  One aggressive approach is to assume that all
12259698Sdim// branches are in range and successively replace those that turn out not
13259698Sdim// to be in range with a longer form (branch relaxation).  A simple
14259698Sdim// implementation is to continually walk through the function relaxing
15259698Sdim// branches until no more changes are needed and a fixed point is reached.
16259698Sdim// However, in the pathological worst case, this implementation is
17259698Sdim// quadratic in the number of blocks; relaxing branch N can make branch N-1
18259698Sdim// go out of range, which in turn can make branch N-2 go out of range,
19259698Sdim// and so on.
20259698Sdim//
21259698Sdim// An alternative approach is to assume that all branches must be
22259698Sdim// converted to their long forms, then reinstate the short forms of
23259698Sdim// branches that, even under this pessimistic assumption, turn out to be
24259698Sdim// in range (branch shortening).  This too can be implemented as a function
25259698Sdim// walk that is repeated until a fixed point is reached.  In general,
26259698Sdim// the result of shortening is not as good as that of relaxation, and
27259698Sdim// shortening is also quadratic in the worst case; shortening branch N
28259698Sdim// can bring branch N-1 in range of the short form, which in turn can do
29259698Sdim// the same for branch N-2, and so on.  The main advantage of shortening
30259698Sdim// is that each walk through the function produces valid code, so it is
31259698Sdim// possible to stop at any point after the first walk.  The quadraticness
32259698Sdim// could therefore be handled with a maximum pass count, although the
33259698Sdim// question then becomes: what maximum count should be used?
34259698Sdim//
35259698Sdim// On SystemZ, long branches are only needed for functions bigger than 64k,
36259698Sdim// which are relatively rare to begin with, and the long branch sequences
37259698Sdim// are actually relatively cheap.  It therefore doesn't seem worth spending
38259698Sdim// much compilation time on the problem.  Instead, the approach we take is:
39259698Sdim//
40259698Sdim// (1) Work out the address that each block would have if no branches
41259698Sdim//     need relaxing.  Exit the pass early if all branches are in range
42259698Sdim//     according to this assumption.
43259698Sdim//
44259698Sdim// (2) Work out the address that each block would have if all branches
45259698Sdim//     need relaxing.
46259698Sdim//
47259698Sdim// (3) Walk through the block calculating the final address of each instruction
48259698Sdim//     and relaxing those that need to be relaxed.  For backward branches,
49259698Sdim//     this check uses the final address of the target block, as calculated
50259698Sdim//     earlier in the walk.  For forward branches, this check uses the
51259698Sdim//     address of the target block that was calculated in (2).  Both checks
52259698Sdim//     give a conservatively-correct range.
53259698Sdim//
54259698Sdim//===----------------------------------------------------------------------===//
55259698Sdim
56259698Sdim#include "SystemZTargetMachine.h"
57259698Sdim#include "llvm/ADT/Statistic.h"
58259698Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
59259698Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
60259698Sdim#include "llvm/IR/Function.h"
61259698Sdim#include "llvm/Support/CommandLine.h"
62259698Sdim#include "llvm/Support/MathExtras.h"
63259698Sdim#include "llvm/Target/TargetInstrInfo.h"
64259698Sdim#include "llvm/Target/TargetMachine.h"
65259698Sdim#include "llvm/Target/TargetRegisterInfo.h"
66259698Sdim
67259698Sdimusing namespace llvm;
68259698Sdim
69276479Sdim#define DEBUG_TYPE "systemz-long-branch"
70276479Sdim
71259698SdimSTATISTIC(LongBranches, "Number of long branches.");
72259698Sdim
73259698Sdimnamespace {
74276479Sdim// Represents positional information about a basic block.
75276479Sdimstruct MBBInfo {
76276479Sdim  // The address that we currently assume the block has.
77276479Sdim  uint64_t Address;
78259698Sdim
79276479Sdim  // The size of the block in bytes, excluding terminators.
80276479Sdim  // This value never changes.
81276479Sdim  uint64_t Size;
82259698Sdim
83276479Sdim  // The minimum alignment of the block, as a log2 value.
84276479Sdim  // This value never changes.
85276479Sdim  unsigned Alignment;
86259698Sdim
87276479Sdim  // The number of terminators in this block.  This value never changes.
88276479Sdim  unsigned NumTerminators;
89259698Sdim
90276479Sdim  MBBInfo()
91276479Sdim    : Address(0), Size(0), Alignment(0), NumTerminators(0) {}
92276479Sdim};
93259698Sdim
94276479Sdim// Represents the state of a block terminator.
95276479Sdimstruct TerminatorInfo {
96276479Sdim  // If this terminator is a relaxable branch, this points to the branch
97276479Sdim  // instruction, otherwise it is null.
98276479Sdim  MachineInstr *Branch;
99259698Sdim
100276479Sdim  // The address that we currently assume the terminator has.
101276479Sdim  uint64_t Address;
102259698Sdim
103276479Sdim  // The current size of the terminator in bytes.
104276479Sdim  uint64_t Size;
105259698Sdim
106276479Sdim  // If Branch is nonnull, this is the number of the target block,
107276479Sdim  // otherwise it is unused.
108276479Sdim  unsigned TargetBlock;
109259698Sdim
110276479Sdim  // If Branch is nonnull, this is the length of the longest relaxed form,
111276479Sdim  // otherwise it is zero.
112276479Sdim  unsigned ExtraRelaxSize;
113259698Sdim
114276479Sdim  TerminatorInfo() : Branch(nullptr), Size(0), TargetBlock(0),
115276479Sdim                     ExtraRelaxSize(0) {}
116276479Sdim};
117259698Sdim
118276479Sdim// Used to keep track of the current position while iterating over the blocks.
119276479Sdimstruct BlockPosition {
120276479Sdim  // The address that we assume this position has.
121276479Sdim  uint64_t Address;
122259698Sdim
123276479Sdim  // The number of low bits in Address that are known to be the same
124276479Sdim  // as the runtime address.
125276479Sdim  unsigned KnownBits;
126259698Sdim
127276479Sdim  BlockPosition(unsigned InitialAlignment)
128276479Sdim    : Address(0), KnownBits(InitialAlignment) {}
129276479Sdim};
130259698Sdim
131276479Sdimclass SystemZLongBranch : public MachineFunctionPass {
132276479Sdimpublic:
133276479Sdim  static char ID;
134276479Sdim  SystemZLongBranch(const SystemZTargetMachine &tm)
135276479Sdim    : MachineFunctionPass(ID), TII(nullptr) {}
136259698Sdim
137276479Sdim  const char *getPassName() const override {
138276479Sdim    return "SystemZ Long Branch";
139276479Sdim  }
140259698Sdim
141276479Sdim  bool runOnMachineFunction(MachineFunction &F) override;
142259698Sdim
143276479Sdimprivate:
144276479Sdim  void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
145276479Sdim  void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
146276479Sdim                      bool AssumeRelaxed);
147276479Sdim  TerminatorInfo describeTerminator(MachineInstr *MI);
148276479Sdim  uint64_t initMBBInfo();
149276479Sdim  bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
150276479Sdim  bool mustRelaxABranch();
151276479Sdim  void setWorstCaseAddresses();
152276479Sdim  void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode);
153276479Sdim  void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode);
154276479Sdim  void relaxBranch(TerminatorInfo &Terminator);
155276479Sdim  void relaxBranches();
156259698Sdim
157276479Sdim  const SystemZInstrInfo *TII;
158276479Sdim  MachineFunction *MF;
159276479Sdim  SmallVector<MBBInfo, 16> MBBs;
160276479Sdim  SmallVector<TerminatorInfo, 16> Terminators;
161276479Sdim};
162259698Sdim
163276479Sdimchar SystemZLongBranch::ID = 0;
164259698Sdim
165276479Sdimconst uint64_t MaxBackwardRange = 0x10000;
166276479Sdimconst uint64_t MaxForwardRange = 0xfffe;
167276479Sdim} // end anonymous namespace
168259698Sdim
169259698SdimFunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
170259698Sdim  return new SystemZLongBranch(TM);
171259698Sdim}
172259698Sdim
173259698Sdim// Position describes the state immediately before Block.  Update Block
174259698Sdim// accordingly and move Position to the end of the block's non-terminator
175259698Sdim// instructions.
176259698Sdimvoid SystemZLongBranch::skipNonTerminators(BlockPosition &Position,
177259698Sdim                                           MBBInfo &Block) {
178259698Sdim  if (Block.Alignment > Position.KnownBits) {
179259698Sdim    // When calculating the address of Block, we need to conservatively
180259698Sdim    // assume that Block had the worst possible misalignment.
181259698Sdim    Position.Address += ((uint64_t(1) << Block.Alignment) -
182259698Sdim                         (uint64_t(1) << Position.KnownBits));
183259698Sdim    Position.KnownBits = Block.Alignment;
184259698Sdim  }
185259698Sdim
186259698Sdim  // Align the addresses.
187259698Sdim  uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1;
188259698Sdim  Position.Address = (Position.Address + AlignMask) & ~AlignMask;
189259698Sdim
190259698Sdim  // Record the block's position.
191259698Sdim  Block.Address = Position.Address;
192259698Sdim
193259698Sdim  // Move past the non-terminators in the block.
194259698Sdim  Position.Address += Block.Size;
195259698Sdim}
196259698Sdim
197259698Sdim// Position describes the state immediately before Terminator.
198259698Sdim// Update Terminator accordingly and move Position past it.
199259698Sdim// Assume that Terminator will be relaxed if AssumeRelaxed.
200259698Sdimvoid SystemZLongBranch::skipTerminator(BlockPosition &Position,
201259698Sdim                                       TerminatorInfo &Terminator,
202259698Sdim                                       bool AssumeRelaxed) {
203259698Sdim  Terminator.Address = Position.Address;
204259698Sdim  Position.Address += Terminator.Size;
205259698Sdim  if (AssumeRelaxed)
206259698Sdim    Position.Address += Terminator.ExtraRelaxSize;
207259698Sdim}
208259698Sdim
209259698Sdim// Return a description of terminator instruction MI.
210259698SdimTerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
211259698Sdim  TerminatorInfo Terminator;
212259698Sdim  Terminator.Size = TII->getInstSizeInBytes(MI);
213259698Sdim  if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) {
214259698Sdim    switch (MI->getOpcode()) {
215259698Sdim    case SystemZ::J:
216259698Sdim      // Relaxes to JG, which is 2 bytes longer.
217259698Sdim      Terminator.ExtraRelaxSize = 2;
218259698Sdim      break;
219259698Sdim    case SystemZ::BRC:
220259698Sdim      // Relaxes to BRCL, which is 2 bytes longer.
221259698Sdim      Terminator.ExtraRelaxSize = 2;
222259698Sdim      break;
223259698Sdim    case SystemZ::BRCT:
224259698Sdim    case SystemZ::BRCTG:
225259698Sdim      // Relaxes to A(G)HI and BRCL, which is 6 bytes longer.
226259698Sdim      Terminator.ExtraRelaxSize = 6;
227259698Sdim      break;
228259698Sdim    case SystemZ::CRJ:
229259698Sdim    case SystemZ::CLRJ:
230259698Sdim      // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer.
231259698Sdim      Terminator.ExtraRelaxSize = 2;
232259698Sdim      break;
233259698Sdim    case SystemZ::CGRJ:
234259698Sdim    case SystemZ::CLGRJ:
235259698Sdim      // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer.
236259698Sdim      Terminator.ExtraRelaxSize = 4;
237259698Sdim      break;
238259698Sdim    case SystemZ::CIJ:
239259698Sdim    case SystemZ::CGIJ:
240259698Sdim      // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer.
241259698Sdim      Terminator.ExtraRelaxSize = 4;
242259698Sdim      break;
243259698Sdim    case SystemZ::CLIJ:
244259698Sdim    case SystemZ::CLGIJ:
245259698Sdim      // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer.
246259698Sdim      Terminator.ExtraRelaxSize = 6;
247259698Sdim      break;
248259698Sdim    default:
249259698Sdim      llvm_unreachable("Unrecognized branch instruction");
250259698Sdim    }
251259698Sdim    Terminator.Branch = MI;
252259698Sdim    Terminator.TargetBlock =
253259698Sdim      TII->getBranchInfo(MI).Target->getMBB()->getNumber();
254259698Sdim  }
255259698Sdim  return Terminator;
256259698Sdim}
257259698Sdim
258259698Sdim// Fill MBBs and Terminators, setting the addresses on the assumption
259259698Sdim// that no branches need relaxation.  Return the size of the function under
260259698Sdim// this assumption.
261259698Sdimuint64_t SystemZLongBranch::initMBBInfo() {
262259698Sdim  MF->RenumberBlocks();
263259698Sdim  unsigned NumBlocks = MF->size();
264259698Sdim
265259698Sdim  MBBs.clear();
266259698Sdim  MBBs.resize(NumBlocks);
267259698Sdim
268259698Sdim  Terminators.clear();
269259698Sdim  Terminators.reserve(NumBlocks);
270259698Sdim
271259698Sdim  BlockPosition Position(MF->getAlignment());
272259698Sdim  for (unsigned I = 0; I < NumBlocks; ++I) {
273259698Sdim    MachineBasicBlock *MBB = MF->getBlockNumbered(I);
274259698Sdim    MBBInfo &Block = MBBs[I];
275259698Sdim
276259698Sdim    // Record the alignment, for quick access.
277259698Sdim    Block.Alignment = MBB->getAlignment();
278259698Sdim
279259698Sdim    // Calculate the size of the fixed part of the block.
280259698Sdim    MachineBasicBlock::iterator MI = MBB->begin();
281259698Sdim    MachineBasicBlock::iterator End = MBB->end();
282259698Sdim    while (MI != End && !MI->isTerminator()) {
283259698Sdim      Block.Size += TII->getInstSizeInBytes(MI);
284259698Sdim      ++MI;
285259698Sdim    }
286259698Sdim    skipNonTerminators(Position, Block);
287259698Sdim
288259698Sdim    // Add the terminators.
289259698Sdim    while (MI != End) {
290259698Sdim      if (!MI->isDebugValue()) {
291259698Sdim        assert(MI->isTerminator() && "Terminator followed by non-terminator");
292259698Sdim        Terminators.push_back(describeTerminator(MI));
293259698Sdim        skipTerminator(Position, Terminators.back(), false);
294259698Sdim        ++Block.NumTerminators;
295259698Sdim      }
296259698Sdim      ++MI;
297259698Sdim    }
298259698Sdim  }
299259698Sdim
300259698Sdim  return Position.Address;
301259698Sdim}
302259698Sdim
303259698Sdim// Return true if, under current assumptions, Terminator would need to be
304259698Sdim// relaxed if it were placed at address Address.
305259698Sdimbool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator,
306259698Sdim                                        uint64_t Address) {
307259698Sdim  if (!Terminator.Branch)
308259698Sdim    return false;
309259698Sdim
310259698Sdim  const MBBInfo &Target = MBBs[Terminator.TargetBlock];
311259698Sdim  if (Address >= Target.Address) {
312259698Sdim    if (Address - Target.Address <= MaxBackwardRange)
313259698Sdim      return false;
314259698Sdim  } else {
315259698Sdim    if (Target.Address - Address <= MaxForwardRange)
316259698Sdim      return false;
317259698Sdim  }
318259698Sdim
319259698Sdim  return true;
320259698Sdim}
321259698Sdim
322259698Sdim// Return true if, under current assumptions, any terminator needs
323259698Sdim// to be relaxed.
324259698Sdimbool SystemZLongBranch::mustRelaxABranch() {
325276479Sdim  for (auto &Terminator : Terminators)
326276479Sdim    if (mustRelaxBranch(Terminator, Terminator.Address))
327259698Sdim      return true;
328259698Sdim  return false;
329259698Sdim}
330259698Sdim
331259698Sdim// Set the address of each block on the assumption that all branches
332259698Sdim// must be long.
333259698Sdimvoid SystemZLongBranch::setWorstCaseAddresses() {
334259698Sdim  SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
335259698Sdim  BlockPosition Position(MF->getAlignment());
336276479Sdim  for (auto &Block : MBBs) {
337276479Sdim    skipNonTerminators(Position, Block);
338276479Sdim    for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) {
339259698Sdim      skipTerminator(Position, *TI, true);
340259698Sdim      ++TI;
341259698Sdim    }
342259698Sdim  }
343259698Sdim}
344259698Sdim
345259698Sdim// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed
346259698Sdim// by a BRCL on the result.
347259698Sdimvoid SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
348259698Sdim                                           unsigned AddOpcode) {
349259698Sdim  MachineBasicBlock *MBB = MI->getParent();
350259698Sdim  DebugLoc DL = MI->getDebugLoc();
351259698Sdim  BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
352259698Sdim    .addOperand(MI->getOperand(0))
353259698Sdim    .addOperand(MI->getOperand(1))
354259698Sdim    .addImm(-1);
355259698Sdim  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
356259698Sdim    .addImm(SystemZ::CCMASK_ICMP)
357259698Sdim    .addImm(SystemZ::CCMASK_CMP_NE)
358259698Sdim    .addOperand(MI->getOperand(2));
359259698Sdim  // The implicit use of CC is a killing use.
360259698Sdim  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
361259698Sdim  MI->eraseFromParent();
362259698Sdim}
363259698Sdim
364259698Sdim// Split MI into the comparison given by CompareOpcode followed
365259698Sdim// a BRCL on the result.
366259698Sdimvoid SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
367259698Sdim                                           unsigned CompareOpcode) {
368259698Sdim  MachineBasicBlock *MBB = MI->getParent();
369259698Sdim  DebugLoc DL = MI->getDebugLoc();
370259698Sdim  BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
371259698Sdim    .addOperand(MI->getOperand(0))
372259698Sdim    .addOperand(MI->getOperand(1));
373259698Sdim  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
374259698Sdim    .addImm(SystemZ::CCMASK_ICMP)
375259698Sdim    .addOperand(MI->getOperand(2))
376259698Sdim    .addOperand(MI->getOperand(3));
377259698Sdim  // The implicit use of CC is a killing use.
378259698Sdim  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
379259698Sdim  MI->eraseFromParent();
380259698Sdim}
381259698Sdim
382259698Sdim// Relax the branch described by Terminator.
383259698Sdimvoid SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
384259698Sdim  MachineInstr *Branch = Terminator.Branch;
385259698Sdim  switch (Branch->getOpcode()) {
386259698Sdim  case SystemZ::J:
387259698Sdim    Branch->setDesc(TII->get(SystemZ::JG));
388259698Sdim    break;
389259698Sdim  case SystemZ::BRC:
390259698Sdim    Branch->setDesc(TII->get(SystemZ::BRCL));
391259698Sdim    break;
392259698Sdim  case SystemZ::BRCT:
393259698Sdim    splitBranchOnCount(Branch, SystemZ::AHI);
394259698Sdim    break;
395259698Sdim  case SystemZ::BRCTG:
396259698Sdim    splitBranchOnCount(Branch, SystemZ::AGHI);
397259698Sdim    break;
398259698Sdim  case SystemZ::CRJ:
399259698Sdim    splitCompareBranch(Branch, SystemZ::CR);
400259698Sdim    break;
401259698Sdim  case SystemZ::CGRJ:
402259698Sdim    splitCompareBranch(Branch, SystemZ::CGR);
403259698Sdim    break;
404259698Sdim  case SystemZ::CIJ:
405259698Sdim    splitCompareBranch(Branch, SystemZ::CHI);
406259698Sdim    break;
407259698Sdim  case SystemZ::CGIJ:
408259698Sdim    splitCompareBranch(Branch, SystemZ::CGHI);
409259698Sdim    break;
410259698Sdim  case SystemZ::CLRJ:
411259698Sdim    splitCompareBranch(Branch, SystemZ::CLR);
412259698Sdim    break;
413259698Sdim  case SystemZ::CLGRJ:
414259698Sdim    splitCompareBranch(Branch, SystemZ::CLGR);
415259698Sdim    break;
416259698Sdim  case SystemZ::CLIJ:
417259698Sdim    splitCompareBranch(Branch, SystemZ::CLFI);
418259698Sdim    break;
419259698Sdim  case SystemZ::CLGIJ:
420259698Sdim    splitCompareBranch(Branch, SystemZ::CLGFI);
421259698Sdim    break;
422259698Sdim  default:
423259698Sdim    llvm_unreachable("Unrecognized branch");
424259698Sdim  }
425259698Sdim
426259698Sdim  Terminator.Size += Terminator.ExtraRelaxSize;
427259698Sdim  Terminator.ExtraRelaxSize = 0;
428276479Sdim  Terminator.Branch = nullptr;
429259698Sdim
430259698Sdim  ++LongBranches;
431259698Sdim}
432259698Sdim
433259698Sdim// Run a shortening pass and relax any branches that need to be relaxed.
434259698Sdimvoid SystemZLongBranch::relaxBranches() {
435259698Sdim  SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
436259698Sdim  BlockPosition Position(MF->getAlignment());
437276479Sdim  for (auto &Block : MBBs) {
438276479Sdim    skipNonTerminators(Position, Block);
439276479Sdim    for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) {
440259698Sdim      assert(Position.Address <= TI->Address &&
441259698Sdim             "Addresses shouldn't go forwards");
442259698Sdim      if (mustRelaxBranch(*TI, Position.Address))
443259698Sdim        relaxBranch(*TI);
444259698Sdim      skipTerminator(Position, *TI, false);
445259698Sdim      ++TI;
446259698Sdim    }
447259698Sdim  }
448259698Sdim}
449259698Sdim
450259698Sdimbool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
451280031Sdim  TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
452259698Sdim  MF = &F;
453259698Sdim  uint64_t Size = initMBBInfo();
454259698Sdim  if (Size <= MaxForwardRange || !mustRelaxABranch())
455259698Sdim    return false;
456259698Sdim
457259698Sdim  setWorstCaseAddresses();
458259698Sdim  relaxBranches();
459259698Sdim  return true;
460259698Sdim}
461