1//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands pseudo instructions into target
10// instructions to allow proper scheduling and other late optimizations.  This
11// pass should be run after register allocation but before the post-regalloc
12// scheduling pass.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64ExpandImm.h"
17#include "AArch64InstrInfo.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/Triple.h"
24#include "llvm/CodeGen/LivePhysRegs.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineFunctionPass.h"
28#include "llvm/CodeGen/MachineInstr.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/MachineOperand.h"
31#include "llvm/CodeGen/TargetSubtargetInfo.h"
32#include "llvm/IR/DebugLoc.h"
33#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/Pass.h"
35#include "llvm/Support/CodeGen.h"
36#include "llvm/Support/MathExtras.h"
37#include "llvm/Target/TargetMachine.h"
38#include <cassert>
39#include <cstdint>
40#include <iterator>
41#include <limits>
42#include <utility>
43
using namespace llvm;

// Human-readable pass name, used by getPassName() and pass registration below.
#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
47
namespace {

/// Post-register-allocation pass that rewrites AArch64 pseudo instructions
/// (immediate materialization, register-register ALU pseudos, GOT loads,
/// compare-and-swap loops, memory-tagging loops, ...) into real target
/// instructions so they can be scheduled and encoded.
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  // Cached target instruction info; initialized in runOnMachineFunction.
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  /// Expand all pseudos in \p MBB; returns true if anything changed.
  bool expandMBB(MachineBasicBlock &MBB);
  /// Expand the single instruction at \p MBBI. \p NextMBBI is updated when
  /// an expansion splits the current block.
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  /// Expand MOVi32imm/MOVi64imm into MOVZ/MOVN/MOVK/ORR sequences.
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  /// Expand CMP_SWAP_{8,16,32,64} into a load-exclusive/store-exclusive loop.
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  /// Expand CMP_SWAP_128 into a LDAXP/STLXP loop over a register pair.
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  /// Expand STGloop/STZGloop memory-tagging pseudos into a ST2G/STZ2G loop.
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
};

} // end anonymous namespace
84
char AArch64ExpandPseudo::ID = 0;

// Register the pass under the "aarch64-expand-pseudo" command-line name.
INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)
89
90/// Transfer implicit operands on the pseudo instruction to the
91/// instructions created from the expansion.
92static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
93                           MachineInstrBuilder &DefMI) {
94  const MCInstrDesc &Desc = OldMI.getDesc();
95  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
96       ++i) {
97    const MachineOperand &MO = OldMI.getOperand(i);
98    assert(MO.isReg() && MO.getReg());
99    if (MO.isUse())
100      UseMI.add(MO);
101    else
102      DefMI.add(MO);
103  }
104}
105
106/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
107/// real move-immediate instructions to synthesize the immediate.
108bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
109                                       MachineBasicBlock::iterator MBBI,
110                                       unsigned BitSize) {
111  MachineInstr &MI = *MBBI;
112  Register DstReg = MI.getOperand(0).getReg();
113  uint64_t RenamableState =
114      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
115  uint64_t Imm = MI.getOperand(1).getImm();
116
117  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
118    // Useless def, and we don't want to risk creating an invalid ORR (which
119    // would really write to sp).
120    MI.eraseFromParent();
121    return true;
122  }
123
124  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
125  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
126  assert(Insn.size() != 0);
127
128  SmallVector<MachineInstrBuilder, 4> MIBS;
129  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
130    bool LastItem = std::next(I) == E;
131    switch (I->Opcode)
132    {
133    default: llvm_unreachable("unhandled!"); break;
134
135    case AArch64::ORRWri:
136    case AArch64::ORRXri:
137      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
138        .add(MI.getOperand(0))
139        .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
140        .addImm(I->Op2));
141      break;
142    case AArch64::MOVNWi:
143    case AArch64::MOVNXi:
144    case AArch64::MOVZWi:
145    case AArch64::MOVZXi: {
146      bool DstIsDead = MI.getOperand(0).isDead();
147      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
148        .addReg(DstReg, RegState::Define |
149                getDeadRegState(DstIsDead && LastItem) |
150                RenamableState)
151        .addImm(I->Op1)
152        .addImm(I->Op2));
153      } break;
154    case AArch64::MOVKWi:
155    case AArch64::MOVKXi: {
156      Register DstReg = MI.getOperand(0).getReg();
157      bool DstIsDead = MI.getOperand(0).isDead();
158      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
159        .addReg(DstReg,
160                RegState::Define |
161                getDeadRegState(DstIsDead && LastItem) |
162                RenamableState)
163        .addReg(DstReg)
164        .addImm(I->Op1)
165        .addImm(I->Op2));
166      } break;
167    }
168  }
169  transferImpOps(MI, MIBS.front(), MIBS.back());
170  MI.eraseFromParent();
171  return true;
172}
173
/// Expand one of the CMP_SWAP_{8,16,32,64} pseudos into an exclusive-access
/// loop:
///   LoadCmpBB: [mov wStatus, 0]; ldaxr Dest, [Addr]; cmp Dest, Desired;
///              b.ne DoneBB
///   StoreBB:   stlxr wStatus, New, [Addr]; cbnz wStatus, LoadCmpBB
///   DoneBB:    remainder of the original block
/// \p LdarOp/\p StlrOp select the width-specific exclusive load/store,
/// \p CmpOp/\p ExtendImm the comparison form, and \p ZeroReg the discard
/// register for the compare. \p NextMBBI is reset so iteration resumes after
/// the split. Returns true; the pseudo is always removed.
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  // The status register is pre-seeded with 0 ("success") because the early
  // exit on comparison failure skips the store that would otherwise set it.
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
      .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Move everything after the pseudo into DoneBB and rewire the CFG.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
253
/// Expand CMP_SWAP_128 into a 128-bit exclusive-access loop built on
/// LDAXP/STLXP over an X-register pair. The two 64-bit halves are compared
/// separately and the per-half results are combined into StatusReg via a
/// pair of CSINC instructions. Returns true; the pseudo is always removed.
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  // Compare the low halves; CSINC captures the EQ/NE result in StatusReg so
  // it survives the second SUBS, which clobbers NZCV.
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
    .addUse(AArch64::WZR)
    .addUse(AArch64::WZR)
    .addImm(AArch64CC::EQ);
  // Compare the high halves and fold the result into StatusReg: it stays 0
  // only if both halves matched.
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Move everything after the pseudo into DoneBB and rewire the CFG.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
346
/// Expand the STGloop/STZGloop memory-tagging pseudos into a loop that tags
/// (and, for STZGloop, zeroes) memory 32 bytes per iteration using the
/// post-indexed ST2G/STZ2G instructions:
///   LoopBB: st{z}2g [Address], #32; sub Size, Size, #32; cbnz Size, LoopBB
/// Both SizeReg and AddressReg are consumed/updated in place.
/// Returns true; the pseudo is always removed.
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(2).getReg();
  Register AddressReg = MI.getOperand(3).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
  const unsigned OpCode =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  // Post-index writeback: immediate 2 scales to a 32-byte address advance.
  BuildMI(LoopBB, DL, TII->get(OpCode))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  // Count down by 32 (two 16-byte tag granules) per iteration.
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  // Move everything after the pseudo into DoneBB and rewire the CFG.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}
404
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true.  Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  // Register-register ALU pseudos: lowered to the shifted-register form
  // with an LSL #0 shift operand.
  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    // NOTE: this inner Opcode intentionally shadows the outer one; it holds
    // the replacement "rs" (shifted-register) opcode.
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        // ILP32: GOT entries are 32 bits, so load into the W sub-register
        // while keeping the X register as the implicit def for liveness.
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        unsigned DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    // Read the thread pointer from the system register appropriate for the
    // target OS / configured exception level.
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
        MF->getTarget().getCodeModel() == CodeModel::Kernel)
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
          .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    // Drop the tied-operand constraint: emit the plain AESMC/AESIMC form.
    MachineInstrBuilder MIB =
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
                                                      AArch64::AESIMCrr))
      .add(MI.getOperand(0))
      .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
   }
   case AArch64::IRGstack: {
     MachineFunction &MF = *MBB.getParent();
     const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     const AArch64FrameLowering *TFI =
         MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

     // IRG does not allow immediate offset. getTaggedBasePointerOffset should
     // almost always point to SP-after-prologue; if not, emit a longer
     // instruction sequence.
     int BaseOffset = -AFI->getTaggedBasePointerOffset();
     unsigned FrameReg;
     StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
         MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
         /*PreferFP=*/false,
         /*ForSimm=*/true);
     Register SrcReg = FrameReg;
     if (FrameRegOffset) {
       // Use output register as temporary.
       SrcReg = MI.getOperand(0).getReg();
       emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                       FrameRegOffset, TII);
     }
     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
         .add(MI.getOperand(0))
         .addUse(SrcReg)
         .add(MI.getOperand(2));
     MI.eraseFromParent();
     return true;
   }
   case AArch64::TAGPstack: {
     // ADDG takes an unsigned offset; use SUBG for negative offsets.
     int64_t Offset = MI.getOperand(2).getImm();
     BuildMI(MBB, MBBI, MI.getDebugLoc(),
             TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
         .add(MI.getOperand(0))
         .add(MI.getOperand(1))
         .addImm(std::abs(Offset))
         .add(MI.getOperand(4));
     MI.eraseFromParent();
     return true;
   }
   case AArch64::STGloop:
   case AArch64::STZGloop:
     return expandSetTagLoop(MBB, MBBI, NextMBBI);
  }
  return false;
}
715
716/// Iterate over the instructions in basic block MBB and expand any
717/// pseudo instructions.  Return true if anything was modified.
718bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
719  bool Modified = false;
720
721  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
722  while (MBBI != E) {
723    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
724    Modified |= expandMI(MBB, MBBI, NMBBI);
725    MBBI = NMBBI;
726  }
727
728  return Modified;
729}
730
731bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
732  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
733
734  bool Modified = false;
735  for (auto &MBB : MF)
736    Modified |= expandMBB(MBB);
737  return Modified;
738}
739
740/// Returns an instance of the pseudo instruction expansion pass.
741FunctionPass *llvm::createAArch64ExpandPseudoPass() {
742  return new AArch64ExpandPseudo();
743}
744