//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/GlobalValue.h"
using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return X86_EXPAND_PSEUDO_NAME; }

private:
  void ExpandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);

  bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool ExpandMBB(MachineBasicBlock &MBB);
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)

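/// Expand an ICALL_BRANCH_FUNNEL pseudo into a search tree of compares and
/// conditional branches over the jump table's targets: up to five targets
/// are handled with a linear chain, larger ranges are split at the midpoint
/// (a binary search), and every leaf ends in a direct tail call.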
void X86ExpandPseudo::ExpandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  DebugLoc DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();
  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

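  // Make a successor block for the current one; the new block inherits live
  // EFLAGS so the pending comparison result survives the block split.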
  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

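  // Emit a conditional jump to ThenMBB, then redirect all further emission
  // into a fresh fall-through block that forms the 'else' path.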
  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

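  // Recursively emit the funnel: one target is a direct tail call, two
  // targets need a single compare, up to five targets use a linear
  // below/equal chain, and anything larger is split at the midpoint.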
  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
    if (NumTargets == 1) {
      EmitTailCall(FirstTarget);
      return;
    }

    if (NumTargets == 2) {
      CmpTarget(FirstTarget + 1);
      EmitCondJumpTarget(X86::COND_B, FirstTarget);
      EmitTailCall(FirstTarget + 1);
      return;
    }

    if (NumTargets < 6) {
      CmpTarget(FirstTarget + 1);
      EmitCondJumpTarget(X86::COND_B, FirstTarget);
      EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
      EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
      return;
    }

    auto *ThenMBB = CreateMBB();
    CmpTarget(FirstTarget + (NumTargets / 2));
    EmitCondJump(X86::COND_B, ThenMBB);
    EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
    EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                     NumTargets - (NumTargets / 2) - 1);

    MF->insert(InsPt, ThenMBB);
    MBB = ThenMBB;
    MBBI = MBB->end();
    EmitBranchFunnel(FirstTarget, NumTargets / 2);
  };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  switch (Opcode) {
  default:
    return false;
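  // TCRETURN* pseudos carry a jump target plus the number of stack bytes to
  // release; lower them to the matching TAILJMP* instruction after any
  // required stack pointer adjustment.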
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
                                                         : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse "
               "the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes for indirect jumps out of functions,
        // but not for direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }

    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }

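    // The freshly built TAILJMP sits immediately before the pseudo; copy the
    // pseudo's implicit operands (such as live argument registers) onto it
    // so register liveness is preserved.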
    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);

    // Update the call site info.
    if (MBBI->isCandidateForCallSiteEntry())
      MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
    // Replace pseudo with machine iret
    BuildMI(MBB, MBBI, DL,
            TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1.  If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG8B_SAVE_EBX:
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualcmpxchg Addr
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    unsigned ActualInArg =
        Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, InArg.getReg(),
                     InArg.isKill());
    // Create the actual instruction.
    unsigned ActualOpc =
        Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::LCMPXCHG8B : X86::LCMPXCHG16B;
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(ActualOpc));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one of
  // these needs a 2 byte displacement relative to the specified address (with
  // 32 bit spill size). Mask pairs from 1-bit up to 16-bit masks all share the
  // same spill size: they are all stored using MASKPAIR16STORE and loaded
  // using MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
      .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
      .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    ExpandICallBranchFunnel(&MBB, MBBI);
    return true;
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= ExpandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &static_cast<const X86Subtarget &>(MF.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF)
    Modified |= ExpandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}