1//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a register stacking pass.
11///
12/// This pass reorders instructions to put register uses and defs in an order
13/// such that they form single-use expression trees. Registers fitting this form
14/// are then marked as "stackified", meaning references to them are replaced by
15/// "push" and "pop" from the value stack.
16///
17/// This is primarily a code size optimization, since temporary values on the
18/// value stack don't need to be named.
19///
20//===----------------------------------------------------------------------===//
21
22#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
23#include "WebAssembly.h"
24#include "WebAssemblyDebugValueManager.h"
25#include "WebAssemblyMachineFunctionInfo.h"
26#include "WebAssemblySubtarget.h"
27#include "WebAssemblyUtilities.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/Analysis/AliasAnalysis.h"
30#include "llvm/CodeGen/LiveIntervals.h"
31#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32#include "llvm/CodeGen/MachineDominators.h"
33#include "llvm/CodeGen/MachineInstrBuilder.h"
34#include "llvm/CodeGen/MachineModuleInfoImpls.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/Passes.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/raw_ostream.h"
39using namespace llvm;
40
41#define DEBUG_TYPE "wasm-reg-stackify"
42
43namespace {
44class WebAssemblyRegStackify final : public MachineFunctionPass {
45  StringRef getPassName() const override {
46    return "WebAssembly Register Stackify";
47  }
48
49  void getAnalysisUsage(AnalysisUsage &AU) const override {
50    AU.setPreservesCFG();
51    AU.addRequired<AAResultsWrapperPass>();
52    AU.addRequired<MachineDominatorTree>();
53    AU.addRequired<LiveIntervals>();
54    AU.addPreserved<MachineBlockFrequencyInfo>();
55    AU.addPreserved<SlotIndexes>();
56    AU.addPreserved<LiveIntervals>();
57    AU.addPreservedID(LiveVariablesID);
58    AU.addPreserved<MachineDominatorTree>();
59    MachineFunctionPass::getAnalysisUsage(AU);
60  }
61
62  bool runOnMachineFunction(MachineFunction &MF) override;
63
64public:
65  static char ID; // Pass identification, replacement for typeid
66  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
67};
68} // end anonymous namespace
69
70char WebAssemblyRegStackify::ID = 0;
71INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE,
72                "Reorder instructions to use the WebAssembly value stack",
73                false, false)
74
75FunctionPass *llvm::createWebAssemblyRegStackify() {
76  return new WebAssemblyRegStackify();
77}
78
79// Decorate the given instruction with implicit operands that enforce the
80// expression stack ordering constraints for an instruction which is on
81// the expression stack.
82static void imposeStackOrdering(MachineInstr *MI) {
83  // Write the opaque VALUE_STACK register.
84  if (!MI->definesRegister(WebAssembly::VALUE_STACK))
85    MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
86                                             /*isDef=*/true,
87                                             /*isImp=*/true));
88
89  // Also read the opaque VALUE_STACK register.
90  if (!MI->readsRegister(WebAssembly::VALUE_STACK))
91    MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
92                                             /*isDef=*/false,
93                                             /*isImp=*/true));
94}
95
96// Convert an IMPLICIT_DEF instruction into an instruction which defines
97// a constant zero value.
98static void convertImplicitDefToConstZero(MachineInstr *MI,
99                                          MachineRegisterInfo &MRI,
100                                          const TargetInstrInfo *TII,
101                                          MachineFunction &MF,
102                                          LiveIntervals &LIS) {
103  assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
104
105  const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
106  if (RegClass == &WebAssembly::I32RegClass) {
107    MI->setDesc(TII->get(WebAssembly::CONST_I32));
108    MI->addOperand(MachineOperand::CreateImm(0));
109  } else if (RegClass == &WebAssembly::I64RegClass) {
110    MI->setDesc(TII->get(WebAssembly::CONST_I64));
111    MI->addOperand(MachineOperand::CreateImm(0));
112  } else if (RegClass == &WebAssembly::F32RegClass) {
113    MI->setDesc(TII->get(WebAssembly::CONST_F32));
114    auto *Val = cast<ConstantFP>(Constant::getNullValue(
115        Type::getFloatTy(MF.getFunction().getContext())));
116    MI->addOperand(MachineOperand::CreateFPImm(Val));
117  } else if (RegClass == &WebAssembly::F64RegClass) {
118    MI->setDesc(TII->get(WebAssembly::CONST_F64));
119    auto *Val = cast<ConstantFP>(Constant::getNullValue(
120        Type::getDoubleTy(MF.getFunction().getContext())));
121    MI->addOperand(MachineOperand::CreateFPImm(Val));
122  } else if (RegClass == &WebAssembly::V128RegClass) {
123    Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
124    MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32));
125    MI->addOperand(MachineOperand::CreateReg(TempReg, false));
126    MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
127                                  TII->get(WebAssembly::CONST_I32), TempReg)
128                              .addImm(0);
129    LIS.InsertMachineInstrInMaps(*Const);
130  } else {
131    llvm_unreachable("Unexpected reg class");
132  }
133}
134
135// Determine whether a call to the callee referenced by
136// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
137// effects.
138static void queryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
139                        bool &Write, bool &Effects, bool &StackPointer) {
140  // All calls can use the stack pointer.
141  StackPointer = true;
142
143  const MachineOperand &MO = MI.getOperand(CalleeOpNo);
144  if (MO.isGlobal()) {
145    const Constant *GV = MO.getGlobal();
146    if (const auto *GA = dyn_cast<GlobalAlias>(GV))
147      if (!GA->isInterposable())
148        GV = GA->getAliasee();
149
150    if (const auto *F = dyn_cast<Function>(GV)) {
151      if (!F->doesNotThrow())
152        Effects = true;
153      if (F->doesNotAccessMemory())
154        return;
155      if (F->onlyReadsMemory()) {
156        Read = true;
157        return;
158      }
159    }
160  }
161
162  // Assume the worst.
163  Write = true;
164  Read = true;
165  Effects = true;
166}
167
168// Determine whether MI reads memory, writes memory, has side effects,
169// and/or uses the stack pointer value.
170static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
171                  bool &Write, bool &Effects, bool &StackPointer) {
172  assert(!MI.isTerminator());
173
174  if (MI.isDebugInstr() || MI.isPosition())
175    return;
176
177  // Check for loads.
178  if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA))
179    Read = true;
180
181  // Check for stores.
182  if (MI.mayStore()) {
183    Write = true;
184  } else if (MI.hasOrderedMemoryRef()) {
185    switch (MI.getOpcode()) {
186    case WebAssembly::DIV_S_I32:
187    case WebAssembly::DIV_S_I64:
188    case WebAssembly::REM_S_I32:
189    case WebAssembly::REM_S_I64:
190    case WebAssembly::DIV_U_I32:
191    case WebAssembly::DIV_U_I64:
192    case WebAssembly::REM_U_I32:
193    case WebAssembly::REM_U_I64:
194    case WebAssembly::I32_TRUNC_S_F32:
195    case WebAssembly::I64_TRUNC_S_F32:
196    case WebAssembly::I32_TRUNC_S_F64:
197    case WebAssembly::I64_TRUNC_S_F64:
198    case WebAssembly::I32_TRUNC_U_F32:
199    case WebAssembly::I64_TRUNC_U_F32:
200    case WebAssembly::I32_TRUNC_U_F64:
201    case WebAssembly::I64_TRUNC_U_F64:
202      // These instruction have hasUnmodeledSideEffects() returning true
203      // because they trap on overflow and invalid so they can't be arbitrarily
204      // moved, however hasOrderedMemoryRef() interprets this plus their lack
205      // of memoperands as having a potential unknown memory reference.
206      break;
207    default:
208      // Record volatile accesses, unless it's a call, as calls are handled
209      // specially below.
210      if (!MI.isCall()) {
211        Write = true;
212        Effects = true;
213      }
214      break;
215    }
216  }
217
218  // Check for side effects.
219  if (MI.hasUnmodeledSideEffects()) {
220    switch (MI.getOpcode()) {
221    case WebAssembly::DIV_S_I32:
222    case WebAssembly::DIV_S_I64:
223    case WebAssembly::REM_S_I32:
224    case WebAssembly::REM_S_I64:
225    case WebAssembly::DIV_U_I32:
226    case WebAssembly::DIV_U_I64:
227    case WebAssembly::REM_U_I32:
228    case WebAssembly::REM_U_I64:
229    case WebAssembly::I32_TRUNC_S_F32:
230    case WebAssembly::I64_TRUNC_S_F32:
231    case WebAssembly::I32_TRUNC_S_F64:
232    case WebAssembly::I64_TRUNC_S_F64:
233    case WebAssembly::I32_TRUNC_U_F32:
234    case WebAssembly::I64_TRUNC_U_F32:
235    case WebAssembly::I32_TRUNC_U_F64:
236    case WebAssembly::I64_TRUNC_U_F64:
237      // These instructions have hasUnmodeledSideEffects() returning true
238      // because they trap on overflow and invalid so they can't be arbitrarily
239      // moved, however in the specific case of register stackifying, it is safe
240      // to move them because overflow and invalid are Undefined Behavior.
241      break;
242    default:
243      Effects = true;
244      break;
245    }
246  }
247
248  // Check for writes to __stack_pointer global.
249  if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
250      strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
251    StackPointer = true;
252
253  // Analyze calls.
254  if (MI.isCall()) {
255    unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode());
256    queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
257  }
258}
259
260// Test whether Def is safe and profitable to rematerialize.
261static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
262                                const WebAssemblyInstrInfo *TII) {
263  return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
264}
265
266// Identify the definition for this register at this point. This is a
267// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
268// LiveIntervals to handle complex cases.
269static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
270                                const MachineRegisterInfo &MRI,
271                                const LiveIntervals &LIS) {
272  // Most registers are in SSA form here so we try a quick MRI query first.
273  if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
274    return Def;
275
276  // MRI doesn't know what the Def is. Try asking LIS.
277  if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
278          LIS.getInstructionIndex(*Insert)))
279    return LIS.getInstructionFromIndex(ValNo->def);
280
281  return nullptr;
282}
283
284// Test whether Reg, as defined at Def, has exactly one use. This is a
285// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
286// to handle complex cases.
287static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
288                      MachineDominatorTree &MDT, LiveIntervals &LIS) {
289  // Most registers are in SSA form here so we try a quick MRI query first.
290  if (MRI.hasOneUse(Reg))
291    return true;
292
293  bool HasOne = false;
294  const LiveInterval &LI = LIS.getInterval(Reg);
295  const VNInfo *DefVNI =
296      LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
297  assert(DefVNI);
298  for (auto &I : MRI.use_nodbg_operands(Reg)) {
299    const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
300    if (Result.valueIn() == DefVNI) {
301      if (!Result.isKill())
302        return false;
303      if (HasOne)
304        return false;
305      HasOne = true;
306    }
307  }
308  return HasOne;
309}
310
311// Test whether it's safe to move Def to just before Insert.
312// TODO: Compute memory dependencies in a way that doesn't require always
313// walking the block.
314// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
315// more precise.
316static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
317                         AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
318  assert(Def->getParent() == Insert->getParent());
319
320  // 'catch' and 'extract_exception' should be the first instruction of a BB and
321  // cannot move.
322  if (Def->getOpcode() == WebAssembly::CATCH ||
323      Def->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
324    const MachineBasicBlock *MBB = Def->getParent();
325    auto NextI = std::next(MachineBasicBlock::const_iterator(Def));
326    for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
327      ;
328    if (NextI != Insert)
329      return false;
330  }
331
332  // Check for register dependencies.
333  SmallVector<unsigned, 4> MutableRegisters;
334  for (const MachineOperand &MO : Def->operands()) {
335    if (!MO.isReg() || MO.isUndef())
336      continue;
337    Register Reg = MO.getReg();
338
339    // If the register is dead here and at Insert, ignore it.
340    if (MO.isDead() && Insert->definesRegister(Reg) &&
341        !Insert->readsRegister(Reg))
342      continue;
343
344    if (Register::isPhysicalRegister(Reg)) {
345      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
346      // from moving down, and we've already checked for that.
347      if (Reg == WebAssembly::ARGUMENTS)
348        continue;
349      // If the physical register is never modified, ignore it.
350      if (!MRI.isPhysRegModified(Reg))
351        continue;
352      // Otherwise, it's a physical register with unknown liveness.
353      return false;
354    }
355
356    // If one of the operands isn't in SSA form, it has different values at
357    // different times, and we need to make sure we don't move our use across
358    // a different def.
359    if (!MO.isDef() && !MRI.hasOneDef(Reg))
360      MutableRegisters.push_back(Reg);
361  }
362
363  bool Read = false, Write = false, Effects = false, StackPointer = false;
364  query(*Def, AA, Read, Write, Effects, StackPointer);
365
366  // If the instruction does not access memory and has no side effects, it has
367  // no additional dependencies.
368  bool HasMutableRegisters = !MutableRegisters.empty();
369  if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
370    return true;
371
372  // Scan through the intervening instructions between Def and Insert.
373  MachineBasicBlock::const_iterator D(Def), I(Insert);
374  for (--I; I != D; --I) {
375    bool InterveningRead = false;
376    bool InterveningWrite = false;
377    bool InterveningEffects = false;
378    bool InterveningStackPointer = false;
379    query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
380          InterveningStackPointer);
381    if (Effects && InterveningEffects)
382      return false;
383    if (Read && InterveningWrite)
384      return false;
385    if (Write && (InterveningRead || InterveningWrite))
386      return false;
387    if (StackPointer && InterveningStackPointer)
388      return false;
389
390    for (unsigned Reg : MutableRegisters)
391      for (const MachineOperand &MO : I->operands())
392        if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
393          return false;
394  }
395
396  return true;
397}
398
399/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
400static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
401                                     const MachineBasicBlock &MBB,
402                                     const MachineRegisterInfo &MRI,
403                                     const MachineDominatorTree &MDT,
404                                     LiveIntervals &LIS,
405                                     WebAssemblyFunctionInfo &MFI) {
406  const LiveInterval &LI = LIS.getInterval(Reg);
407
408  const MachineInstr *OneUseInst = OneUse.getParent();
409  VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
410
411  for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) {
412    if (&Use == &OneUse)
413      continue;
414
415    const MachineInstr *UseInst = Use.getParent();
416    VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
417
418    if (UseVNI != OneUseVNI)
419      continue;
420
421    if (UseInst == OneUseInst) {
422      // Another use in the same instruction. We need to ensure that the one
423      // selected use happens "before" it.
424      if (&OneUse > &Use)
425        return false;
426    } else {
427      // Test that the use is dominated by the one selected use.
428      while (!MDT.dominates(OneUseInst, UseInst)) {
429        // Actually, dominating is over-conservative. Test that the use would
430        // happen after the one selected use in the stack evaluation order.
431        //
432        // This is needed as a consequence of using implicit local.gets for
433        // uses and implicit local.sets for defs.
434        if (UseInst->getDesc().getNumDefs() == 0)
435          return false;
436        const MachineOperand &MO = UseInst->getOperand(0);
437        if (!MO.isReg())
438          return false;
439        Register DefReg = MO.getReg();
440        if (!Register::isVirtualRegister(DefReg) ||
441            !MFI.isVRegStackified(DefReg))
442          return false;
443        assert(MRI.hasOneNonDBGUse(DefReg));
444        const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
445        const MachineInstr *NewUseInst = NewUse.getParent();
446        if (NewUseInst == OneUseInst) {
447          if (&OneUse > &NewUse)
448            return false;
449          break;
450        }
451        UseInst = NewUseInst;
452      }
453    }
454  }
455  return true;
456}
457
458/// Get the appropriate tee opcode for the given register class.
459static unsigned getTeeOpcode(const TargetRegisterClass *RC) {
460  if (RC == &WebAssembly::I32RegClass)
461    return WebAssembly::TEE_I32;
462  if (RC == &WebAssembly::I64RegClass)
463    return WebAssembly::TEE_I64;
464  if (RC == &WebAssembly::F32RegClass)
465    return WebAssembly::TEE_F32;
466  if (RC == &WebAssembly::F64RegClass)
467    return WebAssembly::TEE_F64;
468  if (RC == &WebAssembly::V128RegClass)
469    return WebAssembly::TEE_V128;
470  llvm_unreachable("Unexpected register class");
471}
472
473// Shrink LI to its uses, cleaning up LI.
474static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
475  if (LIS.shrinkToUses(&LI)) {
476    SmallVector<LiveInterval *, 4> SplitLIs;
477    LIS.splitSeparateComponents(LI, SplitLIs);
478  }
479}
480
481/// A single-use def in the same block with no intervening memory or register
482/// dependencies; move the def down and nest it with the current instruction.
483static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
484                                      MachineInstr *Def, MachineBasicBlock &MBB,
485                                      MachineInstr *Insert, LiveIntervals &LIS,
486                                      WebAssemblyFunctionInfo &MFI,
487                                      MachineRegisterInfo &MRI) {
488  LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());
489
490  WebAssemblyDebugValueManager DefDIs(Def);
491  MBB.splice(Insert, &MBB, Def);
492  DefDIs.move(Insert);
493  LIS.handleMove(*Def);
494
495  if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
496    // No one else is using this register for anything so we can just stackify
497    // it in place.
498    MFI.stackifyVReg(Reg);
499  } else {
500    // The register may have unrelated uses or defs; create a new register for
501    // just our one def and use so that we can stackify it.
502    Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
503    Def->getOperand(0).setReg(NewReg);
504    Op.setReg(NewReg);
505
506    // Tell LiveIntervals about the new register.
507    LIS.createAndComputeVirtRegInterval(NewReg);
508
509    // Tell LiveIntervals about the changes to the old register.
510    LiveInterval &LI = LIS.getInterval(Reg);
511    LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
512                     LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
513                     /*RemoveDeadValNo=*/true);
514
515    MFI.stackifyVReg(NewReg);
516
517    DefDIs.updateReg(NewReg);
518
519    LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
520  }
521
522  imposeStackOrdering(Def);
523  return Def;
524}
525
526/// A trivially cloneable instruction; clone it and nest the new copy with the
527/// current instruction.
528static MachineInstr *rematerializeCheapDef(
529    unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
530    MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
531    WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
532    const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
533  LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
534  LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
535
536  WebAssemblyDebugValueManager DefDIs(&Def);
537
538  Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
539  TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
540  Op.setReg(NewReg);
541  MachineInstr *Clone = &*std::prev(Insert);
542  LIS.InsertMachineInstrInMaps(*Clone);
543  LIS.createAndComputeVirtRegInterval(NewReg);
544  MFI.stackifyVReg(NewReg);
545  imposeStackOrdering(Clone);
546
547  LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());
548
549  // Shrink the interval.
550  bool IsDead = MRI.use_empty(Reg);
551  if (!IsDead) {
552    LiveInterval &LI = LIS.getInterval(Reg);
553    shrinkToUses(LI, LIS);
554    IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
555  }
556
557  // If that was the last use of the original, delete the original.
558  // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
559  if (IsDead) {
560    LLVM_DEBUG(dbgs() << " - Deleting original\n");
561    SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
562    LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
563    LIS.removeInterval(Reg);
564    LIS.RemoveMachineInstrFromMaps(Def);
565    Def.eraseFromParent();
566
567    DefDIs.move(&*Insert);
568    DefDIs.updateReg(NewReg);
569  } else {
570    DefDIs.clone(&*Insert, NewReg);
571  }
572
573  return Clone;
574}
575
576/// A multiple-use def in the same block with no intervening memory or register
577/// dependencies; move the def down, nest it with the current instruction, and
578/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
579/// this:
580///
581///    Reg = INST ...        // Def
582///    INST ..., Reg, ...    // Insert
583///    INST ..., Reg, ...
584///    INST ..., Reg, ...
585///
586/// to this:
587///
588///    DefReg = INST ...     // Def (to become the new Insert)
589///    TeeReg, Reg = TEE_... DefReg
590///    INST ..., TeeReg, ... // Insert
591///    INST ..., Reg, ...
592///    INST ..., Reg, ...
593///
594/// with DefReg and TeeReg stackified. This eliminates a local.get from the
595/// resulting code.
596static MachineInstr *moveAndTeeForMultiUse(
597    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
598    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
599    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
600  LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
601
602  WebAssemblyDebugValueManager DefDIs(Def);
603
604  // Move Def into place.
605  MBB.splice(Insert, &MBB, Def);
606  LIS.handleMove(*Def);
607
608  // Create the Tee and attach the registers.
609  const auto *RegClass = MRI.getRegClass(Reg);
610  Register TeeReg = MRI.createVirtualRegister(RegClass);
611  Register DefReg = MRI.createVirtualRegister(RegClass);
612  MachineOperand &DefMO = Def->getOperand(0);
613  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
614                              TII->get(getTeeOpcode(RegClass)), TeeReg)
615                          .addReg(Reg, RegState::Define)
616                          .addReg(DefReg, getUndefRegState(DefMO.isDead()));
617  Op.setReg(TeeReg);
618  DefMO.setReg(DefReg);
619  SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
620  SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
621
622  DefDIs.move(Insert);
623
624  // Tell LiveIntervals we moved the original vreg def from Def to Tee.
625  LiveInterval &LI = LIS.getInterval(Reg);
626  LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
627  VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
628  I->start = TeeIdx;
629  ValNo->def = TeeIdx;
630  shrinkToUses(LI, LIS);
631
632  // Finish stackifying the new regs.
633  LIS.createAndComputeVirtRegInterval(TeeReg);
634  LIS.createAndComputeVirtRegInterval(DefReg);
635  MFI.stackifyVReg(DefReg);
636  MFI.stackifyVReg(TeeReg);
637  imposeStackOrdering(Def);
638  imposeStackOrdering(Tee);
639
640  DefDIs.clone(Tee, DefReg);
641  DefDIs.clone(Insert, TeeReg);
642
643  LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
644  LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
645  return Def;
646}
647
648namespace {
649/// A stack for walking the tree of instructions being built, visiting the
650/// MachineOperands in DFS order.
651class TreeWalkerState {
652  using mop_iterator = MachineInstr::mop_iterator;
653  using mop_reverse_iterator = std::reverse_iterator<mop_iterator>;
654  using RangeTy = iterator_range<mop_reverse_iterator>;
655  SmallVector<RangeTy, 4> Worklist;
656
657public:
658  explicit TreeWalkerState(MachineInstr *Insert) {
659    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
660    if (Range.begin() != Range.end())
661      Worklist.push_back(reverse(Range));
662  }
663
664  bool done() const { return Worklist.empty(); }
665
666  MachineOperand &pop() {
667    RangeTy &Range = Worklist.back();
668    MachineOperand &Op = *Range.begin();
669    Range = drop_begin(Range, 1);
670    if (Range.begin() == Range.end())
671      Worklist.pop_back();
672    assert((Worklist.empty() ||
673            Worklist.back().begin() != Worklist.back().end()) &&
674           "Empty ranges shouldn't remain in the worklist");
675    return Op;
676  }
677
678  /// Push Instr's operands onto the stack to be visited.
679  void pushOperands(MachineInstr *Instr) {
680    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
681    if (Range.begin() != Range.end())
682      Worklist.push_back(reverse(Range));
683  }
684
685  /// Some of Instr's operands are on the top of the stack; remove them and
686  /// re-insert them starting from the beginning (because we've commuted them).
687  void resetTopOperands(MachineInstr *Instr) {
688    assert(hasRemainingOperands(Instr) &&
689           "Reseting operands should only be done when the instruction has "
690           "an operand still on the stack");
691    Worklist.back() = reverse(Instr->explicit_uses());
692  }
693
694  /// Test whether Instr has operands remaining to be visited at the top of
695  /// the stack.
696  bool hasRemainingOperands(const MachineInstr *Instr) const {
697    if (Worklist.empty())
698      return false;
699    const RangeTy &Range = Worklist.back();
700    return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
701  }
702
703  /// Test whether the given register is present on the stack, indicating an
704  /// operand in the tree that we haven't visited yet. Moving a definition of
705  /// Reg to a point in the tree after that would change its value.
706  ///
707  /// This is needed as a consequence of using implicit local.gets for
708  /// uses and implicit local.sets for defs.
709  bool isOnStack(unsigned Reg) const {
710    for (const RangeTy &Range : Worklist)
711      for (const MachineOperand &MO : Range)
712        if (MO.isReg() && MO.getReg() == Reg)
713          return true;
714    return false;
715  }
716};
717
718/// State to keep track of whether commuting is in flight or whether it's been
719/// tried for the current instruction and didn't work.
720class CommutingState {
721  /// There are effectively three states: the initial state where we haven't
722  /// started commuting anything and we don't know anything yet, the tentative
723  /// state where we've commuted the operands of the current instruction and are
724  /// revisiting it, and the declined state where we've reverted the operands
725  /// back to their original order and will no longer commute it further.
726  bool TentativelyCommuting = false;
727  bool Declined = false;
728
729  /// During the tentative state, these hold the operand indices of the commuted
730  /// operands.
731  unsigned Operand0, Operand1;
732
733public:
734  /// Stackification for an operand was not successful due to ordering
735  /// constraints. If possible, and if we haven't already tried it and declined
736  /// it, commute Insert's operands and prepare to revisit it.
737  void maybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
738                    const WebAssemblyInstrInfo *TII) {
739    if (TentativelyCommuting) {
740      assert(!Declined &&
741             "Don't decline commuting until you've finished trying it");
742      // Commuting didn't help. Revert it.
743      TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
744      TentativelyCommuting = false;
745      Declined = true;
746    } else if (!Declined && TreeWalker.hasRemainingOperands(Insert)) {
747      Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
748      Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
749      if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
750        // Tentatively commute the operands and try again.
751        TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
752        TreeWalker.resetTopOperands(Insert);
753        TentativelyCommuting = true;
754        Declined = false;
755      }
756    }
757  }
758
759  /// Stackification for some operand was successful. Reset to the default
760  /// state.
761  void reset() {
762    TentativelyCommuting = false;
763    Declined = false;
764  }
765};
766} // end anonymous namespace
767
768bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
769  LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
770                       "********** Function: "
771                    << MF.getName() << '\n');
772
773  bool Changed = false;
774  MachineRegisterInfo &MRI = MF.getRegInfo();
775  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
776  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
777  const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
778  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
779  auto &MDT = getAnalysis<MachineDominatorTree>();
780  auto &LIS = getAnalysis<LiveIntervals>();
781
782  // Walk the instructions from the bottom up. Currently we don't look past
783  // block boundaries, and the blocks aren't ordered so the block visitation
784  // order isn't significant, but we may want to change this in the future.
785  for (MachineBasicBlock &MBB : MF) {
786    // Don't use a range-based for loop, because we modify the list as we're
787    // iterating over it and the end iterator may change.
788    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
789      MachineInstr *Insert = &*MII;
790      // Don't nest anything inside an inline asm, because we don't have
791      // constraints for $push inputs.
792      if (Insert->isInlineAsm())
793        continue;
794
795      // Ignore debugging intrinsics.
796      if (Insert->isDebugValue())
797        continue;
798
799      // Iterate through the inputs in reverse order, since we'll be pulling
800      // operands off the stack in LIFO order.
801      CommutingState Commuting;
802      TreeWalkerState TreeWalker(Insert);
803      while (!TreeWalker.done()) {
804        MachineOperand &Op = TreeWalker.pop();
805
806        // We're only interested in explicit virtual register operands.
807        if (!Op.isReg())
808          continue;
809
810        Register Reg = Op.getReg();
811        assert(Op.isUse() && "explicit_uses() should only iterate over uses");
812        assert(!Op.isImplicit() &&
813               "explicit_uses() should only iterate over explicit operands");
814        if (Register::isPhysicalRegister(Reg))
815          continue;
816
817        // Identify the definition for this register at this point.
818        MachineInstr *Def = getVRegDef(Reg, Insert, MRI, LIS);
819        if (!Def)
820          continue;
821
822        // Don't nest an INLINE_ASM def into anything, because we don't have
823        // constraints for $pop outputs.
824        if (Def->isInlineAsm())
825          continue;
826
827        // Argument instructions represent live-in registers and not real
828        // instructions.
829        if (WebAssembly::isArgument(Def->getOpcode()))
830          continue;
831
832        // Currently catch's return value register cannot be stackified, because
833        // the wasm LLVM backend currently does not support live-in values
834        // entering blocks, which is a part of multi-value proposal.
835        //
836        // Once we support live-in values of wasm blocks, this can be:
837        // catch                           ; push exnref value onto stack
838        // block exnref -> i32
839        // br_on_exn $__cpp_exception      ; pop the exnref value
840        // end_block
841        //
842        // But because we don't support it yet, the catch instruction's dst
843        // register should be assigned to a local to be propagated across
844        // 'block' boundary now.
845        //
846        // TODO Fix this once we support the multi-value proposal.
847        if (Def->getOpcode() == WebAssembly::CATCH)
848          continue;
849
850        // Decide which strategy to take. Prefer to move a single-use value
851        // over cloning it, and prefer cloning over introducing a tee.
852        // For moving, we require the def to be in the same block as the use;
853        // this makes things simpler (LiveIntervals' handleMove function only
854        // supports intra-block moves) and it's MachineSink's job to catch all
855        // the sinking opportunities anyway.
856        bool SameBlock = Def->getParent() == &MBB;
857        bool CanMove = SameBlock && isSafeToMove(Def, Insert, AA, MRI) &&
858                       !TreeWalker.isOnStack(Reg);
859        if (CanMove && hasOneUse(Reg, Def, MRI, MDT, LIS)) {
860          Insert = moveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
861        } else if (shouldRematerialize(*Def, AA, TII)) {
862          Insert =
863              rematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
864                                    LIS, MFI, MRI, TII, TRI);
865        } else if (CanMove &&
866                   oneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
867          Insert = moveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
868                                         MRI, TII);
869        } else {
870          // We failed to stackify the operand. If the problem was ordering
871          // constraints, Commuting may be able to help.
872          if (!CanMove && SameBlock)
873            Commuting.maybeCommute(Insert, TreeWalker, TII);
874          // Proceed to the next operand.
875          continue;
876        }
877
878        // If the instruction we just stackified is an IMPLICIT_DEF, convert it
879        // to a constant 0 so that the def is explicit, and the push/pop
880        // correspondence is maintained.
881        if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
882          convertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
883
884        // We stackified an operand. Add the defining instruction's operands to
885        // the worklist stack now to continue to build an ever deeper tree.
886        Commuting.reset();
887        TreeWalker.pushOperands(Insert);
888      }
889
890      // If we stackified any operands, skip over the tree to start looking for
891      // the next instruction we can build a tree on.
892      if (Insert != &*MII) {
893        imposeStackOrdering(&*MII);
894        MII = MachineBasicBlock::iterator(Insert).getReverse();
895        Changed = true;
896      }
897    }
898  }
899
900  // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
901  // that it never looks like a use-before-def.
902  if (Changed) {
903    MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK);
904    for (MachineBasicBlock &MBB : MF)
905      MBB.addLiveIn(WebAssembly::VALUE_STACK);
906  }
907
908#ifndef NDEBUG
909  // Verify that pushes and pops are performed in LIFO order.
910  SmallVector<unsigned, 0> Stack;
911  for (MachineBasicBlock &MBB : MF) {
912    for (MachineInstr &MI : MBB) {
913      if (MI.isDebugInstr())
914        continue;
915      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
916        if (!MO.isReg())
917          continue;
918        Register Reg = MO.getReg();
919
920        if (MFI.isVRegStackified(Reg)) {
921          if (MO.isDef())
922            Stack.push_back(Reg);
923          else
924            assert(Stack.pop_back_val() == Reg &&
925                   "Register stack pop should be paired with a push");
926        }
927      }
928    }
929    // TODO: Generalize this code to support keeping values on the stack across
930    // basic block boundaries.
931    assert(Stack.empty() &&
932           "Register stack pushes and pops should be balanced");
933  }
934#endif
935
936  return Changed;
937}
938