1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions.  This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
23//===----------------------------------------------------------------------===//
24
25#include "X86.h"
26#include "X86InstrInfo.h"
27#include "llvm/ADT/DepthFirstIterator.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallPtrSet.h"
30#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/CodeGen/EdgeBundles.h"
34#include "llvm/CodeGen/LivePhysRegs.h"
35#include "llvm/CodeGen/MachineFunctionPass.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/Passes.h"
39#include "llvm/CodeGen/TargetInstrInfo.h"
40#include "llvm/CodeGen/TargetSubtargetInfo.h"
41#include "llvm/Config/llvm-config.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/InitializePasses.h"
44#include "llvm/Support/Debug.h"
45#include "llvm/Support/ErrorHandling.h"
46#include "llvm/Support/raw_ostream.h"
47#include "llvm/Target/TargetMachine.h"
48#include <algorithm>
49#include <bitset>
50using namespace llvm;
51
52#define DEBUG_TYPE "x86-codegen"
53
54STATISTIC(NumFXCH, "Number of fxch instructions inserted");
55STATISTIC(NumFP  , "Number of floating point instructions");
56
57namespace {
58  const unsigned ScratchFPReg = 7;
59
60  struct FPS : public MachineFunctionPass {
61    static char ID;
62    FPS() : MachineFunctionPass(ID) {
63      // This is really only to keep valgrind quiet.
64      // The logic in isLive() is too much for it.
65      memset(Stack, 0, sizeof(Stack));
66      memset(RegMap, 0, sizeof(RegMap));
67    }
68
69    void getAnalysisUsage(AnalysisUsage &AU) const override {
70      AU.setPreservesCFG();
71      AU.addRequired<EdgeBundles>();
72      AU.addPreservedID(MachineLoopInfoID);
73      AU.addPreservedID(MachineDominatorsID);
74      MachineFunctionPass::getAnalysisUsage(AU);
75    }
76
77    bool runOnMachineFunction(MachineFunction &MF) override;
78
79    MachineFunctionProperties getRequiredProperties() const override {
80      return MachineFunctionProperties().set(
81          MachineFunctionProperties::Property::NoVRegs);
82    }
83
84    StringRef getPassName() const override { return "X86 FP Stackifier"; }
85
86  private:
87    const TargetInstrInfo *TII = nullptr; // Machine instruction info.
88
89    // Two CFG edges are related if they leave the same block, or enter the same
90    // block. The transitive closure of an edge under this relation is a
91    // LiveBundle. It represents a set of CFG edges where the live FP stack
92    // registers must be allocated identically in the x87 stack.
93    //
94    // A LiveBundle is usually all the edges leaving a block, or all the edges
95    // entering a block, but it can contain more edges if critical edges are
96    // present.
97    //
98    // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
99    // but the exact mapping of FP registers to stack slots is fixed later.
100    struct LiveBundle {
101      // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
102      unsigned Mask;
103
104      // Number of pre-assigned live registers in FixStack. This is 0 when the
105      // stack order has not yet been fixed.
106      unsigned FixCount;
107
108      // Assigned stack order for live-in registers.
109      // FixStack[i] == getStackEntry(i) for all i < FixCount.
110      unsigned char FixStack[8];
111
112      LiveBundle() : Mask(0), FixCount(0) {}
113
114      // Have the live registers been assigned a stack order yet?
115      bool isFixed() const { return !Mask || FixCount; }
116    };
117
118    // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
119    // with no live FP registers.
120    SmallVector<LiveBundle, 8> LiveBundles;
121
122    // The edge bundle analysis provides indices into the LiveBundles vector.
123    EdgeBundles *Bundles = nullptr;
124
125    // Return a bitmask of FP registers in block's live-in list.
126    static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
127      unsigned Mask = 0;
128      for (MachineBasicBlock::livein_iterator I = MBB->livein_begin();
129           I != MBB->livein_end(); ) {
130        MCPhysReg Reg = I->PhysReg;
131        static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
132        if (Reg >= X86::FP0 && Reg <= X86::FP6) {
133          Mask |= 1 << (Reg - X86::FP0);
134          if (RemoveFPs) {
135            I = MBB->removeLiveIn(I);
136            continue;
137          }
138        }
139        ++I;
140      }
141      return Mask;
142    }
143
144    // Partition all the CFG edges into LiveBundles.
145    void bundleCFGRecomputeKillFlags(MachineFunction &MF);
146
147    MachineBasicBlock *MBB = nullptr;     // Current basic block
148
149    // The hardware keeps track of how many FP registers are live, so we have
150    // to model that exactly. Usually, each live register corresponds to an
151    // FP<n> register, but when dealing with calls, returns, and inline
152    // assembly, it is sometimes necessary to have live scratch registers.
153    unsigned Stack[8];          // FP<n> Registers in each stack slot...
154    unsigned StackTop = 0;      // The current top of the FP stack.
155
156    enum {
157      NumFPRegs = 8             // Including scratch pseudo-registers.
158    };
159
160    // For each live FP<n> register, point to its Stack[] entry.
161    // The first entries correspond to FP0-FP6, the rest are scratch registers
162    // used when we need slightly different live registers than what the
163    // register allocator thinks.
164    unsigned RegMap[NumFPRegs];
165
166    // Set up our stack model to match the incoming registers to MBB.
167    void setupBlockStack();
168
169    // Shuffle live registers to match the expectations of successor blocks.
170    void finishBlockStack();
171
172#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
173    void dumpStack() const {
174      dbgs() << "Stack contents:";
175      for (unsigned i = 0; i != StackTop; ++i) {
176        dbgs() << " FP" << Stack[i];
177        assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
178      }
179    }
180#endif
181
182    /// getSlot - Return the stack slot number a particular register number is
183    /// in.
184    unsigned getSlot(unsigned RegNo) const {
185      assert(RegNo < NumFPRegs && "Regno out of range!");
186      return RegMap[RegNo];
187    }
188
189    /// isLive - Is RegNo currently live in the stack?
190    bool isLive(unsigned RegNo) const {
191      unsigned Slot = getSlot(RegNo);
192      return Slot < StackTop && Stack[Slot] == RegNo;
193    }
194
195    /// getStackEntry - Return the X86::FP<n> register in register ST(i).
196    unsigned getStackEntry(unsigned STi) const {
197      if (STi >= StackTop)
198        report_fatal_error("Access past stack top!");
199      return Stack[StackTop-1-STi];
200    }
201
202    /// getSTReg - Return the X86::ST(i) register which contains the specified
203    /// FP<RegNo> register.
204    unsigned getSTReg(unsigned RegNo) const {
205      return StackTop - 1 - getSlot(RegNo) + X86::ST0;
206    }
207
208    // pushReg - Push the specified FP<n> register onto the stack.
209    void pushReg(unsigned Reg) {
210      assert(Reg < NumFPRegs && "Register number out of range!");
211      if (StackTop >= 8)
212        report_fatal_error("Stack overflow!");
213      Stack[StackTop] = Reg;
214      RegMap[Reg] = StackTop++;
215    }
216
217    // popReg - Pop a register from the stack.
218    void popReg() {
219      if (StackTop == 0)
220        report_fatal_error("Cannot pop empty stack!");
221      RegMap[Stack[--StackTop]] = ~0;     // Update state
222    }
223
224    bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
225    void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
226      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
227      if (isAtTop(RegNo)) return;
228
229      unsigned STReg = getSTReg(RegNo);
230      unsigned RegOnTop = getStackEntry(0);
231
232      // Swap the slots the regs are in.
233      std::swap(RegMap[RegNo], RegMap[RegOnTop]);
234
235      // Swap stack slot contents.
236      if (RegMap[RegOnTop] >= StackTop)
237        report_fatal_error("Access past stack top!");
238      std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
239
240      // Emit an fxch to update the runtime processors version of the state.
241      BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
242      ++NumFXCH;
243    }
244
245    void duplicateToTop(unsigned RegNo, unsigned AsReg,
246                        MachineBasicBlock::iterator I) {
247      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
248      unsigned STReg = getSTReg(RegNo);
249      pushReg(AsReg);   // New register on top of stack
250
251      BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
252    }
253
254    /// popStackAfter - Pop the current value off of the top of the FP stack
255    /// after the specified instruction.
256    void popStackAfter(MachineBasicBlock::iterator &I);
257
258    /// freeStackSlotAfter - Free the specified register from the register
259    /// stack, so that it is no longer in a register.  If the register is
260    /// currently at the top of the stack, we just pop the current instruction,
261    /// otherwise we store the current top-of-stack into the specified slot,
262    /// then pop the top of stack.
263    void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
264
265    /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
266    /// instruction.
267    MachineBasicBlock::iterator
268    freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
269
270    /// Adjust the live registers to be the set in Mask.
271    void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
272
273    /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
274    /// st(0), FP reg FixStack[1] is st(1) etc.
275    void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
276                         MachineBasicBlock::iterator I);
277
278    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
279
280    void handleCall(MachineBasicBlock::iterator &I);
281    void handleReturn(MachineBasicBlock::iterator &I);
282    void handleZeroArgFP(MachineBasicBlock::iterator &I);
283    void handleOneArgFP(MachineBasicBlock::iterator &I);
284    void handleOneArgFPRW(MachineBasicBlock::iterator &I);
285    void handleTwoArgFP(MachineBasicBlock::iterator &I);
286    void handleCompareFP(MachineBasicBlock::iterator &I);
287    void handleCondMovFP(MachineBasicBlock::iterator &I);
288    void handleSpecialFP(MachineBasicBlock::iterator &I);
289
290    // Check if a COPY instruction is using FP registers.
291    static bool isFPCopy(MachineInstr &MI) {
292      Register DstReg = MI.getOperand(0).getReg();
293      Register SrcReg = MI.getOperand(1).getReg();
294
295      return X86::RFP80RegClass.contains(DstReg) ||
296        X86::RFP80RegClass.contains(SrcReg);
297    }
298
299    void setKillFlags(MachineBasicBlock &MBB) const;
300  };
301}
302
303char FPS::ID = 0;
304
305INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
306                      false, false)
307INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
308INITIALIZE_PASS_END(FPS, DEBUG_TYPE, "X86 FP Stackifier",
309                    false, false)
310
311FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
312
313/// getFPReg - Return the X86::FPx register number for the specified operand.
314/// For example, this returns 3 for X86::FP3.
315static unsigned getFPReg(const MachineOperand &MO) {
316  assert(MO.isReg() && "Expected an FP register!");
317  Register Reg = MO.getReg();
318  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
319  return Reg - X86::FP0;
320}
321
322/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
323/// register references into FP stack references.
324///
325bool FPS::runOnMachineFunction(MachineFunction &MF) {
326  // We only need to run this pass if there are any FP registers used in this
327  // function.  If it is all integer, there is nothing for us to do!
328  bool FPIsUsed = false;
329
330  static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
331  const MachineRegisterInfo &MRI = MF.getRegInfo();
332  for (unsigned i = 0; i <= 6; ++i)
333    if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
334      FPIsUsed = true;
335      break;
336    }
337
338  // Early exit.
339  if (!FPIsUsed) return false;
340
341  Bundles = &getAnalysis<EdgeBundles>();
342  TII = MF.getSubtarget().getInstrInfo();
343
344  // Prepare cross-MBB liveness.
345  bundleCFGRecomputeKillFlags(MF);
346
347  StackTop = 0;
348
349  // Process the function in depth first order so that we process at least one
350  // of the predecessors for every reachable block in the function.
351  df_iterator_default_set<MachineBasicBlock*> Processed;
352  MachineBasicBlock *Entry = &MF.front();
353
354  LiveBundle &Bundle =
355    LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
356
357  // In regcall convention, some FP registers may not be passed through
358  // the stack, so they will need to be assigned to the stack first
359  if ((Entry->getParent()->getFunction().getCallingConv() ==
360    CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
361    // In the register calling convention, up to one FP argument could be
362    // saved in the first FP register.
363    // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
364    // that the FP registers contain arguments.
365    // The actual value is passed in FP0.
366    // Here we fix the stack and mark FP0 as pre-assigned register.
367    assert((Bundle.Mask & 0xFE) == 0 &&
368      "Only FP0 could be passed as an argument");
369    Bundle.FixCount = 1;
370    Bundle.FixStack[0] = 0;
371  }
372
373  bool Changed = false;
374  for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
375    Changed |= processBasicBlock(MF, *BB);
376
377  // Process any unreachable blocks in arbitrary order now.
378  if (MF.size() != Processed.size())
379    for (MachineBasicBlock &BB : MF)
380      if (Processed.insert(&BB).second)
381        Changed |= processBasicBlock(MF, BB);
382
383  LiveBundles.clear();
384
385  return Changed;
386}
387
388/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
389/// live-out sets for the FP registers. Consistent means that the set of
390/// registers live-out from a block is identical to the live-in set of all
391/// successors. This is not enforced by the normal live-in lists since
392/// registers may be implicitly defined, or not used by all successors.
393void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
394  assert(LiveBundles.empty() && "Stale data in LiveBundles");
395  LiveBundles.resize(Bundles->getNumBundles());
396
397  // Gather the actual live-in masks for all MBBs.
398  for (MachineBasicBlock &MBB : MF) {
399    setKillFlags(MBB);
400
401    const unsigned Mask = calcLiveInMask(&MBB, false);
402    if (!Mask)
403      continue;
404    // Update MBB ingoing bundle mask.
405    LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
406  }
407}
408
409/// processBasicBlock - Loop over all of the instructions in the basic block,
410/// transforming FP instructions into their stack form.
411///
412bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
413  bool Changed = false;
414  MBB = &BB;
415
416  setupBlockStack();
417
418  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
419    MachineInstr &MI = *I;
420    uint64_t Flags = MI.getDesc().TSFlags;
421
422    unsigned FPInstClass = Flags & X86II::FPTypeMask;
423    if (MI.isInlineAsm())
424      FPInstClass = X86II::SpecialFP;
425
426    if (MI.isCopy() && isFPCopy(MI))
427      FPInstClass = X86II::SpecialFP;
428
429    if (MI.isImplicitDef() &&
430        X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
431      FPInstClass = X86II::SpecialFP;
432
433    if (MI.isCall())
434      FPInstClass = X86II::SpecialFP;
435
436    if (FPInstClass == X86II::NotFP)
437      continue;  // Efficiently ignore non-fp insts!
438
439    MachineInstr *PrevMI = nullptr;
440    if (I != BB.begin())
441      PrevMI = &*std::prev(I);
442
443    ++NumFP;  // Keep track of # of pseudo instrs
444    LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
445
446    // Get dead variables list now because the MI pointer may be deleted as part
447    // of processing!
448    SmallVector<unsigned, 8> DeadRegs;
449    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
450      const MachineOperand &MO = MI.getOperand(i);
451      if (MO.isReg() && MO.isDead())
452        DeadRegs.push_back(MO.getReg());
453    }
454
455    switch (FPInstClass) {
456    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
457    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
458    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
459    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
460    case X86II::CompareFP:  handleCompareFP(I); break;
461    case X86II::CondMovFP:  handleCondMovFP(I); break;
462    case X86II::SpecialFP:  handleSpecialFP(I); break;
463    default: llvm_unreachable("Unknown FP Type!");
464    }
465
466    // Check to see if any of the values defined by this instruction are dead
467    // after definition.  If so, pop them.
468    for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
469      unsigned Reg = DeadRegs[i];
470      // Check if Reg is live on the stack. An inline-asm register operand that
471      // is in the clobber list and marked dead might not be live on the stack.
472      static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
473      if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
474        LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
475        freeStackSlotAfter(I, Reg-X86::FP0);
476      }
477    }
478
479    // Print out all of the instructions expanded to if -debug
480    LLVM_DEBUG({
481      MachineBasicBlock::iterator PrevI = PrevMI;
482      if (I == PrevI) {
483        dbgs() << "Just deleted pseudo instruction\n";
484      } else {
485        MachineBasicBlock::iterator Start = I;
486        // Rewind to first instruction newly inserted.
487        while (Start != BB.begin() && std::prev(Start) != PrevI)
488          --Start;
489        dbgs() << "Inserted instructions:\n\t";
490        Start->print(dbgs());
491        while (++Start != std::next(I)) {
492        }
493      }
494      dumpStack();
495    });
496    (void)PrevMI;
497
498    Changed = true;
499  }
500
501  finishBlockStack();
502
503  return Changed;
504}
505
506/// setupBlockStack - Use the live bundles to set up our model of the stack
507/// to match predecessors' live out stack.
508void FPS::setupBlockStack() {
509  LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
510                    << " derived from " << MBB->getName() << ".\n");
511  StackTop = 0;
512  // Get the live-in bundle for MBB.
513  const LiveBundle &Bundle =
514    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
515
516  if (!Bundle.Mask) {
517    LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
518    return;
519  }
520
521  // Depth-first iteration should ensure that we always have an assigned stack.
522  assert(Bundle.isFixed() && "Reached block before any predecessors");
523
524  // Push the fixed live-in registers.
525  for (unsigned i = Bundle.FixCount; i > 0; --i) {
526    LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
527                      << unsigned(Bundle.FixStack[i - 1]) << '\n');
528    pushReg(Bundle.FixStack[i-1]);
529  }
530
531  // Kill off unwanted live-ins. This can happen with a critical edge.
532  // FIXME: We could keep these live registers around as zombies. They may need
533  // to be revived at the end of a short block. It might save a few instrs.
534  unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
535  adjustLiveRegs(Mask, MBB->begin());
536  LLVM_DEBUG(MBB->dump());
537}
538
539/// finishBlockStack - Revive live-outs that are implicitly defined out of
540/// MBB. Shuffle live registers to match the expected fixed stack of any
541/// predecessors, and ensure that all predecessors are expecting the same
542/// stack.
543void FPS::finishBlockStack() {
544  // The RET handling below takes care of return blocks for us.
545  if (MBB->succ_empty())
546    return;
547
548  LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
549                    << " derived from " << MBB->getName() << ".\n");
550
551  // Get MBB's live-out bundle.
552  unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
553  LiveBundle &Bundle = LiveBundles[BundleIdx];
554
555  // We may need to kill and define some registers to match successors.
556  // FIXME: This can probably be combined with the shuffle below.
557  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
558  adjustLiveRegs(Bundle.Mask, Term);
559
560  if (!Bundle.Mask) {
561    LLVM_DEBUG(dbgs() << "No live-outs.\n");
562    return;
563  }
564
565  // Has the stack order been fixed yet?
566  LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
567  if (Bundle.isFixed()) {
568    LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
569    shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
570  } else {
571    // Not fixed yet, we get to choose.
572    LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
573    Bundle.FixCount = StackTop;
574    for (unsigned i = 0; i < StackTop; ++i)
575      Bundle.FixStack[i] = getStackEntry(i);
576  }
577}
578
579
580//===----------------------------------------------------------------------===//
581// Efficient Lookup Table Support
582//===----------------------------------------------------------------------===//
583
584namespace {
585  struct TableEntry {
586    uint16_t from;
587    uint16_t to;
588    bool operator<(const TableEntry &TE) const { return from < TE.from; }
589    friend bool operator<(const TableEntry &TE, unsigned V) {
590      return TE.from < V;
591    }
592    friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
593                                                const TableEntry &TE) {
594      return V < TE.from;
595    }
596  };
597}
598
599static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
600  const TableEntry *I = llvm::lower_bound(Table, Opcode);
601  if (I != Table.end() && I->from == Opcode)
602    return I->to;
603  return -1;
604}
605
606#ifdef NDEBUG
607#define ASSERT_SORTED(TABLE)
608#else
609#define ASSERT_SORTED(TABLE)                                                   \
610  {                                                                            \
611    static std::atomic<bool> TABLE##Checked(false);                            \
612    if (!TABLE##Checked.load(std::memory_order_relaxed)) {                     \
613      assert(std::is_sorted(std::begin(TABLE), std::end(TABLE)) &&             \
614             "All lookup tables must be sorted for efficient access!");        \
615      TABLE##Checked.store(true, std::memory_order_relaxed);                   \
616    }                                                                          \
617  }
618#endif
619
620//===----------------------------------------------------------------------===//
621// Register File -> Register Stack Mapping Methods
622//===----------------------------------------------------------------------===//
623
624// OpcodeTable - Sorted map of register instructions to their stack version.
625// The first element is an register file pseudo instruction, the second is the
626// concrete X86 instruction which uses the register stack.
627//
628static const TableEntry OpcodeTable[] = {
629  { X86::ABS_Fp32     , X86::ABS_F     },
630  { X86::ABS_Fp64     , X86::ABS_F     },
631  { X86::ABS_Fp80     , X86::ABS_F     },
632  { X86::ADD_Fp32m    , X86::ADD_F32m  },
633  { X86::ADD_Fp64m    , X86::ADD_F64m  },
634  { X86::ADD_Fp64m32  , X86::ADD_F32m  },
635  { X86::ADD_Fp80m32  , X86::ADD_F32m  },
636  { X86::ADD_Fp80m64  , X86::ADD_F64m  },
637  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
638  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
639  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
640  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
641  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
642  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
643  { X86::CHS_Fp32     , X86::CHS_F     },
644  { X86::CHS_Fp64     , X86::CHS_F     },
645  { X86::CHS_Fp80     , X86::CHS_F     },
646  { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
647  { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
648  { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
649  { X86::CMOVB_Fp32   , X86::CMOVB_F   },
650  { X86::CMOVB_Fp64   , X86::CMOVB_F  },
651  { X86::CMOVB_Fp80   , X86::CMOVB_F  },
652  { X86::CMOVE_Fp32   , X86::CMOVE_F  },
653  { X86::CMOVE_Fp64   , X86::CMOVE_F   },
654  { X86::CMOVE_Fp80   , X86::CMOVE_F   },
655  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
656  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
657  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
658  { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
659  { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
660  { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
661  { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
662  { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
663  { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
664  { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
665  { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
666  { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
667  { X86::CMOVP_Fp32   , X86::CMOVP_F   },
668  { X86::CMOVP_Fp64   , X86::CMOVP_F   },
669  { X86::CMOVP_Fp80   , X86::CMOVP_F   },
670  { X86::COM_FpIr32   , X86::COM_FIr   },
671  { X86::COM_FpIr64   , X86::COM_FIr   },
672  { X86::COM_FpIr80   , X86::COM_FIr   },
673  { X86::COM_Fpr32    , X86::COM_FST0r },
674  { X86::COM_Fpr64    , X86::COM_FST0r },
675  { X86::COM_Fpr80    , X86::COM_FST0r },
676  { X86::DIVR_Fp32m   , X86::DIVR_F32m },
677  { X86::DIVR_Fp64m   , X86::DIVR_F64m },
678  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
679  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
680  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
681  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
682  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
683  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
684  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
685  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
686  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
687  { X86::DIV_Fp32m    , X86::DIV_F32m  },
688  { X86::DIV_Fp64m    , X86::DIV_F64m  },
689  { X86::DIV_Fp64m32  , X86::DIV_F32m  },
690  { X86::DIV_Fp80m32  , X86::DIV_F32m  },
691  { X86::DIV_Fp80m64  , X86::DIV_F64m  },
692  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
693  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
694  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
695  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
696  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
697  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
698  { X86::ILD_Fp16m32  , X86::ILD_F16m  },
699  { X86::ILD_Fp16m64  , X86::ILD_F16m  },
700  { X86::ILD_Fp16m80  , X86::ILD_F16m  },
701  { X86::ILD_Fp32m32  , X86::ILD_F32m  },
702  { X86::ILD_Fp32m64  , X86::ILD_F32m  },
703  { X86::ILD_Fp32m80  , X86::ILD_F32m  },
704  { X86::ILD_Fp64m32  , X86::ILD_F64m  },
705  { X86::ILD_Fp64m64  , X86::ILD_F64m  },
706  { X86::ILD_Fp64m80  , X86::ILD_F64m  },
707  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
708  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
709  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
710  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
711  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
712  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
713  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
714  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
715  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
716  { X86::IST_Fp16m32  , X86::IST_F16m  },
717  { X86::IST_Fp16m64  , X86::IST_F16m  },
718  { X86::IST_Fp16m80  , X86::IST_F16m  },
719  { X86::IST_Fp32m32  , X86::IST_F32m  },
720  { X86::IST_Fp32m64  , X86::IST_F32m  },
721  { X86::IST_Fp32m80  , X86::IST_F32m  },
722  { X86::IST_Fp64m32  , X86::IST_FP64m },
723  { X86::IST_Fp64m64  , X86::IST_FP64m },
724  { X86::IST_Fp64m80  , X86::IST_FP64m },
725  { X86::LD_Fp032     , X86::LD_F0     },
726  { X86::LD_Fp064     , X86::LD_F0     },
727  { X86::LD_Fp080     , X86::LD_F0     },
728  { X86::LD_Fp132     , X86::LD_F1     },
729  { X86::LD_Fp164     , X86::LD_F1     },
730  { X86::LD_Fp180     , X86::LD_F1     },
731  { X86::LD_Fp32m     , X86::LD_F32m   },
732  { X86::LD_Fp32m64   , X86::LD_F32m   },
733  { X86::LD_Fp32m80   , X86::LD_F32m   },
734  { X86::LD_Fp64m     , X86::LD_F64m   },
735  { X86::LD_Fp64m80   , X86::LD_F64m   },
736  { X86::LD_Fp80m     , X86::LD_F80m   },
737  { X86::MUL_Fp32m    , X86::MUL_F32m  },
738  { X86::MUL_Fp64m    , X86::MUL_F64m  },
739  { X86::MUL_Fp64m32  , X86::MUL_F32m  },
740  { X86::MUL_Fp80m32  , X86::MUL_F32m  },
741  { X86::MUL_Fp80m64  , X86::MUL_F64m  },
742  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
743  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
744  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
745  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
746  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
747  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
748  { X86::SQRT_Fp32    , X86::SQRT_F    },
749  { X86::SQRT_Fp64    , X86::SQRT_F    },
750  { X86::SQRT_Fp80    , X86::SQRT_F    },
751  { X86::ST_Fp32m     , X86::ST_F32m   },
752  { X86::ST_Fp64m     , X86::ST_F64m   },
753  { X86::ST_Fp64m32   , X86::ST_F32m   },
754  { X86::ST_Fp80m32   , X86::ST_F32m   },
755  { X86::ST_Fp80m64   , X86::ST_F64m   },
756  { X86::ST_FpP80m    , X86::ST_FP80m  },
757  { X86::SUBR_Fp32m   , X86::SUBR_F32m },
758  { X86::SUBR_Fp64m   , X86::SUBR_F64m },
759  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
760  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
761  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
762  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
763  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
764  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
765  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
766  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
767  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
768  { X86::SUB_Fp32m    , X86::SUB_F32m  },
769  { X86::SUB_Fp64m    , X86::SUB_F64m  },
770  { X86::SUB_Fp64m32  , X86::SUB_F32m  },
771  { X86::SUB_Fp80m32  , X86::SUB_F32m  },
772  { X86::SUB_Fp80m64  , X86::SUB_F64m  },
773  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
774  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
775  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
776  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
777  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
778  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
779  { X86::TST_Fp32     , X86::TST_F     },
780  { X86::TST_Fp64     , X86::TST_F     },
781  { X86::TST_Fp80     , X86::TST_F     },
782  { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
783  { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
784  { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
785  { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
786  { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
787  { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
788};
789
790static unsigned getConcreteOpcode(unsigned Opcode) {
791  ASSERT_SORTED(OpcodeTable);
792  int Opc = Lookup(OpcodeTable, Opcode);
793  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
794  return Opc;
795}
796
797//===----------------------------------------------------------------------===//
798// Helper Methods
799//===----------------------------------------------------------------------===//
800
801// PopTable - Sorted map of instructions to their popping version.  The first
802// element is an instruction, the second is the version which pops.
803//
804static const TableEntry PopTable[] = {
805  { X86::ADD_FrST0 , X86::ADD_FPrST0  },
806
807  { X86::COMP_FST0r, X86::FCOMPP      },
808  { X86::COM_FIr   , X86::COM_FIPr    },
809  { X86::COM_FST0r , X86::COMP_FST0r  },
810
811  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
812  { X86::DIV_FrST0 , X86::DIV_FPrST0  },
813
814  { X86::IST_F16m  , X86::IST_FP16m   },
815  { X86::IST_F32m  , X86::IST_FP32m   },
816
817  { X86::MUL_FrST0 , X86::MUL_FPrST0  },
818
819  { X86::ST_F32m   , X86::ST_FP32m    },
820  { X86::ST_F64m   , X86::ST_FP64m    },
821  { X86::ST_Frr    , X86::ST_FPrr     },
822
823  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
824  { X86::SUB_FrST0 , X86::SUB_FPrST0  },
825
826  { X86::UCOM_FIr  , X86::UCOM_FIPr   },
827
828  { X86::UCOM_FPr  , X86::UCOM_FPPr   },
829  { X86::UCOM_Fr   , X86::UCOM_FPr    },
830};
831
832/// popStackAfter - Pop the current value off of the top of the FP stack after
833/// the specified instruction.  This attempts to be sneaky and combine the pop
834/// into the instruction itself if possible.  The iterator is left pointing to
835/// the last instruction, be it a new pop instruction inserted, or the old
836/// instruction if it was modified in place.
837///
838void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
839  MachineInstr &MI = *I;
840  const DebugLoc &dl = MI.getDebugLoc();
841  ASSERT_SORTED(PopTable);
842
843  popReg();
844
845  // Check to see if there is a popping version of this instruction...
846  int Opcode = Lookup(PopTable, I->getOpcode());
847  if (Opcode != -1) {
848    I->setDesc(TII->get(Opcode));
849    if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
850      I->RemoveOperand(0);
851  } else {    // Insert an explicit pop
852    I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
853  }
854}
855
856/// freeStackSlotAfter - Free the specified register from the register stack, so
857/// that it is no longer in a register.  If the register is currently at the top
858/// of the stack, we just pop the current instruction, otherwise we store the
859/// current top-of-stack into the specified slot, then pop the top of stack.
860void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
861  if (getStackEntry(0) == FPRegNo) {  // already at the top of stack? easy.
862    popStackAfter(I);
863    return;
864  }
865
866  // Otherwise, store the top of stack into the dead slot, killing the operand
867  // without having to add in an explicit xchg then pop.
868  //
869  I = freeStackSlotBefore(++I, FPRegNo);
870}
871
872/// freeStackSlotBefore - Free the specified register without trying any
873/// folding.
874MachineBasicBlock::iterator
875FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
876  unsigned STReg    = getSTReg(FPRegNo);
877  unsigned OldSlot  = getSlot(FPRegNo);
878  unsigned TopReg   = Stack[StackTop-1];
879  Stack[OldSlot]    = TopReg;
880  RegMap[TopReg]    = OldSlot;
881  RegMap[FPRegNo]   = ~0;
882  Stack[--StackTop] = ~0;
883  return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
884      .addReg(STReg)
885      .getInstr();
886}
887
888/// adjustLiveRegs - Kill and revive registers such that exactly the FP
889/// registers with a bit in Mask are live.
890void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
891  unsigned Defs = Mask;
892  unsigned Kills = 0;
893  for (unsigned i = 0; i < StackTop; ++i) {
894    unsigned RegNo = Stack[i];
895    if (!(Defs & (1 << RegNo)))
896      // This register is live, but we don't want it.
897      Kills |= (1 << RegNo);
898    else
899      // We don't need to imp-def this live register.
900      Defs &= ~(1 << RegNo);
901  }
902  assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
903
904  // Produce implicit-defs for free by using killed registers.
905  while (Kills && Defs) {
906    unsigned KReg = countTrailingZeros(Kills);
907    unsigned DReg = countTrailingZeros(Defs);
908    LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
909                      << "\n");
910    std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
911    std::swap(RegMap[KReg], RegMap[DReg]);
912    Kills &= ~(1 << KReg);
913    Defs &= ~(1 << DReg);
914  }
915
916  // Kill registers by popping.
917  if (Kills && I != MBB->begin()) {
918    MachineBasicBlock::iterator I2 = std::prev(I);
919    while (StackTop) {
920      unsigned KReg = getStackEntry(0);
921      if (!(Kills & (1 << KReg)))
922        break;
923      LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
924      popStackAfter(I2);
925      Kills &= ~(1 << KReg);
926    }
927  }
928
929  // Manually kill the rest.
930  while (Kills) {
931    unsigned KReg = countTrailingZeros(Kills);
932    LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
933    freeStackSlotBefore(I, KReg);
934    Kills &= ~(1 << KReg);
935  }
936
937  // Load zeros for all the imp-defs.
938  while(Defs) {
939    unsigned DReg = countTrailingZeros(Defs);
940    LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
941    BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
942    pushReg(DReg);
943    Defs &= ~(1 << DReg);
944  }
945
946  // Now we should have the correct registers live.
947  LLVM_DEBUG(dumpStack());
948  assert(StackTop == countPopulation(Mask) && "Live count mismatch");
949}
950
951/// shuffleStackTop - emit fxch instructions before I to shuffle the top
952/// FixCount entries into the order given by FixStack.
953/// FIXME: Is there a better algorithm than insertion sort?
954void FPS::shuffleStackTop(const unsigned char *FixStack,
955                          unsigned FixCount,
956                          MachineBasicBlock::iterator I) {
957  // Move items into place, starting from the desired stack bottom.
958  while (FixCount--) {
959    // Old register at position FixCount.
960    unsigned OldReg = getStackEntry(FixCount);
961    // Desired register at position FixCount.
962    unsigned Reg = FixStack[FixCount];
963    if (Reg == OldReg)
964      continue;
965    // (Reg st0) (OldReg st0) = (Reg OldReg st0)
966    moveToTop(Reg, I);
967    if (FixCount > 0)
968      moveToTop(OldReg, I);
969  }
970  LLVM_DEBUG(dumpStack());
971}
972
973
974//===----------------------------------------------------------------------===//
975// Instruction transformation implementation
976//===----------------------------------------------------------------------===//
977
978void FPS::handleCall(MachineBasicBlock::iterator &I) {
979  MachineInstr &MI = *I;
980  unsigned STReturns = 0;
981
982  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
983    MachineOperand &Op = MI.getOperand(i);
984    if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
985      continue;
986
987    assert(Op.isImplicit() && "Expected implicit def/use");
988
989    if (Op.isDef())
990      STReturns |= 1 << getFPReg(Op);
991
992    // Remove the operand so that later passes don't see it.
993    MI.RemoveOperand(i);
994    --i;
995    --e;
996  }
997
998  unsigned N = countTrailingOnes(STReturns);
999
1000  // FP registers used for function return must be consecutive starting at
1001  // FP0
1002  assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1003
1004  // Reset the FP Stack - It is required because of possible leftovers from
1005  // passed arguments. The caller should assume that the FP stack is
1006  // returned empty (unless the callee returns values on FP stack).
1007  while (StackTop > 0)
1008    popReg();
1009
1010  for (unsigned I = 0; I < N; ++I)
1011    pushReg(N - I - 1);
1012}
1013
1014/// If RET has an FP register use operand, pass the first one in ST(0) and
1015/// the second one in ST(1).
1016void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1017  MachineInstr &MI = *I;
1018
1019  // Find the register operands.
1020  unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1021  unsigned LiveMask = 0;
1022
1023  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1024    MachineOperand &Op = MI.getOperand(i);
1025    if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1026      continue;
1027    // FP Register uses must be kills unless there are two uses of the same
1028    // register, in which case only one will be a kill.
1029    assert(Op.isUse() &&
1030           (Op.isKill() ||                    // Marked kill.
1031            getFPReg(Op) == FirstFPRegOp ||   // Second instance.
1032            MI.killsRegister(Op.getReg())) && // Later use is marked kill.
1033           "Ret only defs operands, and values aren't live beyond it");
1034
1035    if (FirstFPRegOp == ~0U)
1036      FirstFPRegOp = getFPReg(Op);
1037    else {
1038      assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1039      SecondFPRegOp = getFPReg(Op);
1040    }
1041    LiveMask |= (1 << getFPReg(Op));
1042
1043    // Remove the operand so that later passes don't see it.
1044    MI.RemoveOperand(i);
1045    --i;
1046    --e;
1047  }
1048
1049  // We may have been carrying spurious live-ins, so make sure only the
1050  // returned registers are left live.
1051  adjustLiveRegs(LiveMask, MI);
1052  if (!LiveMask) return;  // Quick check to see if any are possible.
1053
1054  // There are only four possibilities here:
1055  // 1) we are returning a single FP value.  In this case, it has to be in
1056  //    ST(0) already, so just declare success by removing the value from the
1057  //    FP Stack.
1058  if (SecondFPRegOp == ~0U) {
1059    // Assert that the top of stack contains the right FP register.
1060    assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1061           "Top of stack not the right register for RET!");
1062
1063    // Ok, everything is good, mark the value as not being on the stack
1064    // anymore so that our assertion about the stack being empty at end of
1065    // block doesn't fire.
1066    StackTop = 0;
1067    return;
1068  }
1069
1070  // Otherwise, we are returning two values:
1071  // 2) If returning the same value for both, we only have one thing in the FP
1072  //    stack.  Consider:  RET FP1, FP1
1073  if (StackTop == 1) {
1074    assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1075           "Stack misconfiguration for RET!");
1076
1077    // Duplicate the TOS so that we return it twice.  Just pick some other FPx
1078    // register to hold it.
1079    unsigned NewReg = ScratchFPReg;
1080    duplicateToTop(FirstFPRegOp, NewReg, MI);
1081    FirstFPRegOp = NewReg;
1082  }
1083
1084  /// Okay we know we have two different FPx operands now:
1085  assert(StackTop == 2 && "Must have two values live!");
1086
1087  /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1088  ///    in ST(1).  In this case, emit an fxch.
1089  if (getStackEntry(0) == SecondFPRegOp) {
1090    assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1091    moveToTop(FirstFPRegOp, MI);
1092  }
1093
1094  /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1095  /// ST(1).  Just remove both from our understanding of the stack and return.
1096  assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1097  assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1098  StackTop = 0;
1099}
1100
1101/// handleZeroArgFP - ST(0) = fld0    ST(0) = flds <mem>
1102///
1103void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1104  MachineInstr &MI = *I;
1105  unsigned DestReg = getFPReg(MI.getOperand(0));
1106
1107  // Change from the pseudo instruction to the concrete instruction.
1108  MI.RemoveOperand(0); // Remove the explicit ST(0) operand
1109  MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1110  MI.addOperand(
1111      MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1112
1113  // Result gets pushed on the stack.
1114  pushReg(DestReg);
1115}
1116
1117/// handleOneArgFP - fst <mem>, ST(0)
1118///
1119void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1120  MachineInstr &MI = *I;
1121  unsigned NumOps = MI.getDesc().getNumOperands();
1122  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1123         "Can only handle fst* & ftst instructions!");
1124
1125  // Is this the last use of the source register?
1126  unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1127  bool KillsSrc = MI.killsRegister(X86::FP0 + Reg);
1128
1129  // FISTP64m is strange because there isn't a non-popping versions.
1130  // If we have one _and_ we don't want to pop the operand, duplicate the value
1131  // on the stack instead of moving it.  This ensure that popping the value is
1132  // always ok.
1133  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1134  //
1135  if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1136                    MI.getOpcode() == X86::ISTT_Fp16m32 ||
1137                    MI.getOpcode() == X86::ISTT_Fp32m32 ||
1138                    MI.getOpcode() == X86::ISTT_Fp64m32 ||
1139                    MI.getOpcode() == X86::IST_Fp64m64 ||
1140                    MI.getOpcode() == X86::ISTT_Fp16m64 ||
1141                    MI.getOpcode() == X86::ISTT_Fp32m64 ||
1142                    MI.getOpcode() == X86::ISTT_Fp64m64 ||
1143                    MI.getOpcode() == X86::IST_Fp64m80 ||
1144                    MI.getOpcode() == X86::ISTT_Fp16m80 ||
1145                    MI.getOpcode() == X86::ISTT_Fp32m80 ||
1146                    MI.getOpcode() == X86::ISTT_Fp64m80 ||
1147                    MI.getOpcode() == X86::ST_FpP80m)) {
1148    duplicateToTop(Reg, ScratchFPReg, I);
1149  } else {
1150    moveToTop(Reg, I);            // Move to the top of the stack...
1151  }
1152
1153  // Convert from the pseudo instruction to the concrete instruction.
1154  MI.RemoveOperand(NumOps - 1); // Remove explicit ST(0) operand
1155  MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1156  MI.addOperand(
1157      MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1158
1159  if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1160      MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1161      MI.getOpcode() == X86::ST_FP80m) {
1162    if (StackTop == 0)
1163      report_fatal_error("Stack empty??");
1164    --StackTop;
1165  } else if (KillsSrc) { // Last use of operand?
1166    popStackAfter(I);
1167  }
1168}
1169
1170
1171/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1172/// replace the value with a newly computed value.  These instructions may have
1173/// non-fp operands after their FP operands.
1174///
1175///  Examples:
1176///     R1 = fchs R2
1177///     R1 = fadd R2, [mem]
1178///
1179void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1180  MachineInstr &MI = *I;
1181#ifndef NDEBUG
1182  unsigned NumOps = MI.getDesc().getNumOperands();
1183  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1184#endif
1185
1186  // Is this the last use of the source register?
1187  unsigned Reg = getFPReg(MI.getOperand(1));
1188  bool KillsSrc = MI.killsRegister(X86::FP0 + Reg);
1189
1190  if (KillsSrc) {
1191    // If this is the last use of the source register, just make sure it's on
1192    // the top of the stack.
1193    moveToTop(Reg, I);
1194    if (StackTop == 0)
1195      report_fatal_error("Stack cannot be empty!");
1196    --StackTop;
1197    pushReg(getFPReg(MI.getOperand(0)));
1198  } else {
1199    // If this is not the last use of the source register, _copy_ it to the top
1200    // of the stack.
1201    duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1202  }
1203
1204  // Change from the pseudo instruction to the concrete instruction.
1205  MI.RemoveOperand(1); // Drop the source operand.
1206  MI.RemoveOperand(0); // Drop the destination operand.
1207  MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1208}
1209
1210
1211//===----------------------------------------------------------------------===//
1212// Define tables of various ways to map pseudo instructions
1213//
1214
1215// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
1216static const TableEntry ForwardST0Table[] = {
1217  { X86::ADD_Fp32  , X86::ADD_FST0r },
1218  { X86::ADD_Fp64  , X86::ADD_FST0r },
1219  { X86::ADD_Fp80  , X86::ADD_FST0r },
1220  { X86::DIV_Fp32  , X86::DIV_FST0r },
1221  { X86::DIV_Fp64  , X86::DIV_FST0r },
1222  { X86::DIV_Fp80  , X86::DIV_FST0r },
1223  { X86::MUL_Fp32  , X86::MUL_FST0r },
1224  { X86::MUL_Fp64  , X86::MUL_FST0r },
1225  { X86::MUL_Fp80  , X86::MUL_FST0r },
1226  { X86::SUB_Fp32  , X86::SUB_FST0r },
1227  { X86::SUB_Fp64  , X86::SUB_FST0r },
1228  { X86::SUB_Fp80  , X86::SUB_FST0r },
1229};
1230
1231// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
1232static const TableEntry ReverseST0Table[] = {
1233  { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
1234  { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
1235  { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
1236  { X86::DIV_Fp32  , X86::DIVR_FST0r },
1237  { X86::DIV_Fp64  , X86::DIVR_FST0r },
1238  { X86::DIV_Fp80  , X86::DIVR_FST0r },
1239  { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
1240  { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
1241  { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
1242  { X86::SUB_Fp32  , X86::SUBR_FST0r },
1243  { X86::SUB_Fp64  , X86::SUBR_FST0r },
1244  { X86::SUB_Fp80  , X86::SUBR_FST0r },
1245};
1246
1247// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
1248static const TableEntry ForwardSTiTable[] = {
1249  { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
1250  { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
1251  { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
1252  { X86::DIV_Fp32  , X86::DIVR_FrST0 },
1253  { X86::DIV_Fp64  , X86::DIVR_FrST0 },
1254  { X86::DIV_Fp80  , X86::DIVR_FrST0 },
1255  { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
1256  { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
1257  { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
1258  { X86::SUB_Fp32  , X86::SUBR_FrST0 },
1259  { X86::SUB_Fp64  , X86::SUBR_FrST0 },
1260  { X86::SUB_Fp80  , X86::SUBR_FrST0 },
1261};
1262
1263// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
1264static const TableEntry ReverseSTiTable[] = {
1265  { X86::ADD_Fp32  , X86::ADD_FrST0 },
1266  { X86::ADD_Fp64  , X86::ADD_FrST0 },
1267  { X86::ADD_Fp80  , X86::ADD_FrST0 },
1268  { X86::DIV_Fp32  , X86::DIV_FrST0 },
1269  { X86::DIV_Fp64  , X86::DIV_FrST0 },
1270  { X86::DIV_Fp80  , X86::DIV_FrST0 },
1271  { X86::MUL_Fp32  , X86::MUL_FrST0 },
1272  { X86::MUL_Fp64  , X86::MUL_FrST0 },
1273  { X86::MUL_Fp80  , X86::MUL_FrST0 },
1274  { X86::SUB_Fp32  , X86::SUB_FrST0 },
1275  { X86::SUB_Fp64  , X86::SUB_FrST0 },
1276  { X86::SUB_Fp80  , X86::SUB_FrST0 },
1277};
1278
1279
1280/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1281/// instructions which need to be simplified and possibly transformed.
1282///
1283/// Result: ST(0) = fsub  ST(0), ST(i)
1284///         ST(i) = fsub  ST(0), ST(i)
1285///         ST(0) = fsubr ST(0), ST(i)
1286///         ST(i) = fsubr ST(0), ST(i)
1287///
1288void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1289  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1290  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1291  MachineInstr &MI = *I;
1292
1293  unsigned NumOperands = MI.getDesc().getNumOperands();
1294  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1295  unsigned Dest = getFPReg(MI.getOperand(0));
1296  unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1297  unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1298  bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0);
1299  bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1300  DebugLoc dl = MI.getDebugLoc();
1301
1302  unsigned TOS = getStackEntry(0);
1303
1304  // One of our operands must be on the top of the stack.  If neither is yet, we
1305  // need to move one.
1306  if (Op0 != TOS && Op1 != TOS) {   // No operand at TOS?
1307    // We can choose to move either operand to the top of the stack.  If one of
1308    // the operands is killed by this instruction, we want that one so that we
1309    // can update right on top of the old version.
1310    if (KillsOp0) {
1311      moveToTop(Op0, I);         // Move dead operand to TOS.
1312      TOS = Op0;
1313    } else if (KillsOp1) {
1314      moveToTop(Op1, I);
1315      TOS = Op1;
1316    } else {
1317      // All of the operands are live after this instruction executes, so we
1318      // cannot update on top of any operand.  Because of this, we must
1319      // duplicate one of the stack elements to the top.  It doesn't matter
1320      // which one we pick.
1321      //
1322      duplicateToTop(Op0, Dest, I);
1323      Op0 = TOS = Dest;
1324      KillsOp0 = true;
1325    }
1326  } else if (!KillsOp0 && !KillsOp1) {
1327    // If we DO have one of our operands at the top of the stack, but we don't
1328    // have a dead operand, we must duplicate one of the operands to a new slot
1329    // on the stack.
1330    duplicateToTop(Op0, Dest, I);
1331    Op0 = TOS = Dest;
1332    KillsOp0 = true;
1333  }
1334
1335  // Now we know that one of our operands is on the top of the stack, and at
1336  // least one of our operands is killed by this instruction.
1337  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1338         "Stack conditions not set up right!");
1339
1340  // We decide which form to use based on what is on the top of the stack, and
1341  // which operand is killed by this instruction.
1342  ArrayRef<TableEntry> InstTable;
1343  bool isForward = TOS == Op0;
1344  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1345  if (updateST0) {
1346    if (isForward)
1347      InstTable = ForwardST0Table;
1348    else
1349      InstTable = ReverseST0Table;
1350  } else {
1351    if (isForward)
1352      InstTable = ForwardSTiTable;
1353    else
1354      InstTable = ReverseSTiTable;
1355  }
1356
1357  int Opcode = Lookup(InstTable, MI.getOpcode());
1358  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1359
1360  // NotTOS - The register which is not on the top of stack...
1361  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1362
1363  // Replace the old instruction with a new instruction
1364  MBB->remove(&*I++);
1365  I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1366
1367  // If both operands are killed, pop one off of the stack in addition to
1368  // overwriting the other one.
1369  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1370    assert(!updateST0 && "Should have updated other operand!");
1371    popStackAfter(I);   // Pop the top of stack
1372  }
1373
1374  // Update stack information so that we know the destination register is now on
1375  // the stack.
1376  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1377  assert(UpdatedSlot < StackTop && Dest < 7);
1378  Stack[UpdatedSlot]   = Dest;
1379  RegMap[Dest]         = UpdatedSlot;
1380  MBB->getParent()->DeleteMachineInstr(&MI); // Remove the old instruction
1381}
1382
1383/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1384/// register arguments and no explicit destinations.
1385///
1386void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1387  MachineInstr &MI = *I;
1388
1389  unsigned NumOperands = MI.getDesc().getNumOperands();
1390  assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1391  unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1392  unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1393  bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0);
1394  bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1395
1396  // Make sure the first operand is on the top of stack, the other one can be
1397  // anywhere.
1398  moveToTop(Op0, I);
1399
1400  // Change from the pseudo instruction to the concrete instruction.
1401  MI.getOperand(0).setReg(getSTReg(Op1));
1402  MI.RemoveOperand(1);
1403  MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1404
1405  // If any of the operands are killed by this instruction, free them.
1406  if (KillsOp0) freeStackSlotAfter(I, Op0);
1407  if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1408}
1409
1410/// handleCondMovFP - Handle two address conditional move instructions.  These
1411/// instructions move a st(i) register to st(0) iff a condition is true.  These
1412/// instructions require that the first operand is at the top of the stack, but
1413/// otherwise don't modify the stack at all.
1414void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1415  MachineInstr &MI = *I;
1416
1417  unsigned Op0 = getFPReg(MI.getOperand(0));
1418  unsigned Op1 = getFPReg(MI.getOperand(2));
1419  bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1420
1421  // The first operand *must* be on the top of the stack.
1422  moveToTop(Op0, I);
1423
1424  // Change the second operand to the stack register that the operand is in.
1425  // Change from the pseudo instruction to the concrete instruction.
1426  MI.RemoveOperand(0);
1427  MI.RemoveOperand(1);
1428  MI.getOperand(0).setReg(getSTReg(Op1));
1429  MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1430
1431  // If we kill the second operand, make sure to pop it from the stack.
1432  if (Op0 != Op1 && KillsOp1) {
1433    // Get this value off of the register stack.
1434    freeStackSlotAfter(I, Op1);
1435  }
1436}
1437
1438
1439/// handleSpecialFP - Handle special instructions which behave unlike other
1440/// floating point instructions.  This is primarily intended for use by pseudo
1441/// instructions.
1442///
1443void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1444  MachineInstr &MI = *Inst;
1445
1446  if (MI.isCall()) {
1447    handleCall(Inst);
1448    return;
1449  }
1450
1451  if (MI.isReturn()) {
1452    handleReturn(Inst);
1453    return;
1454  }
1455
1456  switch (MI.getOpcode()) {
1457  default: llvm_unreachable("Unknown SpecialFP instruction!");
1458  case TargetOpcode::COPY: {
1459    // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1460    const MachineOperand &MO1 = MI.getOperand(1);
1461    const MachineOperand &MO0 = MI.getOperand(0);
1462    bool KillsSrc = MI.killsRegister(MO1.getReg());
1463
1464    // FP <- FP copy.
1465    unsigned DstFP = getFPReg(MO0);
1466    unsigned SrcFP = getFPReg(MO1);
1467    assert(isLive(SrcFP) && "Cannot copy dead register");
1468    if (KillsSrc) {
1469      // If the input operand is killed, we can just change the owner of the
1470      // incoming stack slot into the result.
1471      unsigned Slot = getSlot(SrcFP);
1472      Stack[Slot] = DstFP;
1473      RegMap[DstFP] = Slot;
1474    } else {
1475      // For COPY we just duplicate the specified value to a new stack slot.
1476      // This could be made better, but would require substantial changes.
1477      duplicateToTop(SrcFP, DstFP, Inst);
1478    }
1479    break;
1480  }
1481
1482  case TargetOpcode::IMPLICIT_DEF: {
1483    // All FP registers must be explicitly defined, so load a 0 instead.
1484    unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1485    LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1486    BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1487    pushReg(Reg);
1488    break;
1489  }
1490
1491  case TargetOpcode::INLINEASM:
1492  case TargetOpcode::INLINEASM_BR: {
1493    // The inline asm MachineInstr currently only *uses* FP registers for the
1494    // 'f' constraint.  These should be turned into the current ST(x) register
1495    // in the machine instr.
1496    //
1497    // There are special rules for x87 inline assembly. The compiler must know
1498    // exactly how many registers are popped and pushed implicitly by the asm.
1499    // Otherwise it is not possible to restore the stack state after the inline
1500    // asm.
1501    //
1502    // There are 3 kinds of input operands:
1503    //
1504    // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1505    //    popped input operand must be in a fixed stack slot, and it is either
1506    //    tied to an output operand, or in the clobber list. The MI has ST use
1507    //    and def operands for these inputs.
1508    //
1509    // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1510    //    preserved by the inline asm. The fixed stack slots must be STn-STm
1511    //    following the popped inputs. A fixed input operand cannot be tied to
1512    //    an output or appear in the clobber list. The MI has ST use operands
1513    //    and no defs for these inputs.
1514    //
1515    // 3. Preserved inputs. These inputs use the "f" constraint which is
1516    //    represented as an FP register. The inline asm won't change these
1517    //    stack slots.
1518    //
1519    // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1520    // registers do not count as output operands. The inline asm changes the
1521    // stack as if it popped all the popped inputs and then pushed all the
1522    // output operands.
1523
1524    // Scan the assembly for ST registers used, defined and clobbered. We can
1525    // only tell clobbers from defs by looking at the asm descriptor.
1526    unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
1527    unsigned NumOps = 0;
1528    SmallSet<unsigned, 1> FRegIdx;
1529    unsigned RCID;
1530
1531    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1532         i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1533      unsigned Flags = MI.getOperand(i).getImm();
1534
1535      NumOps = InlineAsm::getNumOperandRegisters(Flags);
1536      if (NumOps != 1)
1537        continue;
1538      const MachineOperand &MO = MI.getOperand(i + 1);
1539      if (!MO.isReg())
1540        continue;
1541      unsigned STReg = MO.getReg() - X86::FP0;
1542      if (STReg >= 8)
1543        continue;
1544
1545      // If the flag has a register class constraint, this must be an operand
1546      // with constraint "f". Record its index and continue.
1547      if (InlineAsm::hasRegClassConstraint(Flags, RCID)) {
1548        FRegIdx.insert(i + 1);
1549        continue;
1550      }
1551
1552      switch (InlineAsm::getKind(Flags)) {
1553      case InlineAsm::Kind_RegUse:
1554        STUses |= (1u << STReg);
1555        break;
1556      case InlineAsm::Kind_RegDef:
1557      case InlineAsm::Kind_RegDefEarlyClobber:
1558        STDefs |= (1u << STReg);
1559        if (MO.isDead())
1560          STDeadDefs |= (1u << STReg);
1561        break;
1562      case InlineAsm::Kind_Clobber:
1563        STClobbers |= (1u << STReg);
1564        break;
1565      default:
1566        break;
1567      }
1568    }
1569
1570    if (STUses && !isMask_32(STUses))
1571      MI.emitError("fixed input regs must be last on the x87 stack");
1572    unsigned NumSTUses = countTrailingOnes(STUses);
1573
1574    // Defs must be contiguous from the stack top. ST0-STn.
1575    if (STDefs && !isMask_32(STDefs)) {
1576      MI.emitError("output regs must be last on the x87 stack");
1577      STDefs = NextPowerOf2(STDefs) - 1;
1578    }
1579    unsigned NumSTDefs = countTrailingOnes(STDefs);
1580
1581    // So must the clobbered stack slots. ST0-STm, m >= n.
1582    if (STClobbers && !isMask_32(STDefs | STClobbers))
1583      MI.emitError("clobbers must be last on the x87 stack");
1584
1585    // Popped inputs are the ones that are also clobbered or defined.
1586    unsigned STPopped = STUses & (STDefs | STClobbers);
1587    if (STPopped && !isMask_32(STPopped))
1588      MI.emitError("implicitly popped regs must be last on the x87 stack");
1589    unsigned NumSTPopped = countTrailingOnes(STPopped);
1590
1591    LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1592                      << NumSTPopped << ", and defines " << NumSTDefs
1593                      << " regs.\n");
1594
1595#ifndef NDEBUG
1596    // If any input operand uses constraint "f", all output register
1597    // constraints must be early-clobber defs.
1598    for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1599      if (FRegIdx.count(I)) {
1600        assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1601               "Operands with constraint \"f\" cannot overlap with defs");
1602      }
1603#endif
1604
1605    // Collect all FP registers (register operands with constraints "t", "u",
1606    // and "f") to kill afer the instruction.
1607    unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1608    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1609      MachineOperand &Op = MI.getOperand(i);
1610      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1611        continue;
1612      unsigned FPReg = getFPReg(Op);
1613
1614      // If we kill this operand, make sure to pop it from the stack after the
1615      // asm.  We just remember it for now, and pop them all off at the end in
1616      // a batch.
1617      if (Op.isUse() && Op.isKill())
1618        FPKills |= 1U << FPReg;
1619    }
1620
1621    // Do not include registers that are implicitly popped by defs/clobbers.
1622    FPKills &= ~(STDefs | STClobbers);
1623
1624    // Now we can rearrange the live registers to match what was requested.
1625    unsigned char STUsesArray[8];
1626
1627    for (unsigned I = 0; I < NumSTUses; ++I)
1628      STUsesArray[I] = I;
1629
1630    shuffleStackTop(STUsesArray, NumSTUses, Inst);
1631    LLVM_DEBUG({
1632      dbgs() << "Before asm: ";
1633      dumpStack();
1634    });
1635
1636    // With the stack layout fixed, rewrite the FP registers.
1637    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1638      MachineOperand &Op = MI.getOperand(i);
1639      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1640        continue;
1641
1642      unsigned FPReg = getFPReg(Op);
1643
1644      if (FRegIdx.count(i))
1645        // Operand with constraint "f".
1646        Op.setReg(getSTReg(FPReg));
1647      else
1648        // Operand with a single register class constraint ("t" or "u").
1649        Op.setReg(X86::ST0 + FPReg);
1650    }
1651
1652    // Simulate the inline asm popping its inputs and pushing its outputs.
1653    StackTop -= NumSTPopped;
1654
1655    for (unsigned i = 0; i < NumSTDefs; ++i)
1656      pushReg(NumSTDefs - i - 1);
1657
1658    // If this asm kills any FP registers (is the last use of them) we must
1659    // explicitly emit pop instructions for them.  Do this now after the asm has
1660    // executed so that the ST(x) numbers are not off (which would happen if we
1661    // did this inline with operand rewriting).
1662    //
1663    // Note: this might be a non-optimal pop sequence.  We might be able to do
1664    // better by trying to pop in stack order or something.
1665    while (FPKills) {
1666      unsigned FPReg = countTrailingZeros(FPKills);
1667      if (isLive(FPReg))
1668        freeStackSlotAfter(Inst, FPReg);
1669      FPKills &= ~(1U << FPReg);
1670    }
1671
1672    // Don't delete the inline asm!
1673    return;
1674  }
1675  }
1676
1677  Inst = MBB->erase(Inst);  // Remove the pseudo instruction
1678
1679  // We want to leave I pointing to the previous instruction, but what if we
1680  // just erased the first instruction?
1681  if (Inst == MBB->begin()) {
1682    LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1683    Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1684  } else
1685    --Inst;
1686}
1687
1688void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1689  const TargetRegisterInfo &TRI =
1690      *MBB.getParent()->getSubtarget().getRegisterInfo();
1691  LivePhysRegs LPR(TRI);
1692
1693  LPR.addLiveOuts(MBB);
1694
1695  for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
1696       I != E; ++I) {
1697    if (I->isDebugInstr())
1698      continue;
1699
1700    std::bitset<8> Defs;
1701    SmallVector<MachineOperand *, 2> Uses;
1702    MachineInstr &MI = *I;
1703
1704    for (auto &MO : I->operands()) {
1705      if (!MO.isReg())
1706        continue;
1707
1708      unsigned Reg = MO.getReg() - X86::FP0;
1709
1710      if (Reg >= 8)
1711        continue;
1712
1713      if (MO.isDef()) {
1714        Defs.set(Reg);
1715        if (!LPR.contains(MO.getReg()))
1716          MO.setIsDead();
1717      } else
1718        Uses.push_back(&MO);
1719    }
1720
1721    for (auto *MO : Uses)
1722      if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
1723        MO->setIsKill();
1724
1725    LPR.stepBackward(MI);
1726  }
1727}
1728