X86FloatingPoint.cpp revision 303975
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the pass which converts floating point instructions from
11// pseudo registers into register stack instructions.  This pass uses live
12// variable information to indicate where the FPn registers are used and their
13// lifetimes.
14//
15// The x87 hardware tracks liveness of the stack registers, so it is necessary
16// to implement exact liveness tracking between basic blocks. The CFG edges are
17// partitioned into bundles where the same FP registers must be live in
18// identical stack positions. Instructions are inserted at the end of each basic
19// block to rearrange the live registers to match the outgoing bundle.
20//
21// This approach avoids splitting critical edges at the potential cost of more
22// live register shuffling instructions when critical edges are present.
23//
24//===----------------------------------------------------------------------===//
25
26#include "X86.h"
27#include "X86InstrInfo.h"
28#include "llvm/ADT/DepthFirstIterator.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallPtrSet.h"
31#include "llvm/ADT/SmallSet.h"
32#include "llvm/ADT/SmallVector.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/CodeGen/EdgeBundles.h"
35#include "llvm/CodeGen/LivePhysRegs.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/Passes.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/ErrorHandling.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/Target/TargetInstrInfo.h"
45#include "llvm/Target/TargetMachine.h"
46#include "llvm/Target/TargetSubtargetInfo.h"
47#include <algorithm>
48#include <bitset>
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-codegen"
52
53STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54STATISTIC(NumFP  , "Number of floating point instructions");
55
56namespace {
57  const unsigned ScratchFPReg = 7;
58
59  struct FPS : public MachineFunctionPass {
60    static char ID;
61    FPS() : MachineFunctionPass(ID) {
62      initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
63      // This is really only to keep valgrind quiet.
64      // The logic in isLive() is too much for it.
65      memset(Stack, 0, sizeof(Stack));
66      memset(RegMap, 0, sizeof(RegMap));
67    }
68
69    void getAnalysisUsage(AnalysisUsage &AU) const override {
70      AU.setPreservesCFG();
71      AU.addRequired<EdgeBundles>();
72      AU.addPreservedID(MachineLoopInfoID);
73      AU.addPreservedID(MachineDominatorsID);
74      MachineFunctionPass::getAnalysisUsage(AU);
75    }
76
77    bool runOnMachineFunction(MachineFunction &MF) override;
78
79    const char *getPassName() const override { return "X86 FP Stackifier"; }
80
81  private:
82    const TargetInstrInfo *TII; // Machine instruction info.
83
84    // Two CFG edges are related if they leave the same block, or enter the same
85    // block. The transitive closure of an edge under this relation is a
86    // LiveBundle. It represents a set of CFG edges where the live FP stack
87    // registers must be allocated identically in the x87 stack.
88    //
89    // A LiveBundle is usually all the edges leaving a block, or all the edges
90    // entering a block, but it can contain more edges if critical edges are
91    // present.
92    //
93    // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
94    // but the exact mapping of FP registers to stack slots is fixed later.
95    struct LiveBundle {
96      // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
97      unsigned Mask;
98
99      // Number of pre-assigned live registers in FixStack. This is 0 when the
100      // stack order has not yet been fixed.
101      unsigned FixCount;
102
103      // Assigned stack order for live-in registers.
104      // FixStack[i] == getStackEntry(i) for all i < FixCount.
105      unsigned char FixStack[8];
106
107      LiveBundle() : Mask(0), FixCount(0) {}
108
109      // Have the live registers been assigned a stack order yet?
110      bool isFixed() const { return !Mask || FixCount; }
111    };
112
113    // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
114    // with no live FP registers.
115    SmallVector<LiveBundle, 8> LiveBundles;
116
117    // The edge bundle analysis provides indices into the LiveBundles vector.
118    EdgeBundles *Bundles;
119
120    // Return a bitmask of FP registers in block's live-in list.
121    static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
122      unsigned Mask = 0;
123      for (const auto &LI : MBB->liveins()) {
124        if (LI.PhysReg < X86::FP0 || LI.PhysReg > X86::FP6)
125          continue;
126        Mask |= 1 << (LI.PhysReg - X86::FP0);
127      }
128      return Mask;
129    }
130
131    // Partition all the CFG edges into LiveBundles.
132    void bundleCFG(MachineFunction &MF);
133
134    MachineBasicBlock *MBB;     // Current basic block
135
136    // The hardware keeps track of how many FP registers are live, so we have
137    // to model that exactly. Usually, each live register corresponds to an
138    // FP<n> register, but when dealing with calls, returns, and inline
139    // assembly, it is sometimes necessary to have live scratch registers.
140    unsigned Stack[8];          // FP<n> Registers in each stack slot...
141    unsigned StackTop;          // The current top of the FP stack.
142
143    enum {
144      NumFPRegs = 8             // Including scratch pseudo-registers.
145    };
146
147    // For each live FP<n> register, point to its Stack[] entry.
148    // The first entries correspond to FP0-FP6, the rest are scratch registers
149    // used when we need slightly different live registers than what the
150    // register allocator thinks.
151    unsigned RegMap[NumFPRegs];
152
153    // Set up our stack model to match the incoming registers to MBB.
154    void setupBlockStack();
155
156    // Shuffle live registers to match the expectations of successor blocks.
157    void finishBlockStack();
158
159#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
160    void dumpStack() const {
161      dbgs() << "Stack contents:";
162      for (unsigned i = 0; i != StackTop; ++i) {
163        dbgs() << " FP" << Stack[i];
164        assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
165      }
166    }
167#endif
168
169    /// getSlot - Return the stack slot number a particular register number is
170    /// in.
171    unsigned getSlot(unsigned RegNo) const {
172      assert(RegNo < NumFPRegs && "Regno out of range!");
173      return RegMap[RegNo];
174    }
175
176    /// isLive - Is RegNo currently live in the stack?
177    bool isLive(unsigned RegNo) const {
178      unsigned Slot = getSlot(RegNo);
179      return Slot < StackTop && Stack[Slot] == RegNo;
180    }
181
182    /// getStackEntry - Return the X86::FP<n> register in register ST(i).
183    unsigned getStackEntry(unsigned STi) const {
184      if (STi >= StackTop)
185        report_fatal_error("Access past stack top!");
186      return Stack[StackTop-1-STi];
187    }
188
189    /// getSTReg - Return the X86::ST(i) register which contains the specified
190    /// FP<RegNo> register.
191    unsigned getSTReg(unsigned RegNo) const {
192      return StackTop - 1 - getSlot(RegNo) + X86::ST0;
193    }
194
195    // pushReg - Push the specified FP<n> register onto the stack.
196    void pushReg(unsigned Reg) {
197      assert(Reg < NumFPRegs && "Register number out of range!");
198      if (StackTop >= 8)
199        report_fatal_error("Stack overflow!");
200      Stack[StackTop] = Reg;
201      RegMap[Reg] = StackTop++;
202    }
203
204    bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
205    void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
206      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
207      if (isAtTop(RegNo)) return;
208
209      unsigned STReg = getSTReg(RegNo);
210      unsigned RegOnTop = getStackEntry(0);
211
212      // Swap the slots the regs are in.
213      std::swap(RegMap[RegNo], RegMap[RegOnTop]);
214
215      // Swap stack slot contents.
216      if (RegMap[RegOnTop] >= StackTop)
217        report_fatal_error("Access past stack top!");
218      std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
219
220      // Emit an fxch to update the runtime processors version of the state.
221      BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
222      ++NumFXCH;
223    }
224
225    void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
226      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
227      unsigned STReg = getSTReg(RegNo);
228      pushReg(AsReg);   // New register on top of stack
229
230      BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
231    }
232
233    /// popStackAfter - Pop the current value off of the top of the FP stack
234    /// after the specified instruction.
235    void popStackAfter(MachineBasicBlock::iterator &I);
236
237    /// freeStackSlotAfter - Free the specified register from the register
238    /// stack, so that it is no longer in a register.  If the register is
239    /// currently at the top of the stack, we just pop the current instruction,
240    /// otherwise we store the current top-of-stack into the specified slot,
241    /// then pop the top of stack.
242    void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
243
244    /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
245    /// instruction.
246    MachineBasicBlock::iterator
247    freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
248
249    /// Adjust the live registers to be the set in Mask.
250    void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
251
252    /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
253    /// st(0), FP reg FixStack[1] is st(1) etc.
254    void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
255                         MachineBasicBlock::iterator I);
256
257    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
258
259    void handleCall(MachineBasicBlock::iterator &I);
260    void handleZeroArgFP(MachineBasicBlock::iterator &I);
261    void handleOneArgFP(MachineBasicBlock::iterator &I);
262    void handleOneArgFPRW(MachineBasicBlock::iterator &I);
263    void handleTwoArgFP(MachineBasicBlock::iterator &I);
264    void handleCompareFP(MachineBasicBlock::iterator &I);
265    void handleCondMovFP(MachineBasicBlock::iterator &I);
266    void handleSpecialFP(MachineBasicBlock::iterator &I);
267
268    // Check if a COPY instruction is using FP registers.
269    static bool isFPCopy(MachineInstr *MI) {
270      unsigned DstReg = MI->getOperand(0).getReg();
271      unsigned SrcReg = MI->getOperand(1).getReg();
272
273      return X86::RFP80RegClass.contains(DstReg) ||
274        X86::RFP80RegClass.contains(SrcReg);
275    }
276
277    void setKillFlags(MachineBasicBlock &MBB) const;
278  };
279  char FPS::ID = 0;
280}
281
282FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
283
284/// getFPReg - Return the X86::FPx register number for the specified operand.
285/// For example, this returns 3 for X86::FP3.
286static unsigned getFPReg(const MachineOperand &MO) {
287  assert(MO.isReg() && "Expected an FP register!");
288  unsigned Reg = MO.getReg();
289  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
290  return Reg - X86::FP0;
291}
292
293/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
294/// register references into FP stack references.
295///
296bool FPS::runOnMachineFunction(MachineFunction &MF) {
297  // We only need to run this pass if there are any FP registers used in this
298  // function.  If it is all integer, there is nothing for us to do!
299  bool FPIsUsed = false;
300
301  static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
302  const MachineRegisterInfo &MRI = MF.getRegInfo();
303  for (unsigned i = 0; i <= 6; ++i)
304    if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
305      FPIsUsed = true;
306      break;
307    }
308
309  // Early exit.
310  if (!FPIsUsed) return false;
311
312  Bundles = &getAnalysis<EdgeBundles>();
313  TII = MF.getSubtarget().getInstrInfo();
314
315  // Prepare cross-MBB liveness.
316  bundleCFG(MF);
317
318  StackTop = 0;
319
320  // Process the function in depth first order so that we process at least one
321  // of the predecessors for every reachable block in the function.
322  SmallPtrSet<MachineBasicBlock*, 8> Processed;
323  MachineBasicBlock *Entry = &MF.front();
324
325  bool Changed = false;
326  for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
327    Changed |= processBasicBlock(MF, *BB);
328
329  // Process any unreachable blocks in arbitrary order now.
330  if (MF.size() != Processed.size())
331    for (MachineBasicBlock &BB : MF)
332      if (Processed.insert(&BB).second)
333        Changed |= processBasicBlock(MF, BB);
334
335  LiveBundles.clear();
336
337  return Changed;
338}
339
340/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
341/// live-out sets for the FP registers. Consistent means that the set of
342/// registers live-out from a block is identical to the live-in set of all
343/// successors. This is not enforced by the normal live-in lists since
344/// registers may be implicitly defined, or not used by all successors.
345void FPS::bundleCFG(MachineFunction &MF) {
346  assert(LiveBundles.empty() && "Stale data in LiveBundles");
347  LiveBundles.resize(Bundles->getNumBundles());
348
349  // Gather the actual live-in masks for all MBBs.
350  for (MachineBasicBlock &MBB : MF) {
351    const unsigned Mask = calcLiveInMask(&MBB);
352    if (!Mask)
353      continue;
354    // Update MBB ingoing bundle mask.
355    LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
356  }
357}
358
359/// processBasicBlock - Loop over all of the instructions in the basic block,
360/// transforming FP instructions into their stack form.
361///
362bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
363  bool Changed = false;
364  MBB = &BB;
365
366  setKillFlags(BB);
367  setupBlockStack();
368
369  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
370    MachineInstr *MI = I;
371    uint64_t Flags = MI->getDesc().TSFlags;
372
373    unsigned FPInstClass = Flags & X86II::FPTypeMask;
374    if (MI->isInlineAsm())
375      FPInstClass = X86II::SpecialFP;
376
377    if (MI->isCopy() && isFPCopy(MI))
378      FPInstClass = X86II::SpecialFP;
379
380    if (MI->isImplicitDef() &&
381        X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
382      FPInstClass = X86II::SpecialFP;
383
384    if (MI->isCall())
385      FPInstClass = X86II::SpecialFP;
386
387    if (FPInstClass == X86II::NotFP)
388      continue;  // Efficiently ignore non-fp insts!
389
390    MachineInstr *PrevMI = nullptr;
391    if (I != BB.begin())
392      PrevMI = std::prev(I);
393
394    ++NumFP;  // Keep track of # of pseudo instrs
395    DEBUG(dbgs() << "\nFPInst:\t" << *MI);
396
397    // Get dead variables list now because the MI pointer may be deleted as part
398    // of processing!
399    SmallVector<unsigned, 8> DeadRegs;
400    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
401      const MachineOperand &MO = MI->getOperand(i);
402      if (MO.isReg() && MO.isDead())
403        DeadRegs.push_back(MO.getReg());
404    }
405
406    switch (FPInstClass) {
407    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
408    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
409    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
410    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
411    case X86II::CompareFP:  handleCompareFP(I); break;
412    case X86II::CondMovFP:  handleCondMovFP(I); break;
413    case X86II::SpecialFP:  handleSpecialFP(I); break;
414    default: llvm_unreachable("Unknown FP Type!");
415    }
416
417    // Check to see if any of the values defined by this instruction are dead
418    // after definition.  If so, pop them.
419    for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
420      unsigned Reg = DeadRegs[i];
421      // Check if Reg is live on the stack. An inline-asm register operand that
422      // is in the clobber list and marked dead might not be live on the stack.
423      if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
424        DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
425        freeStackSlotAfter(I, Reg-X86::FP0);
426      }
427    }
428
429    // Print out all of the instructions expanded to if -debug
430    DEBUG(
431      MachineBasicBlock::iterator PrevI(PrevMI);
432      if (I == PrevI) {
433        dbgs() << "Just deleted pseudo instruction\n";
434      } else {
435        MachineBasicBlock::iterator Start = I;
436        // Rewind to first instruction newly inserted.
437        while (Start != BB.begin() && std::prev(Start) != PrevI) --Start;
438        dbgs() << "Inserted instructions:\n\t";
439        Start->print(dbgs());
440        while (++Start != std::next(I)) {}
441      }
442      dumpStack();
443    );
444    (void)PrevMI;
445
446    Changed = true;
447  }
448
449  finishBlockStack();
450
451  return Changed;
452}
453
454/// setupBlockStack - Use the live bundles to set up our model of the stack
455/// to match predecessors' live out stack.
456void FPS::setupBlockStack() {
457  DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
458               << " derived from " << MBB->getName() << ".\n");
459  StackTop = 0;
460  // Get the live-in bundle for MBB.
461  const LiveBundle &Bundle =
462    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
463
464  if (!Bundle.Mask) {
465    DEBUG(dbgs() << "Block has no FP live-ins.\n");
466    return;
467  }
468
469  // Depth-first iteration should ensure that we always have an assigned stack.
470  assert(Bundle.isFixed() && "Reached block before any predecessors");
471
472  // Push the fixed live-in registers.
473  for (unsigned i = Bundle.FixCount; i > 0; --i) {
474    MBB->addLiveIn(X86::ST0+i-1);
475    DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
476                 << unsigned(Bundle.FixStack[i-1]) << '\n');
477    pushReg(Bundle.FixStack[i-1]);
478  }
479
480  // Kill off unwanted live-ins. This can happen with a critical edge.
481  // FIXME: We could keep these live registers around as zombies. They may need
482  // to be revived at the end of a short block. It might save a few instrs.
483  adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
484  DEBUG(MBB->dump());
485}
486
487/// finishBlockStack - Revive live-outs that are implicitly defined out of
488/// MBB. Shuffle live registers to match the expected fixed stack of any
489/// predecessors, and ensure that all predecessors are expecting the same
490/// stack.
491void FPS::finishBlockStack() {
492  // The RET handling below takes care of return blocks for us.
493  if (MBB->succ_empty())
494    return;
495
496  DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
497               << " derived from " << MBB->getName() << ".\n");
498
499  // Get MBB's live-out bundle.
500  unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
501  LiveBundle &Bundle = LiveBundles[BundleIdx];
502
503  // We may need to kill and define some registers to match successors.
504  // FIXME: This can probably be combined with the shuffle below.
505  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
506  adjustLiveRegs(Bundle.Mask, Term);
507
508  if (!Bundle.Mask) {
509    DEBUG(dbgs() << "No live-outs.\n");
510    return;
511  }
512
513  // Has the stack order been fixed yet?
514  DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
515  if (Bundle.isFixed()) {
516    DEBUG(dbgs() << "Shuffling stack to match.\n");
517    shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
518  } else {
519    // Not fixed yet, we get to choose.
520    DEBUG(dbgs() << "Fixing stack order now.\n");
521    Bundle.FixCount = StackTop;
522    for (unsigned i = 0; i < StackTop; ++i)
523      Bundle.FixStack[i] = getStackEntry(i);
524  }
525}
526
527
528//===----------------------------------------------------------------------===//
529// Efficient Lookup Table Support
530//===----------------------------------------------------------------------===//
531
532namespace {
533  struct TableEntry {
534    uint16_t from;
535    uint16_t to;
536    bool operator<(const TableEntry &TE) const { return from < TE.from; }
537    friend bool operator<(const TableEntry &TE, unsigned V) {
538      return TE.from < V;
539    }
540    friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
541                                                const TableEntry &TE) {
542      return V < TE.from;
543    }
544  };
545}
546
547static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
548  const TableEntry *I = std::lower_bound(Table.begin(), Table.end(), Opcode);
549  if (I != Table.end() && I->from == Opcode)
550    return I->to;
551  return -1;
552}
553
554#ifdef NDEBUG
555#define ASSERT_SORTED(TABLE)
556#else
557#define ASSERT_SORTED(TABLE)                                              \
558  { static bool TABLE##Checked = false;                                   \
559    if (!TABLE##Checked) {                                                \
560       assert(std::is_sorted(std::begin(TABLE), std::end(TABLE)) &&       \
561              "All lookup tables must be sorted for efficient access!");  \
562       TABLE##Checked = true;                                             \
563    }                                                                     \
564  }
565#endif
566
567//===----------------------------------------------------------------------===//
568// Register File -> Register Stack Mapping Methods
569//===----------------------------------------------------------------------===//
570
571// OpcodeTable - Sorted map of register instructions to their stack version.
572// The first element is an register file pseudo instruction, the second is the
573// concrete X86 instruction which uses the register stack.
574//
575static const TableEntry OpcodeTable[] = {
576  { X86::ABS_Fp32     , X86::ABS_F     },
577  { X86::ABS_Fp64     , X86::ABS_F     },
578  { X86::ABS_Fp80     , X86::ABS_F     },
579  { X86::ADD_Fp32m    , X86::ADD_F32m  },
580  { X86::ADD_Fp64m    , X86::ADD_F64m  },
581  { X86::ADD_Fp64m32  , X86::ADD_F32m  },
582  { X86::ADD_Fp80m32  , X86::ADD_F32m  },
583  { X86::ADD_Fp80m64  , X86::ADD_F64m  },
584  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
585  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
586  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
587  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
588  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
589  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
590  { X86::CHS_Fp32     , X86::CHS_F     },
591  { X86::CHS_Fp64     , X86::CHS_F     },
592  { X86::CHS_Fp80     , X86::CHS_F     },
593  { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
594  { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
595  { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
596  { X86::CMOVB_Fp32   , X86::CMOVB_F   },
597  { X86::CMOVB_Fp64   , X86::CMOVB_F  },
598  { X86::CMOVB_Fp80   , X86::CMOVB_F  },
599  { X86::CMOVE_Fp32   , X86::CMOVE_F  },
600  { X86::CMOVE_Fp64   , X86::CMOVE_F   },
601  { X86::CMOVE_Fp80   , X86::CMOVE_F   },
602  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
603  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
604  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
605  { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
606  { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
607  { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
608  { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
609  { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
610  { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
611  { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
612  { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
613  { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
614  { X86::CMOVP_Fp32   , X86::CMOVP_F   },
615  { X86::CMOVP_Fp64   , X86::CMOVP_F   },
616  { X86::CMOVP_Fp80   , X86::CMOVP_F   },
617  { X86::COS_Fp32     , X86::COS_F     },
618  { X86::COS_Fp64     , X86::COS_F     },
619  { X86::COS_Fp80     , X86::COS_F     },
620  { X86::DIVR_Fp32m   , X86::DIVR_F32m },
621  { X86::DIVR_Fp64m   , X86::DIVR_F64m },
622  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
623  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
624  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
625  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
626  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
627  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
628  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
629  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
630  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
631  { X86::DIV_Fp32m    , X86::DIV_F32m  },
632  { X86::DIV_Fp64m    , X86::DIV_F64m  },
633  { X86::DIV_Fp64m32  , X86::DIV_F32m  },
634  { X86::DIV_Fp80m32  , X86::DIV_F32m  },
635  { X86::DIV_Fp80m64  , X86::DIV_F64m  },
636  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
637  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
638  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
639  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
640  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
641  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
642  { X86::ILD_Fp16m32  , X86::ILD_F16m  },
643  { X86::ILD_Fp16m64  , X86::ILD_F16m  },
644  { X86::ILD_Fp16m80  , X86::ILD_F16m  },
645  { X86::ILD_Fp32m32  , X86::ILD_F32m  },
646  { X86::ILD_Fp32m64  , X86::ILD_F32m  },
647  { X86::ILD_Fp32m80  , X86::ILD_F32m  },
648  { X86::ILD_Fp64m32  , X86::ILD_F64m  },
649  { X86::ILD_Fp64m64  , X86::ILD_F64m  },
650  { X86::ILD_Fp64m80  , X86::ILD_F64m  },
651  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
652  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
653  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
654  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
655  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
656  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
657  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
658  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
659  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
660  { X86::IST_Fp16m32  , X86::IST_F16m  },
661  { X86::IST_Fp16m64  , X86::IST_F16m  },
662  { X86::IST_Fp16m80  , X86::IST_F16m  },
663  { X86::IST_Fp32m32  , X86::IST_F32m  },
664  { X86::IST_Fp32m64  , X86::IST_F32m  },
665  { X86::IST_Fp32m80  , X86::IST_F32m  },
666  { X86::IST_Fp64m32  , X86::IST_FP64m },
667  { X86::IST_Fp64m64  , X86::IST_FP64m },
668  { X86::IST_Fp64m80  , X86::IST_FP64m },
669  { X86::LD_Fp032     , X86::LD_F0     },
670  { X86::LD_Fp064     , X86::LD_F0     },
671  { X86::LD_Fp080     , X86::LD_F0     },
672  { X86::LD_Fp132     , X86::LD_F1     },
673  { X86::LD_Fp164     , X86::LD_F1     },
674  { X86::LD_Fp180     , X86::LD_F1     },
675  { X86::LD_Fp32m     , X86::LD_F32m   },
676  { X86::LD_Fp32m64   , X86::LD_F32m   },
677  { X86::LD_Fp32m80   , X86::LD_F32m   },
678  { X86::LD_Fp64m     , X86::LD_F64m   },
679  { X86::LD_Fp64m80   , X86::LD_F64m   },
680  { X86::LD_Fp80m     , X86::LD_F80m   },
681  { X86::MUL_Fp32m    , X86::MUL_F32m  },
682  { X86::MUL_Fp64m    , X86::MUL_F64m  },
683  { X86::MUL_Fp64m32  , X86::MUL_F32m  },
684  { X86::MUL_Fp80m32  , X86::MUL_F32m  },
685  { X86::MUL_Fp80m64  , X86::MUL_F64m  },
686  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
687  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
688  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
689  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
690  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
691  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
692  { X86::SIN_Fp32     , X86::SIN_F     },
693  { X86::SIN_Fp64     , X86::SIN_F     },
694  { X86::SIN_Fp80     , X86::SIN_F     },
695  { X86::SQRT_Fp32    , X86::SQRT_F    },
696  { X86::SQRT_Fp64    , X86::SQRT_F    },
697  { X86::SQRT_Fp80    , X86::SQRT_F    },
698  { X86::ST_Fp32m     , X86::ST_F32m   },
699  { X86::ST_Fp64m     , X86::ST_F64m   },
700  { X86::ST_Fp64m32   , X86::ST_F32m   },
701  { X86::ST_Fp80m32   , X86::ST_F32m   },
702  { X86::ST_Fp80m64   , X86::ST_F64m   },
703  { X86::ST_FpP80m    , X86::ST_FP80m  },
704  { X86::SUBR_Fp32m   , X86::SUBR_F32m },
705  { X86::SUBR_Fp64m   , X86::SUBR_F64m },
706  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
707  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
708  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
709  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
710  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
711  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
712  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
713  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
714  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
715  { X86::SUB_Fp32m    , X86::SUB_F32m  },
716  { X86::SUB_Fp64m    , X86::SUB_F64m  },
717  { X86::SUB_Fp64m32  , X86::SUB_F32m  },
718  { X86::SUB_Fp80m32  , X86::SUB_F32m  },
719  { X86::SUB_Fp80m64  , X86::SUB_F64m  },
720  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
721  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
722  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
723  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
724  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
725  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
726  { X86::TST_Fp32     , X86::TST_F     },
727  { X86::TST_Fp64     , X86::TST_F     },
728  { X86::TST_Fp80     , X86::TST_F     },
729  { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
730  { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
731  { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
732  { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
733  { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
734  { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
735};
736
737static unsigned getConcreteOpcode(unsigned Opcode) {
738  ASSERT_SORTED(OpcodeTable);
739  int Opc = Lookup(OpcodeTable, Opcode);
740  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
741  return Opc;
742}
743
744//===----------------------------------------------------------------------===//
745// Helper Methods
746//===----------------------------------------------------------------------===//
747
748// PopTable - Sorted map of instructions to their popping version.  The first
749// element is an instruction, the second is the version which pops.
750//
751static const TableEntry PopTable[] = {
752  { X86::ADD_FrST0 , X86::ADD_FPrST0  },
753
754  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
755  { X86::DIV_FrST0 , X86::DIV_FPrST0  },
756
757  { X86::IST_F16m  , X86::IST_FP16m   },
758  { X86::IST_F32m  , X86::IST_FP32m   },
759
760  { X86::MUL_FrST0 , X86::MUL_FPrST0  },
761
762  { X86::ST_F32m   , X86::ST_FP32m    },
763  { X86::ST_F64m   , X86::ST_FP64m    },
764  { X86::ST_Frr    , X86::ST_FPrr     },
765
766  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
767  { X86::SUB_FrST0 , X86::SUB_FPrST0  },
768
769  { X86::UCOM_FIr  , X86::UCOM_FIPr   },
770
771  { X86::UCOM_FPr  , X86::UCOM_FPPr   },
772  { X86::UCOM_Fr   , X86::UCOM_FPr    },
773};
774
775/// popStackAfter - Pop the current value off of the top of the FP stack after
776/// the specified instruction.  This attempts to be sneaky and combine the pop
777/// into the instruction itself if possible.  The iterator is left pointing to
778/// the last instruction, be it a new pop instruction inserted, or the old
779/// instruction if it was modified in place.
780///
781void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
782  MachineInstr* MI = I;
783  DebugLoc dl = MI->getDebugLoc();
784  ASSERT_SORTED(PopTable);
785  if (StackTop == 0)
786    report_fatal_error("Cannot pop empty stack!");
787  RegMap[Stack[--StackTop]] = ~0;     // Update state
788
789  // Check to see if there is a popping version of this instruction...
790  int Opcode = Lookup(PopTable, I->getOpcode());
791  if (Opcode != -1) {
792    I->setDesc(TII->get(Opcode));
793    if (Opcode == X86::UCOM_FPPr)
794      I->RemoveOperand(0);
795  } else {    // Insert an explicit pop
796    I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
797  }
798}
799
800/// freeStackSlotAfter - Free the specified register from the register stack, so
801/// that it is no longer in a register.  If the register is currently at the top
802/// of the stack, we just pop the current instruction, otherwise we store the
803/// current top-of-stack into the specified slot, then pop the top of stack.
804void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
805  if (getStackEntry(0) == FPRegNo) {  // already at the top of stack? easy.
806    popStackAfter(I);
807    return;
808  }
809
810  // Otherwise, store the top of stack into the dead slot, killing the operand
811  // without having to add in an explicit xchg then pop.
812  //
813  I = freeStackSlotBefore(++I, FPRegNo);
814}
815
816/// freeStackSlotBefore - Free the specified register without trying any
817/// folding.
818MachineBasicBlock::iterator
819FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
820  unsigned STReg    = getSTReg(FPRegNo);
821  unsigned OldSlot  = getSlot(FPRegNo);
822  unsigned TopReg   = Stack[StackTop-1];
823  Stack[OldSlot]    = TopReg;
824  RegMap[TopReg]    = OldSlot;
825  RegMap[FPRegNo]   = ~0;
826  Stack[--StackTop] = ~0;
827  return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
828      .addReg(STReg)
829      .getInstr();
830}
831
832/// adjustLiveRegs - Kill and revive registers such that exactly the FP
833/// registers with a bit in Mask are live.
834void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
835  unsigned Defs = Mask;
836  unsigned Kills = 0;
837  for (unsigned i = 0; i < StackTop; ++i) {
838    unsigned RegNo = Stack[i];
839    if (!(Defs & (1 << RegNo)))
840      // This register is live, but we don't want it.
841      Kills |= (1 << RegNo);
842    else
843      // We don't need to imp-def this live register.
844      Defs &= ~(1 << RegNo);
845  }
846  assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
847
848  // Produce implicit-defs for free by using killed registers.
849  while (Kills && Defs) {
850    unsigned KReg = countTrailingZeros(Kills);
851    unsigned DReg = countTrailingZeros(Defs);
852    DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
853    std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
854    std::swap(RegMap[KReg], RegMap[DReg]);
855    Kills &= ~(1 << KReg);
856    Defs &= ~(1 << DReg);
857  }
858
859  // Kill registers by popping.
860  if (Kills && I != MBB->begin()) {
861    MachineBasicBlock::iterator I2 = std::prev(I);
862    while (StackTop) {
863      unsigned KReg = getStackEntry(0);
864      if (!(Kills & (1 << KReg)))
865        break;
866      DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
867      popStackAfter(I2);
868      Kills &= ~(1 << KReg);
869    }
870  }
871
872  // Manually kill the rest.
873  while (Kills) {
874    unsigned KReg = countTrailingZeros(Kills);
875    DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
876    freeStackSlotBefore(I, KReg);
877    Kills &= ~(1 << KReg);
878  }
879
880  // Load zeros for all the imp-defs.
881  while(Defs) {
882    unsigned DReg = countTrailingZeros(Defs);
883    DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
884    BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
885    pushReg(DReg);
886    Defs &= ~(1 << DReg);
887  }
888
889  // Now we should have the correct registers live.
890  DEBUG(dumpStack());
891  assert(StackTop == countPopulation(Mask) && "Live count mismatch");
892}
893
894/// shuffleStackTop - emit fxch instructions before I to shuffle the top
895/// FixCount entries into the order given by FixStack.
896/// FIXME: Is there a better algorithm than insertion sort?
897void FPS::shuffleStackTop(const unsigned char *FixStack,
898                          unsigned FixCount,
899                          MachineBasicBlock::iterator I) {
900  // Move items into place, starting from the desired stack bottom.
901  while (FixCount--) {
902    // Old register at position FixCount.
903    unsigned OldReg = getStackEntry(FixCount);
904    // Desired register at position FixCount.
905    unsigned Reg = FixStack[FixCount];
906    if (Reg == OldReg)
907      continue;
908    // (Reg st0) (OldReg st0) = (Reg OldReg st0)
909    moveToTop(Reg, I);
910    if (FixCount > 0)
911      moveToTop(OldReg, I);
912  }
913  DEBUG(dumpStack());
914}
915
916
917//===----------------------------------------------------------------------===//
918// Instruction transformation implementation
919//===----------------------------------------------------------------------===//
920
921void FPS::handleCall(MachineBasicBlock::iterator &I) {
922  unsigned STReturns = 0;
923
924  for (const auto &MO : I->operands()) {
925    if (!MO.isReg())
926      continue;
927
928    unsigned R = MO.getReg() - X86::FP0;
929
930    if (R < 8) {
931      assert(MO.isDef() && MO.isImplicit());
932      STReturns |= 1 << R;
933    }
934  }
935
936  unsigned N = countTrailingOnes(STReturns);
937
938  // FP registers used for function return must be consecutive starting at
939  // FP0.
940  assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
941
942  for (unsigned I = 0; I < N; ++I)
943    pushReg(N - I - 1);
944}
945
946/// handleZeroArgFP - ST(0) = fld0    ST(0) = flds <mem>
947///
948void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
949  MachineInstr *MI = I;
950  unsigned DestReg = getFPReg(MI->getOperand(0));
951
952  // Change from the pseudo instruction to the concrete instruction.
953  MI->RemoveOperand(0);   // Remove the explicit ST(0) operand
954  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
955
956  // Result gets pushed on the stack.
957  pushReg(DestReg);
958}
959
960/// handleOneArgFP - fst <mem>, ST(0)
961///
962void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
963  MachineInstr *MI = I;
964  unsigned NumOps = MI->getDesc().getNumOperands();
965  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
966         "Can only handle fst* & ftst instructions!");
967
968  // Is this the last use of the source register?
969  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
970  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
971
972  // FISTP64m is strange because there isn't a non-popping versions.
973  // If we have one _and_ we don't want to pop the operand, duplicate the value
974  // on the stack instead of moving it.  This ensure that popping the value is
975  // always ok.
976  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
977  //
978  if (!KillsSrc &&
979      (MI->getOpcode() == X86::IST_Fp64m32 ||
980       MI->getOpcode() == X86::ISTT_Fp16m32 ||
981       MI->getOpcode() == X86::ISTT_Fp32m32 ||
982       MI->getOpcode() == X86::ISTT_Fp64m32 ||
983       MI->getOpcode() == X86::IST_Fp64m64 ||
984       MI->getOpcode() == X86::ISTT_Fp16m64 ||
985       MI->getOpcode() == X86::ISTT_Fp32m64 ||
986       MI->getOpcode() == X86::ISTT_Fp64m64 ||
987       MI->getOpcode() == X86::IST_Fp64m80 ||
988       MI->getOpcode() == X86::ISTT_Fp16m80 ||
989       MI->getOpcode() == X86::ISTT_Fp32m80 ||
990       MI->getOpcode() == X86::ISTT_Fp64m80 ||
991       MI->getOpcode() == X86::ST_FpP80m)) {
992    duplicateToTop(Reg, ScratchFPReg, I);
993  } else {
994    moveToTop(Reg, I);            // Move to the top of the stack...
995  }
996
997  // Convert from the pseudo instruction to the concrete instruction.
998  MI->RemoveOperand(NumOps-1);    // Remove explicit ST(0) operand
999  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1000
1001  if (MI->getOpcode() == X86::IST_FP64m ||
1002      MI->getOpcode() == X86::ISTT_FP16m ||
1003      MI->getOpcode() == X86::ISTT_FP32m ||
1004      MI->getOpcode() == X86::ISTT_FP64m ||
1005      MI->getOpcode() == X86::ST_FP80m) {
1006    if (StackTop == 0)
1007      report_fatal_error("Stack empty??");
1008    --StackTop;
1009  } else if (KillsSrc) { // Last use of operand?
1010    popStackAfter(I);
1011  }
1012}
1013
1014
1015/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1016/// replace the value with a newly computed value.  These instructions may have
1017/// non-fp operands after their FP operands.
1018///
1019///  Examples:
1020///     R1 = fchs R2
1021///     R1 = fadd R2, [mem]
1022///
1023void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1024  MachineInstr *MI = I;
1025#ifndef NDEBUG
1026  unsigned NumOps = MI->getDesc().getNumOperands();
1027  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1028#endif
1029
1030  // Is this the last use of the source register?
1031  unsigned Reg = getFPReg(MI->getOperand(1));
1032  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
1033
1034  if (KillsSrc) {
1035    // If this is the last use of the source register, just make sure it's on
1036    // the top of the stack.
1037    moveToTop(Reg, I);
1038    if (StackTop == 0)
1039      report_fatal_error("Stack cannot be empty!");
1040    --StackTop;
1041    pushReg(getFPReg(MI->getOperand(0)));
1042  } else {
1043    // If this is not the last use of the source register, _copy_ it to the top
1044    // of the stack.
1045    duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
1046  }
1047
1048  // Change from the pseudo instruction to the concrete instruction.
1049  MI->RemoveOperand(1);   // Drop the source operand.
1050  MI->RemoveOperand(0);   // Drop the destination operand.
1051  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1052}
1053
1054
1055//===----------------------------------------------------------------------===//
1056// Define tables of various ways to map pseudo instructions
1057//
1058
1059// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
1060static const TableEntry ForwardST0Table[] = {
1061  { X86::ADD_Fp32  , X86::ADD_FST0r },
1062  { X86::ADD_Fp64  , X86::ADD_FST0r },
1063  { X86::ADD_Fp80  , X86::ADD_FST0r },
1064  { X86::DIV_Fp32  , X86::DIV_FST0r },
1065  { X86::DIV_Fp64  , X86::DIV_FST0r },
1066  { X86::DIV_Fp80  , X86::DIV_FST0r },
1067  { X86::MUL_Fp32  , X86::MUL_FST0r },
1068  { X86::MUL_Fp64  , X86::MUL_FST0r },
1069  { X86::MUL_Fp80  , X86::MUL_FST0r },
1070  { X86::SUB_Fp32  , X86::SUB_FST0r },
1071  { X86::SUB_Fp64  , X86::SUB_FST0r },
1072  { X86::SUB_Fp80  , X86::SUB_FST0r },
1073};
1074
1075// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
1076static const TableEntry ReverseST0Table[] = {
1077  { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
1078  { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
1079  { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
1080  { X86::DIV_Fp32  , X86::DIVR_FST0r },
1081  { X86::DIV_Fp64  , X86::DIVR_FST0r },
1082  { X86::DIV_Fp80  , X86::DIVR_FST0r },
1083  { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
1084  { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
1085  { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
1086  { X86::SUB_Fp32  , X86::SUBR_FST0r },
1087  { X86::SUB_Fp64  , X86::SUBR_FST0r },
1088  { X86::SUB_Fp80  , X86::SUBR_FST0r },
1089};
1090
1091// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
1092static const TableEntry ForwardSTiTable[] = {
1093  { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
1094  { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
1095  { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
1096  { X86::DIV_Fp32  , X86::DIVR_FrST0 },
1097  { X86::DIV_Fp64  , X86::DIVR_FrST0 },
1098  { X86::DIV_Fp80  , X86::DIVR_FrST0 },
1099  { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
1100  { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
1101  { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
1102  { X86::SUB_Fp32  , X86::SUBR_FrST0 },
1103  { X86::SUB_Fp64  , X86::SUBR_FrST0 },
1104  { X86::SUB_Fp80  , X86::SUBR_FrST0 },
1105};
1106
1107// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
1108static const TableEntry ReverseSTiTable[] = {
1109  { X86::ADD_Fp32  , X86::ADD_FrST0 },
1110  { X86::ADD_Fp64  , X86::ADD_FrST0 },
1111  { X86::ADD_Fp80  , X86::ADD_FrST0 },
1112  { X86::DIV_Fp32  , X86::DIV_FrST0 },
1113  { X86::DIV_Fp64  , X86::DIV_FrST0 },
1114  { X86::DIV_Fp80  , X86::DIV_FrST0 },
1115  { X86::MUL_Fp32  , X86::MUL_FrST0 },
1116  { X86::MUL_Fp64  , X86::MUL_FrST0 },
1117  { X86::MUL_Fp80  , X86::MUL_FrST0 },
1118  { X86::SUB_Fp32  , X86::SUB_FrST0 },
1119  { X86::SUB_Fp64  , X86::SUB_FrST0 },
1120  { X86::SUB_Fp80  , X86::SUB_FrST0 },
1121};
1122
1123
1124/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1125/// instructions which need to be simplified and possibly transformed.
1126///
1127/// Result: ST(0) = fsub  ST(0), ST(i)
1128///         ST(i) = fsub  ST(0), ST(i)
1129///         ST(0) = fsubr ST(0), ST(i)
1130///         ST(i) = fsubr ST(0), ST(i)
1131///
1132void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1133  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1134  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1135  MachineInstr *MI = I;
1136
1137  unsigned NumOperands = MI->getDesc().getNumOperands();
1138  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1139  unsigned Dest = getFPReg(MI->getOperand(0));
1140  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1141  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1142  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1143  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1144  DebugLoc dl = MI->getDebugLoc();
1145
1146  unsigned TOS = getStackEntry(0);
1147
1148  // One of our operands must be on the top of the stack.  If neither is yet, we
1149  // need to move one.
1150  if (Op0 != TOS && Op1 != TOS) {   // No operand at TOS?
1151    // We can choose to move either operand to the top of the stack.  If one of
1152    // the operands is killed by this instruction, we want that one so that we
1153    // can update right on top of the old version.
1154    if (KillsOp0) {
1155      moveToTop(Op0, I);         // Move dead operand to TOS.
1156      TOS = Op0;
1157    } else if (KillsOp1) {
1158      moveToTop(Op1, I);
1159      TOS = Op1;
1160    } else {
1161      // All of the operands are live after this instruction executes, so we
1162      // cannot update on top of any operand.  Because of this, we must
1163      // duplicate one of the stack elements to the top.  It doesn't matter
1164      // which one we pick.
1165      //
1166      duplicateToTop(Op0, Dest, I);
1167      Op0 = TOS = Dest;
1168      KillsOp0 = true;
1169    }
1170  } else if (!KillsOp0 && !KillsOp1) {
1171    // If we DO have one of our operands at the top of the stack, but we don't
1172    // have a dead operand, we must duplicate one of the operands to a new slot
1173    // on the stack.
1174    duplicateToTop(Op0, Dest, I);
1175    Op0 = TOS = Dest;
1176    KillsOp0 = true;
1177  }
1178
1179  // Now we know that one of our operands is on the top of the stack, and at
1180  // least one of our operands is killed by this instruction.
1181  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1182         "Stack conditions not set up right!");
1183
1184  // We decide which form to use based on what is on the top of the stack, and
1185  // which operand is killed by this instruction.
1186  ArrayRef<TableEntry> InstTable;
1187  bool isForward = TOS == Op0;
1188  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1189  if (updateST0) {
1190    if (isForward)
1191      InstTable = ForwardST0Table;
1192    else
1193      InstTable = ReverseST0Table;
1194  } else {
1195    if (isForward)
1196      InstTable = ForwardSTiTable;
1197    else
1198      InstTable = ReverseSTiTable;
1199  }
1200
1201  int Opcode = Lookup(InstTable, MI->getOpcode());
1202  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1203
1204  // NotTOS - The register which is not on the top of stack...
1205  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1206
1207  // Replace the old instruction with a new instruction
1208  MBB->remove(I++);
1209  I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1210
1211  // If both operands are killed, pop one off of the stack in addition to
1212  // overwriting the other one.
1213  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1214    assert(!updateST0 && "Should have updated other operand!");
1215    popStackAfter(I);   // Pop the top of stack
1216  }
1217
1218  // Update stack information so that we know the destination register is now on
1219  // the stack.
1220  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1221  assert(UpdatedSlot < StackTop && Dest < 7);
1222  Stack[UpdatedSlot]   = Dest;
1223  RegMap[Dest]         = UpdatedSlot;
1224  MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
1225}
1226
1227/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1228/// register arguments and no explicit destinations.
1229///
1230void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1231  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1232  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1233  MachineInstr *MI = I;
1234
1235  unsigned NumOperands = MI->getDesc().getNumOperands();
1236  assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1237  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1238  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1239  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1240  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1241
1242  // Make sure the first operand is on the top of stack, the other one can be
1243  // anywhere.
1244  moveToTop(Op0, I);
1245
1246  // Change from the pseudo instruction to the concrete instruction.
1247  MI->getOperand(0).setReg(getSTReg(Op1));
1248  MI->RemoveOperand(1);
1249  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1250
1251  // If any of the operands are killed by this instruction, free them.
1252  if (KillsOp0) freeStackSlotAfter(I, Op0);
1253  if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1254}
1255
1256/// handleCondMovFP - Handle two address conditional move instructions.  These
1257/// instructions move a st(i) register to st(0) iff a condition is true.  These
1258/// instructions require that the first operand is at the top of the stack, but
1259/// otherwise don't modify the stack at all.
1260void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1261  MachineInstr *MI = I;
1262
1263  unsigned Op0 = getFPReg(MI->getOperand(0));
1264  unsigned Op1 = getFPReg(MI->getOperand(2));
1265  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1266
1267  // The first operand *must* be on the top of the stack.
1268  moveToTop(Op0, I);
1269
1270  // Change the second operand to the stack register that the operand is in.
1271  // Change from the pseudo instruction to the concrete instruction.
1272  MI->RemoveOperand(0);
1273  MI->RemoveOperand(1);
1274  MI->getOperand(0).setReg(getSTReg(Op1));
1275  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1276
1277  // If we kill the second operand, make sure to pop it from the stack.
1278  if (Op0 != Op1 && KillsOp1) {
1279    // Get this value off of the register stack.
1280    freeStackSlotAfter(I, Op1);
1281  }
1282}
1283
1284
1285/// handleSpecialFP - Handle special instructions which behave unlike other
1286/// floating point instructions.  This is primarily intended for use by pseudo
1287/// instructions.
1288///
1289void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1290  MachineInstr *MI = Inst;
1291
1292  if (MI->isCall()) {
1293    handleCall(Inst);
1294    return;
1295  }
1296
1297  switch (MI->getOpcode()) {
1298  default: llvm_unreachable("Unknown SpecialFP instruction!");
1299  case TargetOpcode::COPY: {
1300    // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1301    const MachineOperand &MO1 = MI->getOperand(1);
1302    const MachineOperand &MO0 = MI->getOperand(0);
1303    bool KillsSrc = MI->killsRegister(MO1.getReg());
1304
1305    // FP <- FP copy.
1306    unsigned DstFP = getFPReg(MO0);
1307    unsigned SrcFP = getFPReg(MO1);
1308    assert(isLive(SrcFP) && "Cannot copy dead register");
1309    if (KillsSrc) {
1310      // If the input operand is killed, we can just change the owner of the
1311      // incoming stack slot into the result.
1312      unsigned Slot = getSlot(SrcFP);
1313      Stack[Slot] = DstFP;
1314      RegMap[DstFP] = Slot;
1315    } else {
1316      // For COPY we just duplicate the specified value to a new stack slot.
1317      // This could be made better, but would require substantial changes.
1318      duplicateToTop(SrcFP, DstFP, Inst);
1319    }
1320    break;
1321  }
1322
1323  case TargetOpcode::IMPLICIT_DEF: {
1324    // All FP registers must be explicitly defined, so load a 0 instead.
1325    unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
1326    DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1327    BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::LD_F0));
1328    pushReg(Reg);
1329    break;
1330  }
1331
1332  case TargetOpcode::INLINEASM: {
1333    // The inline asm MachineInstr currently only *uses* FP registers for the
1334    // 'f' constraint.  These should be turned into the current ST(x) register
1335    // in the machine instr.
1336    //
1337    // There are special rules for x87 inline assembly. The compiler must know
1338    // exactly how many registers are popped and pushed implicitly by the asm.
1339    // Otherwise it is not possible to restore the stack state after the inline
1340    // asm.
1341    //
1342    // There are 3 kinds of input operands:
1343    //
1344    // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1345    //    popped input operand must be in a fixed stack slot, and it is either
1346    //    tied to an output operand, or in the clobber list. The MI has ST use
1347    //    and def operands for these inputs.
1348    //
1349    // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1350    //    preserved by the inline asm. The fixed stack slots must be STn-STm
1351    //    following the popped inputs. A fixed input operand cannot be tied to
1352    //    an output or appear in the clobber list. The MI has ST use operands
1353    //    and no defs for these inputs.
1354    //
1355    // 3. Preserved inputs. These inputs use the "f" constraint which is
1356    //    represented as an FP register. The inline asm won't change these
1357    //    stack slots.
1358    //
1359    // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1360    // registers do not count as output operands. The inline asm changes the
1361    // stack as if it popped all the popped inputs and then pushed all the
1362    // output operands.
1363
1364    // Scan the assembly for ST registers used, defined and clobbered. We can
1365    // only tell clobbers from defs by looking at the asm descriptor.
1366    unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
1367    unsigned NumOps = 0;
1368    SmallSet<unsigned, 1> FRegIdx;
1369    unsigned RCID;
1370
1371    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
1372         i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
1373      unsigned Flags = MI->getOperand(i).getImm();
1374
1375      NumOps = InlineAsm::getNumOperandRegisters(Flags);
1376      if (NumOps != 1)
1377        continue;
1378      const MachineOperand &MO = MI->getOperand(i + 1);
1379      if (!MO.isReg())
1380        continue;
1381      unsigned STReg = MO.getReg() - X86::FP0;
1382      if (STReg >= 8)
1383        continue;
1384
1385      // If the flag has a register class constraint, this must be an operand
1386      // with constraint "f". Record its index and continue.
1387      if (InlineAsm::hasRegClassConstraint(Flags, RCID)) {
1388        FRegIdx.insert(i + 1);
1389        continue;
1390      }
1391
1392      switch (InlineAsm::getKind(Flags)) {
1393      case InlineAsm::Kind_RegUse:
1394        STUses |= (1u << STReg);
1395        break;
1396      case InlineAsm::Kind_RegDef:
1397      case InlineAsm::Kind_RegDefEarlyClobber:
1398        STDefs |= (1u << STReg);
1399        if (MO.isDead())
1400          STDeadDefs |= (1u << STReg);
1401        break;
1402      case InlineAsm::Kind_Clobber:
1403        STClobbers |= (1u << STReg);
1404        break;
1405      default:
1406        break;
1407      }
1408    }
1409
1410    if (STUses && !isMask_32(STUses))
1411      MI->emitError("fixed input regs must be last on the x87 stack");
1412    unsigned NumSTUses = countTrailingOnes(STUses);
1413
1414    // Defs must be contiguous from the stack top. ST0-STn.
1415    if (STDefs && !isMask_32(STDefs)) {
1416      MI->emitError("output regs must be last on the x87 stack");
1417      STDefs = NextPowerOf2(STDefs) - 1;
1418    }
1419    unsigned NumSTDefs = countTrailingOnes(STDefs);
1420
1421    // So must the clobbered stack slots. ST0-STm, m >= n.
1422    if (STClobbers && !isMask_32(STDefs | STClobbers))
1423      MI->emitError("clobbers must be last on the x87 stack");
1424
1425    // Popped inputs are the ones that are also clobbered or defined.
1426    unsigned STPopped = STUses & (STDefs | STClobbers);
1427    if (STPopped && !isMask_32(STPopped))
1428      MI->emitError("implicitly popped regs must be last on the x87 stack");
1429    unsigned NumSTPopped = countTrailingOnes(STPopped);
1430
1431    DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1432                 << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
1433
1434#ifndef NDEBUG
1435    // If any input operand uses constraint "f", all output register
1436    // constraints must be early-clobber defs.
1437    for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I)
1438      if (FRegIdx.count(I)) {
1439        assert((1 << getFPReg(MI->getOperand(I)) & STDefs) == 0 &&
1440               "Operands with constraint \"f\" cannot overlap with defs");
1441      }
1442#endif
1443
1444    // Collect all FP registers (register operands with constraints "t", "u",
1445    // and "f") to kill afer the instruction.
1446    unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1447    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1448      MachineOperand &Op = MI->getOperand(i);
1449      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1450        continue;
1451      unsigned FPReg = getFPReg(Op);
1452
1453      // If we kill this operand, make sure to pop it from the stack after the
1454      // asm.  We just remember it for now, and pop them all off at the end in
1455      // a batch.
1456      if (Op.isUse() && Op.isKill())
1457        FPKills |= 1U << FPReg;
1458    }
1459
1460    // Do not include registers that are implicitly popped by defs/clobbers.
1461    FPKills &= ~(STDefs | STClobbers);
1462
1463    // Now we can rearrange the live registers to match what was requested.
1464    unsigned char STUsesArray[8];
1465
1466    for (unsigned I = 0; I < NumSTUses; ++I)
1467      STUsesArray[I] = I;
1468
1469    shuffleStackTop(STUsesArray, NumSTUses, Inst);
1470    DEBUG({dbgs() << "Before asm: "; dumpStack();});
1471
1472    // With the stack layout fixed, rewrite the FP registers.
1473    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1474      MachineOperand &Op = MI->getOperand(i);
1475      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1476        continue;
1477
1478      unsigned FPReg = getFPReg(Op);
1479
1480      if (FRegIdx.count(i))
1481        // Operand with constraint "f".
1482        Op.setReg(getSTReg(FPReg));
1483      else
1484        // Operand with a single register class constraint ("t" or "u").
1485        Op.setReg(X86::ST0 + FPReg);
1486    }
1487
1488    // Simulate the inline asm popping its inputs and pushing its outputs.
1489    StackTop -= NumSTPopped;
1490
1491    for (unsigned i = 0; i < NumSTDefs; ++i)
1492      pushReg(NumSTDefs - i - 1);
1493
1494    // If this asm kills any FP registers (is the last use of them) we must
1495    // explicitly emit pop instructions for them.  Do this now after the asm has
1496    // executed so that the ST(x) numbers are not off (which would happen if we
1497    // did this inline with operand rewriting).
1498    //
1499    // Note: this might be a non-optimal pop sequence.  We might be able to do
1500    // better by trying to pop in stack order or something.
1501    while (FPKills) {
1502      unsigned FPReg = countTrailingZeros(FPKills);
1503      if (isLive(FPReg))
1504        freeStackSlotAfter(Inst, FPReg);
1505      FPKills &= ~(1U << FPReg);
1506    }
1507
1508    // Don't delete the inline asm!
1509    return;
1510  }
1511
1512  case X86::RETQ:
1513  case X86::RETL:
1514  case X86::RETIL:
1515  case X86::RETIQ:
1516    // If RET has an FP register use operand, pass the first one in ST(0) and
1517    // the second one in ST(1).
1518
1519    // Find the register operands.
1520    unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1521    unsigned LiveMask = 0;
1522
1523    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1524      MachineOperand &Op = MI->getOperand(i);
1525      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1526        continue;
1527      // FP Register uses must be kills unless there are two uses of the same
1528      // register, in which case only one will be a kill.
1529      assert(Op.isUse() &&
1530             (Op.isKill() ||                        // Marked kill.
1531              getFPReg(Op) == FirstFPRegOp ||       // Second instance.
1532              MI->killsRegister(Op.getReg())) &&    // Later use is marked kill.
1533             "Ret only defs operands, and values aren't live beyond it");
1534
1535      if (FirstFPRegOp == ~0U)
1536        FirstFPRegOp = getFPReg(Op);
1537      else {
1538        assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1539        SecondFPRegOp = getFPReg(Op);
1540      }
1541      LiveMask |= (1 << getFPReg(Op));
1542
1543      // Remove the operand so that later passes don't see it.
1544      MI->RemoveOperand(i);
1545      --i, --e;
1546    }
1547
1548    // We may have been carrying spurious live-ins, so make sure only the returned
1549    // registers are left live.
1550    adjustLiveRegs(LiveMask, MI);
1551    if (!LiveMask) return;  // Quick check to see if any are possible.
1552
1553    // There are only four possibilities here:
1554    // 1) we are returning a single FP value.  In this case, it has to be in
1555    //    ST(0) already, so just declare success by removing the value from the
1556    //    FP Stack.
1557    if (SecondFPRegOp == ~0U) {
1558      // Assert that the top of stack contains the right FP register.
1559      assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1560             "Top of stack not the right register for RET!");
1561
1562      // Ok, everything is good, mark the value as not being on the stack
1563      // anymore so that our assertion about the stack being empty at end of
1564      // block doesn't fire.
1565      StackTop = 0;
1566      return;
1567    }
1568
1569    // Otherwise, we are returning two values:
1570    // 2) If returning the same value for both, we only have one thing in the FP
1571    //    stack.  Consider:  RET FP1, FP1
1572    if (StackTop == 1) {
1573      assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1574             "Stack misconfiguration for RET!");
1575
1576      // Duplicate the TOS so that we return it twice.  Just pick some other FPx
1577      // register to hold it.
1578      unsigned NewReg = ScratchFPReg;
1579      duplicateToTop(FirstFPRegOp, NewReg, MI);
1580      FirstFPRegOp = NewReg;
1581    }
1582
1583    /// Okay we know we have two different FPx operands now:
1584    assert(StackTop == 2 && "Must have two values live!");
1585
1586    /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1587    ///    in ST(1).  In this case, emit an fxch.
1588    if (getStackEntry(0) == SecondFPRegOp) {
1589      assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1590      moveToTop(FirstFPRegOp, MI);
1591    }
1592
1593    /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1594    /// ST(1).  Just remove both from our understanding of the stack and return.
1595    assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1596    assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1597    StackTop = 0;
1598    return;
1599  }
1600
1601  Inst = MBB->erase(Inst);  // Remove the pseudo instruction
1602
1603  // We want to leave I pointing to the previous instruction, but what if we
1604  // just erased the first instruction?
1605  if (Inst == MBB->begin()) {
1606    DEBUG(dbgs() << "Inserting dummy KILL\n");
1607    Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1608  } else
1609    --Inst;
1610}
1611
1612void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1613  const TargetRegisterInfo *TRI =
1614      MBB.getParent()->getSubtarget().getRegisterInfo();
1615  LivePhysRegs LPR(TRI);
1616
1617  LPR.addLiveOuts(&MBB);
1618
1619  for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
1620       I != E; ++I) {
1621    if (I->isDebugValue())
1622      continue;
1623
1624    std::bitset<8> Defs;
1625    SmallVector<MachineOperand *, 2> Uses;
1626    MachineInstr &MI = *I;
1627
1628    for (auto &MO : I->operands()) {
1629      if (!MO.isReg())
1630        continue;
1631
1632      unsigned Reg = MO.getReg() - X86::FP0;
1633
1634      if (Reg >= 8)
1635        continue;
1636
1637      if (MO.isDef()) {
1638        Defs.set(Reg);
1639        if (!LPR.contains(MO.getReg()))
1640          MO.setIsDead();
1641      } else
1642        Uses.push_back(&MO);
1643    }
1644
1645    for (auto *MO : Uses)
1646      if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
1647        MO->setIsKill();
1648
1649    LPR.stepBackward(MI);
1650  }
1651}
1652