// X86FloatingPoint.cpp — revision 288943
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the pass which converts floating point instructions from
11// pseudo registers into register stack instructions.  This pass uses live
12// variable information to indicate where the FPn registers are used and their
13// lifetimes.
14//
15// The x87 hardware tracks liveness of the stack registers, so it is necessary
16// to implement exact liveness tracking between basic blocks. The CFG edges are
17// partitioned into bundles where the same FP registers must be live in
18// identical stack positions. Instructions are inserted at the end of each basic
19// block to rearrange the live registers to match the outgoing bundle.
20//
21// This approach avoids splitting critical edges at the potential cost of more
22// live register shuffling instructions when critical edges are present.
23//
24//===----------------------------------------------------------------------===//
25
26#include "X86.h"
27#include "X86InstrInfo.h"
28#include "llvm/ADT/DepthFirstIterator.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallPtrSet.h"
31#include "llvm/ADT/SmallSet.h"
32#include "llvm/ADT/SmallVector.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/CodeGen/EdgeBundles.h"
35#include "llvm/CodeGen/LivePhysRegs.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/Passes.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/ErrorHandling.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/Target/TargetInstrInfo.h"
45#include "llvm/Target/TargetMachine.h"
46#include "llvm/Target/TargetSubtargetInfo.h"
47#include <algorithm>
48#include <bitset>
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-codegen"
52
53STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54STATISTIC(NumFP  , "Number of floating point instructions");
55
namespace {
  // Index of the stack slot used as a scratch FP register. Only FP0-FP6 are
  // allocatable, so slot 7 is always available for temporary values.
  const unsigned ScratchFPReg = 7;

  // FPS - The X86 FP Stackifier pass. Converts FP<n> pseudo-register
  // instructions into concrete x87 register-stack instructions, maintaining
  // an exact model of the hardware stack in Stack/StackTop/RegMap.
  struct FPS : public MachineFunctionPass {
    static char ID;
    FPS() : MachineFunctionPass(ID) {
      initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
      // This is really only to keep valgrind quiet.
      // The logic in isLive() is too much for it.
      memset(Stack, 0, sizeof(Stack));
      memset(RegMap, 0, sizeof(RegMap));
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      AU.addRequired<EdgeBundles>();
      AU.addPreservedID(MachineLoopInfoID);
      AU.addPreservedID(MachineDominatorsID);
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    const char *getPassName() const override { return "X86 FP Stackifier"; }

  private:
    const TargetInstrInfo *TII; // Machine instruction info.

    // Two CFG edges are related if they leave the same block, or enter the same
    // block. The transitive closure of an edge under this relation is a
    // LiveBundle. It represents a set of CFG edges where the live FP stack
    // registers must be allocated identically in the x87 stack.
    //
    // A LiveBundle is usually all the edges leaving a block, or all the edges
    // entering a block, but it can contain more edges if critical edges are
    // present.
    //
    // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
    // but the exact mapping of FP registers to stack slots is fixed later.
    struct LiveBundle {
      // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
      unsigned Mask;

      // Number of pre-assigned live registers in FixStack. This is 0 when the
      // stack order has not yet been fixed.
      unsigned FixCount;

      // Assigned stack order for live-in registers.
      // FixStack[i] == getStackEntry(i) for all i < FixCount.
      unsigned char FixStack[8];

      LiveBundle() : Mask(0), FixCount(0) {}

      // Have the live registers been assigned a stack order yet?
      bool isFixed() const { return !Mask || FixCount; }
    };

    // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
    // with no live FP registers.
    SmallVector<LiveBundle, 8> LiveBundles;

    // The edge bundle analysis provides indices into the LiveBundles vector.
    EdgeBundles *Bundles;

    // Return a bitmask of FP registers in block's live-in list.
    static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
      unsigned Mask = 0;
      for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
           E = MBB->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        // Skip anything that is not one of the allocatable FP0-FP6 registers.
        if (Reg < X86::FP0 || Reg > X86::FP6)
          continue;
        Mask |= 1 << (Reg - X86::FP0);
      }
      return Mask;
    }

    // Partition all the CFG edges into LiveBundles.
    void bundleCFG(MachineFunction &MF);

    MachineBasicBlock *MBB;     // Current basic block

    // The hardware keeps track of how many FP registers are live, so we have
    // to model that exactly. Usually, each live register corresponds to an
    // FP<n> register, but when dealing with calls, returns, and inline
    // assembly, it is sometimes necessary to have live scratch registers.
    unsigned Stack[8];          // FP<n> Registers in each stack slot...
    unsigned StackTop;          // The current top of the FP stack.

    enum {
      NumFPRegs = 8             // Including scratch pseudo-registers.
    };

    // For each live FP<n> register, point to its Stack[] entry.
    // The first entries correspond to FP0-FP6, the rest are scratch registers
    // used when we need slightly different live registers than what the
    // register allocator thinks.
    unsigned RegMap[NumFPRegs];

    // Set up our stack model to match the incoming registers to MBB.
    void setupBlockStack();

    // Shuffle live registers to match the expectations of successor blocks.
    void finishBlockStack();

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    // Print the current stack model to dbgs(). Note: no trailing newline is
    // emitted; callers are expected to follow up with their own output.
    void dumpStack() const {
      dbgs() << "Stack contents:";
      for (unsigned i = 0; i != StackTop; ++i) {
        dbgs() << " FP" << Stack[i];
        assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
      }
    }
#endif

    /// getSlot - Return the stack slot number a particular register number is
    /// in.
    unsigned getSlot(unsigned RegNo) const {
      assert(RegNo < NumFPRegs && "Regno out of range!");
      return RegMap[RegNo];
    }

    /// isLive - Is RegNo currently live in the stack?
    bool isLive(unsigned RegNo) const {
      unsigned Slot = getSlot(RegNo);
      // A stale RegMap entry can point past StackTop or at a slot now holding
      // a different register; either way RegNo is not live.
      return Slot < StackTop && Stack[Slot] == RegNo;
    }

    /// getStackEntry - Return the X86::FP<n> register in register ST(i).
    unsigned getStackEntry(unsigned STi) const {
      if (STi >= StackTop)
        report_fatal_error("Access past stack top!");
      return Stack[StackTop-1-STi];
    }

    /// getSTReg - Return the X86::ST(i) register which contains the specified
    /// FP<RegNo> register.
    unsigned getSTReg(unsigned RegNo) const {
      // ST(i) counts down from the top of the stack, so the ST number is the
      // distance of RegNo's slot from the top.
      return StackTop - 1 - getSlot(RegNo) + X86::ST0;
    }

    // pushReg - Push the specified FP<n> register onto the stack.
    void pushReg(unsigned Reg) {
      assert(Reg < NumFPRegs && "Register number out of range!");
      if (StackTop >= 8)
        report_fatal_error("Stack overflow!");
      Stack[StackTop] = Reg;
      RegMap[Reg] = StackTop++;
    }

    bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }

    // moveToTop - Bring RegNo to st(0), emitting an fxch before instruction I
    // if it is not already on top. Updates both the model and the code.
    void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
      if (isAtTop(RegNo)) return;

      unsigned STReg = getSTReg(RegNo);
      unsigned RegOnTop = getStackEntry(0);

      // Swap the slots the regs are in.
      std::swap(RegMap[RegNo], RegMap[RegOnTop]);

      // Swap stack slot contents.
      if (RegMap[RegOnTop] >= StackTop)
        report_fatal_error("Access past stack top!");
      std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);

      // Emit an fxch to update the runtime processors version of the state.
      BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
      ++NumFXCH;
    }

    // duplicateToTop - Emit an fld of RegNo's ST register before I, pushing a
    // copy onto the stack; the new top-of-stack is recorded as AsReg.
    void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
      unsigned STReg = getSTReg(RegNo);
      pushReg(AsReg);   // New register on top of stack

      BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
    }

    /// popStackAfter - Pop the current value off of the top of the FP stack
    /// after the specified instruction.
    void popStackAfter(MachineBasicBlock::iterator &I);

    /// freeStackSlotAfter - Free the specified register from the register
    /// stack, so that it is no longer in a register.  If the register is
    /// currently at the top of the stack, we just pop the current instruction,
    /// otherwise we store the current top-of-stack into the specified slot,
    /// then pop the top of stack.
    void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);

    /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
    /// instruction.
    MachineBasicBlock::iterator
    freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);

    /// Adjust the live registers to be the set in Mask.
    void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);

    /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
    /// st(0), FP reg FixStack[1] is st(1) etc.
    void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
                         MachineBasicBlock::iterator I);

    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);

    void handleCall(MachineBasicBlock::iterator &I);
    void handleZeroArgFP(MachineBasicBlock::iterator &I);
    void handleOneArgFP(MachineBasicBlock::iterator &I);
    void handleOneArgFPRW(MachineBasicBlock::iterator &I);
    void handleTwoArgFP(MachineBasicBlock::iterator &I);
    void handleCompareFP(MachineBasicBlock::iterator &I);
    void handleCondMovFP(MachineBasicBlock::iterator &I);
    void handleSpecialFP(MachineBasicBlock::iterator &I);

    // Check if a COPY instruction is using FP registers.
    // NOTE(review): assumes operands 0 and 1 are register operands, which
    // holds for COPY instructions (the only call site guards on isCopy()).
    static bool isFPCopy(MachineInstr *MI) {
      unsigned DstReg = MI->getOperand(0).getReg();
      unsigned SrcReg = MI->getOperand(1).getReg();

      return X86::RFP80RegClass.contains(DstReg) ||
        X86::RFP80RegClass.contains(SrcReg);
    }

    void setKillFlags(MachineBasicBlock &MBB) const;
  };
  char FPS::ID = 0;
}
283
284FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
285
286/// getFPReg - Return the X86::FPx register number for the specified operand.
287/// For example, this returns 3 for X86::FP3.
288static unsigned getFPReg(const MachineOperand &MO) {
289  assert(MO.isReg() && "Expected an FP register!");
290  unsigned Reg = MO.getReg();
291  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
292  return Reg - X86::FP0;
293}
294
/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
/// register references into FP stack references.
///
bool FPS::runOnMachineFunction(MachineFunction &MF) {
  // We only need to run this pass if there are any FP registers used in this
  // function.  If it is all integer, there is nothing for us to do!
  bool FPIsUsed = false;

  static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
  for (unsigned i = 0; i <= 6; ++i)
    if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
      FPIsUsed = true;
      break;
    }

  // Early exit.
  if (!FPIsUsed) return false;

  Bundles = &getAnalysis<EdgeBundles>();
  TII = MF.getSubtarget().getInstrInfo();

  // Prepare cross-MBB liveness.
  bundleCFG(MF);

  StackTop = 0;

  // Process the function in depth first order so that we process at least one
  // of the predecessors for every reachable block in the function.
  SmallPtrSet<MachineBasicBlock*, 8> Processed;
  MachineBasicBlock *Entry = MF.begin();

  bool Changed = false;
  for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
    Changed |= processBasicBlock(MF, *BB);

  // Process any unreachable blocks in arbitrary order now.
  // (depth_first_ext records visited blocks in Processed, so the size check
  // tells us whether any block was not reachable from the entry.)
  if (MF.size() != Processed.size())
    for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
      if (Processed.insert(BB).second)
        Changed |= processBasicBlock(MF, *BB);

  // Drop per-function liveness data so the next run starts clean.
  LiveBundles.clear();

  return Changed;
}
340
341/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
342/// live-out sets for the FP registers. Consistent means that the set of
343/// registers live-out from a block is identical to the live-in set of all
344/// successors. This is not enforced by the normal live-in lists since
345/// registers may be implicitly defined, or not used by all successors.
346void FPS::bundleCFG(MachineFunction &MF) {
347  assert(LiveBundles.empty() && "Stale data in LiveBundles");
348  LiveBundles.resize(Bundles->getNumBundles());
349
350  // Gather the actual live-in masks for all MBBs.
351  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
352    MachineBasicBlock *MBB = I;
353    const unsigned Mask = calcLiveInMask(MBB);
354    if (!Mask)
355      continue;
356    // Update MBB ingoing bundle mask.
357    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
358  }
359}
360
/// processBasicBlock - Loop over all of the instructions in the basic block,
/// transforming FP instructions into their stack form.
///
bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
  bool Changed = false;
  MBB = &BB;

  setKillFlags(BB);
  setupBlockStack();

  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
    MachineInstr *MI = I;
    uint64_t Flags = MI->getDesc().TSFlags;

    // Classify the instruction; several categories that the TSFlags do not
    // describe (inline asm, FP copies, implicit defs, calls) are folded into
    // SpecialFP so handleSpecialFP deals with them.
    unsigned FPInstClass = Flags & X86II::FPTypeMask;
    if (MI->isInlineAsm())
      FPInstClass = X86II::SpecialFP;

    if (MI->isCopy() && isFPCopy(MI))
      FPInstClass = X86II::SpecialFP;

    if (MI->isImplicitDef() &&
        X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
      FPInstClass = X86II::SpecialFP;

    if (MI->isCall())
      FPInstClass = X86II::SpecialFP;

    if (FPInstClass == X86II::NotFP)
      continue;  // Efficiently ignore non-fp insts!

    // Remember the instruction before MI so we can tell afterwards what the
    // handlers inserted or whether they deleted MI entirely.
    MachineInstr *PrevMI = nullptr;
    if (I != BB.begin())
      PrevMI = std::prev(I);

    ++NumFP;  // Keep track of # of pseudo instrs
    DEBUG(dbgs() << "\nFPInst:\t" << *MI);

    // Get dead variables list now because the MI pointer may be deleted as part
    // of processing!
    SmallVector<unsigned, 8> DeadRegs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDead())
        DeadRegs.push_back(MO.getReg());
    }

    // Dispatch on the instruction class. Each handler may rewrite MI in
    // place, insert new instructions, and/or update I.
    switch (FPInstClass) {
    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
    case X86II::CompareFP:  handleCompareFP(I); break;
    case X86II::CondMovFP:  handleCondMovFP(I); break;
    case X86II::SpecialFP:  handleSpecialFP(I); break;
    default: llvm_unreachable("Unknown FP Type!");
    }

    // Check to see if any of the values defined by this instruction are dead
    // after definition.  If so, pop them.
    for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
      unsigned Reg = DeadRegs[i];
      // Check if Reg is live on the stack. An inline-asm register operand that
      // is in the clobber list and marked dead might not be live on the stack.
      if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
        DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
        freeStackSlotAfter(I, Reg-X86::FP0);
      }
    }

    // Print out all of the instructions expanded to if -debug
    DEBUG(
      MachineBasicBlock::iterator PrevI(PrevMI);
      if (I == PrevI) {
        dbgs() << "Just deleted pseudo instruction\n";
      } else {
        MachineBasicBlock::iterator Start = I;
        // Rewind to first instruction newly inserted.
        while (Start != BB.begin() && std::prev(Start) != PrevI) --Start;
        dbgs() << "Inserted instructions:\n\t";
        Start->print(dbgs());
        while (++Start != std::next(I)) {}
      }
      dumpStack();
    );
    (void)PrevMI;   // Silence unused-variable warning in NDEBUG builds.

    Changed = true;
  }

  finishBlockStack();

  return Changed;
}
455
/// setupBlockStack - Use the live bundles to set up our model of the stack
/// to match predecessors' live out stack.
void FPS::setupBlockStack() {
  DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
               << " derived from " << MBB->getName() << ".\n");
  StackTop = 0;
  // Get the live-in bundle for MBB.
  const LiveBundle &Bundle =
    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];

  if (!Bundle.Mask) {
    DEBUG(dbgs() << "Block has no FP live-ins.\n");
    return;
  }

  // Depth-first iteration should ensure that we always have an assigned stack.
  assert(Bundle.isFixed() && "Reached block before any predecessors");

  // Push the fixed live-in registers.
  // Iterate in reverse so FixStack[0] is pushed last and ends up at st(0),
  // matching the FixStack[i] == getStackEntry(i) invariant.
  for (unsigned i = Bundle.FixCount; i > 0; --i) {
    MBB->addLiveIn(X86::ST0+i-1);
    DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
                 << unsigned(Bundle.FixStack[i-1]) << '\n');
    pushReg(Bundle.FixStack[i-1]);
  }

  // Kill off unwanted live-ins. This can happen with a critical edge.
  // FIXME: We could keep these live registers around as zombies. They may need
  // to be revived at the end of a short block. It might save a few instrs.
  adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
  DEBUG(MBB->dump());
}
488
/// finishBlockStack - Revive live-outs that are implicitly defined out of
/// MBB. Shuffle live registers to match the expected fixed stack of any
/// predecessors, and ensure that all predecessors are expecting the same
/// stack.
void FPS::finishBlockStack() {
  // The RET handling below takes care of return blocks for us.
  if (MBB->succ_empty())
    return;

  DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
               << " derived from " << MBB->getName() << ".\n");

  // Get MBB's live-out bundle.
  unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
  LiveBundle &Bundle = LiveBundles[BundleIdx];

  // We may need to kill and define some registers to match successors.
  // FIXME: This can probably be combined with the shuffle below.
  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
  adjustLiveRegs(Bundle.Mask, Term);

  if (!Bundle.Mask) {
    DEBUG(dbgs() << "No live-outs.\n");
    return;
  }

  // Has the stack order been fixed yet?
  DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
  if (Bundle.isFixed()) {
    // Some other block already decided the stack order for this bundle;
    // rearrange our live registers to agree with it.
    DEBUG(dbgs() << "Shuffling stack to match.\n");
    shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
  } else {
    // Not fixed yet, we get to choose. Record our current stack order as the
    // canonical one for every edge in this bundle.
    DEBUG(dbgs() << "Fixing stack order now.\n");
    Bundle.FixCount = StackTop;
    for (unsigned i = 0; i < StackTop; ++i)
      Bundle.FixStack[i] = getStackEntry(i);
  }
}
528
529
530//===----------------------------------------------------------------------===//
531// Efficient Lookup Table Support
532//===----------------------------------------------------------------------===//
533
namespace {
  // TableEntry - A single 'from' -> 'to' opcode remapping. Arrays of these
  // are kept sorted by 'from' so that Lookup() can binary search them.
  struct TableEntry {
    uint16_t from;   // Key opcode (tables are sorted on this field).
    uint16_t to;     // Replacement opcode.
    bool operator<(const TableEntry &TE) const { return from < TE.from; }
    // Heterogeneous comparisons against a bare opcode, as required by
    // std::lower_bound in Lookup().
    friend bool operator<(const TableEntry &TE, unsigned V) {
      return TE.from < V;
    }
    friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
                                                const TableEntry &TE) {
      return V < TE.from;
    }
  };
}
548
549#ifndef NDEBUG
550static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
551  for (unsigned i = 0; i != NumEntries-1; ++i)
552    if (!(Table[i] < Table[i+1])) return false;
553  return true;
554}
555#endif
556
557static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
558  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
559  if (I != Table+N && I->from == Opcode)
560    return I->to;
561  return -1;
562}
563
564#ifdef NDEBUG
565#define ASSERT_SORTED(TABLE)
566#else
567#define ASSERT_SORTED(TABLE)                                              \
568  { static bool TABLE##Checked = false;                                   \
569    if (!TABLE##Checked) {                                                \
570       assert(TableIsSorted(TABLE, array_lengthof(TABLE)) &&              \
571              "All lookup tables must be sorted for efficient access!");  \
572       TABLE##Checked = true;                                             \
573    }                                                                     \
574  }
575#endif
576
577//===----------------------------------------------------------------------===//
578// Register File -> Register Stack Mapping Methods
579//===----------------------------------------------------------------------===//
580
// OpcodeTable - Sorted map of register instructions to their stack version.
// The first element is an register file pseudo instruction, the second is the
// concrete X86 instruction which uses the register stack.
//
// The table must remain sorted by the first column; getConcreteOpcode()
// verifies this in debug builds via ASSERT_SORTED before binary searching.
static const TableEntry OpcodeTable[] = {
  { X86::ABS_Fp32     , X86::ABS_F     },
  { X86::ABS_Fp64     , X86::ABS_F     },
  { X86::ABS_Fp80     , X86::ABS_F     },
  { X86::ADD_Fp32m    , X86::ADD_F32m  },
  { X86::ADD_Fp64m    , X86::ADD_F64m  },
  { X86::ADD_Fp64m32  , X86::ADD_F32m  },
  { X86::ADD_Fp80m32  , X86::ADD_F32m  },
  { X86::ADD_Fp80m64  , X86::ADD_F64m  },
  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
  { X86::CHS_Fp32     , X86::CHS_F     },
  { X86::CHS_Fp64     , X86::CHS_F     },
  { X86::CHS_Fp80     , X86::CHS_F     },
  { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
  { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
  { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
  { X86::CMOVB_Fp32   , X86::CMOVB_F   },
  { X86::CMOVB_Fp64   , X86::CMOVB_F  },
  { X86::CMOVB_Fp80   , X86::CMOVB_F  },
  { X86::CMOVE_Fp32   , X86::CMOVE_F  },
  { X86::CMOVE_Fp64   , X86::CMOVE_F   },
  { X86::CMOVE_Fp80   , X86::CMOVE_F   },
  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
  { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
  { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
  { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
  { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
  { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
  { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
  { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
  { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
  { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
  { X86::CMOVP_Fp32   , X86::CMOVP_F   },
  { X86::CMOVP_Fp64   , X86::CMOVP_F   },
  { X86::CMOVP_Fp80   , X86::CMOVP_F   },
  { X86::COS_Fp32     , X86::COS_F     },
  { X86::COS_Fp64     , X86::COS_F     },
  { X86::COS_Fp80     , X86::COS_F     },
  { X86::DIVR_Fp32m   , X86::DIVR_F32m },
  { X86::DIVR_Fp64m   , X86::DIVR_F64m },
  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
  { X86::DIV_Fp32m    , X86::DIV_F32m  },
  { X86::DIV_Fp64m    , X86::DIV_F64m  },
  { X86::DIV_Fp64m32  , X86::DIV_F32m  },
  { X86::DIV_Fp80m32  , X86::DIV_F32m  },
  { X86::DIV_Fp80m64  , X86::DIV_F64m  },
  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
  { X86::ILD_Fp16m32  , X86::ILD_F16m  },
  { X86::ILD_Fp16m64  , X86::ILD_F16m  },
  { X86::ILD_Fp16m80  , X86::ILD_F16m  },
  { X86::ILD_Fp32m32  , X86::ILD_F32m  },
  { X86::ILD_Fp32m64  , X86::ILD_F32m  },
  { X86::ILD_Fp32m80  , X86::ILD_F32m  },
  { X86::ILD_Fp64m32  , X86::ILD_F64m  },
  { X86::ILD_Fp64m64  , X86::ILD_F64m  },
  { X86::ILD_Fp64m80  , X86::ILD_F64m  },
  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
  { X86::IST_Fp16m32  , X86::IST_F16m  },
  { X86::IST_Fp16m64  , X86::IST_F16m  },
  { X86::IST_Fp16m80  , X86::IST_F16m  },
  { X86::IST_Fp32m32  , X86::IST_F32m  },
  { X86::IST_Fp32m64  , X86::IST_F32m  },
  { X86::IST_Fp32m80  , X86::IST_F32m  },
  { X86::IST_Fp64m32  , X86::IST_FP64m },
  { X86::IST_Fp64m64  , X86::IST_FP64m },
  { X86::IST_Fp64m80  , X86::IST_FP64m },
  { X86::LD_Fp032     , X86::LD_F0     },
  { X86::LD_Fp064     , X86::LD_F0     },
  { X86::LD_Fp080     , X86::LD_F0     },
  { X86::LD_Fp132     , X86::LD_F1     },
  { X86::LD_Fp164     , X86::LD_F1     },
  { X86::LD_Fp180     , X86::LD_F1     },
  { X86::LD_Fp32m     , X86::LD_F32m   },
  { X86::LD_Fp32m64   , X86::LD_F32m   },
  { X86::LD_Fp32m80   , X86::LD_F32m   },
  { X86::LD_Fp64m     , X86::LD_F64m   },
  { X86::LD_Fp64m80   , X86::LD_F64m   },
  { X86::LD_Fp80m     , X86::LD_F80m   },
  { X86::MUL_Fp32m    , X86::MUL_F32m  },
  { X86::MUL_Fp64m    , X86::MUL_F64m  },
  { X86::MUL_Fp64m32  , X86::MUL_F32m  },
  { X86::MUL_Fp80m32  , X86::MUL_F32m  },
  { X86::MUL_Fp80m64  , X86::MUL_F64m  },
  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
  { X86::SIN_Fp32     , X86::SIN_F     },
  { X86::SIN_Fp64     , X86::SIN_F     },
  { X86::SIN_Fp80     , X86::SIN_F     },
  { X86::SQRT_Fp32    , X86::SQRT_F    },
  { X86::SQRT_Fp64    , X86::SQRT_F    },
  { X86::SQRT_Fp80    , X86::SQRT_F    },
  { X86::ST_Fp32m     , X86::ST_F32m   },
  { X86::ST_Fp64m     , X86::ST_F64m   },
  { X86::ST_Fp64m32   , X86::ST_F32m   },
  { X86::ST_Fp80m32   , X86::ST_F32m   },
  { X86::ST_Fp80m64   , X86::ST_F64m   },
  { X86::ST_FpP80m    , X86::ST_FP80m  },
  { X86::SUBR_Fp32m   , X86::SUBR_F32m },
  { X86::SUBR_Fp64m   , X86::SUBR_F64m },
  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
  { X86::SUB_Fp32m    , X86::SUB_F32m  },
  { X86::SUB_Fp64m    , X86::SUB_F64m  },
  { X86::SUB_Fp64m32  , X86::SUB_F32m  },
  { X86::SUB_Fp80m32  , X86::SUB_F32m  },
  { X86::SUB_Fp80m64  , X86::SUB_F64m  },
  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
  { X86::TST_Fp32     , X86::TST_F     },
  { X86::TST_Fp64     , X86::TST_F     },
  { X86::TST_Fp80     , X86::TST_F     },
  { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
  { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
  { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
  { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
  { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
  { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
};
746
747static unsigned getConcreteOpcode(unsigned Opcode) {
748  ASSERT_SORTED(OpcodeTable);
749  int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
750  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
751  return Opc;
752}
753
754//===----------------------------------------------------------------------===//
755// Helper Methods
756//===----------------------------------------------------------------------===//
757
// PopTable - Sorted map of instructions to their popping version.  The first
// element is an instruction, the second is the version which pops.
// Like OpcodeTable, it must stay sorted by the first column for Lookup().
//
static const TableEntry PopTable[] = {
  { X86::ADD_FrST0 , X86::ADD_FPrST0  },

  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
  { X86::DIV_FrST0 , X86::DIV_FPrST0  },

  { X86::IST_F16m  , X86::IST_FP16m   },
  { X86::IST_F32m  , X86::IST_FP32m   },

  { X86::MUL_FrST0 , X86::MUL_FPrST0  },

  { X86::ST_F32m   , X86::ST_FP32m    },
  { X86::ST_F64m   , X86::ST_FP64m    },
  { X86::ST_Frr    , X86::ST_FPrr     },

  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
  { X86::SUB_FrST0 , X86::SUB_FPrST0  },

  { X86::UCOM_FIr  , X86::UCOM_FIPr   },

  { X86::UCOM_FPr  , X86::UCOM_FPPr   },
  { X86::UCOM_Fr   , X86::UCOM_FPr    },
};
784
/// popStackAfter - Pop the current value off of the top of the FP stack after
/// the specified instruction.  This attempts to be sneaky and combine the pop
/// into the instruction itself if possible.  The iterator is left pointing to
/// the last instruction, be it a new pop instruction inserted, or the old
/// instruction if it was modified in place.
///
void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
  MachineInstr* MI = I;
  DebugLoc dl = MI->getDebugLoc();
  ASSERT_SORTED(PopTable);
  if (StackTop == 0)
    report_fatal_error("Cannot pop empty stack!");
  RegMap[Stack[--StackTop]] = ~0;     // Update state

  // Check to see if there is a popping version of this instruction...
  int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
  if (Opcode != -1) {
    // Rewrite the instruction in place to its popping form.
    I->setDesc(TII->get(Opcode));
    // UCOM_FPPr takes no operands, while UCOM_FPr had one; drop it.
    if (Opcode == X86::UCOM_FPPr)
      I->RemoveOperand(0);
  } else {    // Insert an explicit pop
    // fstp st(0) discards the top of stack; leave I on the new instruction.
    I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
  }
}
809
810/// freeStackSlotAfter - Free the specified register from the register stack, so
811/// that it is no longer in a register.  If the register is currently at the top
812/// of the stack, we just pop the current instruction, otherwise we store the
813/// current top-of-stack into the specified slot, then pop the top of stack.
814void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
815  if (getStackEntry(0) == FPRegNo) {  // already at the top of stack? easy.
816    popStackAfter(I);
817    return;
818  }
819
820  // Otherwise, store the top of stack into the dead slot, killing the operand
821  // without having to add in an explicit xchg then pop.
822  //
823  I = freeStackSlotBefore(++I, FPRegNo);
824}
825
826/// freeStackSlotBefore - Free the specified register without trying any
827/// folding.
828MachineBasicBlock::iterator
829FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
830  unsigned STReg    = getSTReg(FPRegNo);
831  unsigned OldSlot  = getSlot(FPRegNo);
832  unsigned TopReg   = Stack[StackTop-1];
833  Stack[OldSlot]    = TopReg;
834  RegMap[TopReg]    = OldSlot;
835  RegMap[FPRegNo]   = ~0;
836  Stack[--StackTop] = ~0;
837  return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
838      .addReg(STReg)
839      .getInstr();
840}
841
842/// adjustLiveRegs - Kill and revive registers such that exactly the FP
843/// registers with a bit in Mask are live.
844void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
845  unsigned Defs = Mask;
846  unsigned Kills = 0;
847  for (unsigned i = 0; i < StackTop; ++i) {
848    unsigned RegNo = Stack[i];
849    if (!(Defs & (1 << RegNo)))
850      // This register is live, but we don't want it.
851      Kills |= (1 << RegNo);
852    else
853      // We don't need to imp-def this live register.
854      Defs &= ~(1 << RegNo);
855  }
856  assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
857
858  // Produce implicit-defs for free by using killed registers.
859  while (Kills && Defs) {
860    unsigned KReg = countTrailingZeros(Kills);
861    unsigned DReg = countTrailingZeros(Defs);
862    DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
863    std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
864    std::swap(RegMap[KReg], RegMap[DReg]);
865    Kills &= ~(1 << KReg);
866    Defs &= ~(1 << DReg);
867  }
868
869  // Kill registers by popping.
870  if (Kills && I != MBB->begin()) {
871    MachineBasicBlock::iterator I2 = std::prev(I);
872    while (StackTop) {
873      unsigned KReg = getStackEntry(0);
874      if (!(Kills & (1 << KReg)))
875        break;
876      DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
877      popStackAfter(I2);
878      Kills &= ~(1 << KReg);
879    }
880  }
881
882  // Manually kill the rest.
883  while (Kills) {
884    unsigned KReg = countTrailingZeros(Kills);
885    DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
886    freeStackSlotBefore(I, KReg);
887    Kills &= ~(1 << KReg);
888  }
889
890  // Load zeros for all the imp-defs.
891  while(Defs) {
892    unsigned DReg = countTrailingZeros(Defs);
893    DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
894    BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
895    pushReg(DReg);
896    Defs &= ~(1 << DReg);
897  }
898
899  // Now we should have the correct registers live.
900  DEBUG(dumpStack());
901  assert(StackTop == countPopulation(Mask) && "Live count mismatch");
902}
903
904/// shuffleStackTop - emit fxch instructions before I to shuffle the top
905/// FixCount entries into the order given by FixStack.
906/// FIXME: Is there a better algorithm than insertion sort?
907void FPS::shuffleStackTop(const unsigned char *FixStack,
908                          unsigned FixCount,
909                          MachineBasicBlock::iterator I) {
910  // Move items into place, starting from the desired stack bottom.
911  while (FixCount--) {
912    // Old register at position FixCount.
913    unsigned OldReg = getStackEntry(FixCount);
914    // Desired register at position FixCount.
915    unsigned Reg = FixStack[FixCount];
916    if (Reg == OldReg)
917      continue;
918    // (Reg st0) (OldReg st0) = (Reg OldReg st0)
919    moveToTop(Reg, I);
920    if (FixCount > 0)
921      moveToTop(OldReg, I);
922  }
923  DEBUG(dumpStack());
924}
925
926
927//===----------------------------------------------------------------------===//
928// Instruction transformation implementation
929//===----------------------------------------------------------------------===//
930
931void FPS::handleCall(MachineBasicBlock::iterator &I) {
932  unsigned STReturns = 0;
933
934  for (const auto &MO : I->operands()) {
935    if (!MO.isReg())
936      continue;
937
938    unsigned R = MO.getReg() - X86::FP0;
939
940    if (R < 8) {
941      assert(MO.isDef() && MO.isImplicit());
942      STReturns |= 1 << R;
943    }
944  }
945
946  unsigned N = countTrailingOnes(STReturns);
947
948  // FP registers used for function return must be consecutive starting at
949  // FP0.
950  assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
951
952  for (unsigned I = 0; I < N; ++I)
953    pushReg(N - I - 1);
954}
955
956/// handleZeroArgFP - ST(0) = fld0    ST(0) = flds <mem>
957///
958void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
959  MachineInstr *MI = I;
960  unsigned DestReg = getFPReg(MI->getOperand(0));
961
962  // Change from the pseudo instruction to the concrete instruction.
963  MI->RemoveOperand(0);   // Remove the explicit ST(0) operand
964  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
965
966  // Result gets pushed on the stack.
967  pushReg(DestReg);
968}
969
970/// handleOneArgFP - fst <mem>, ST(0)
971///
972void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
973  MachineInstr *MI = I;
974  unsigned NumOps = MI->getDesc().getNumOperands();
975  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
976         "Can only handle fst* & ftst instructions!");
977
978  // Is this the last use of the source register?
979  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
980  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
981
982  // FISTP64m is strange because there isn't a non-popping versions.
983  // If we have one _and_ we don't want to pop the operand, duplicate the value
984  // on the stack instead of moving it.  This ensure that popping the value is
985  // always ok.
986  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
987  //
988  if (!KillsSrc &&
989      (MI->getOpcode() == X86::IST_Fp64m32 ||
990       MI->getOpcode() == X86::ISTT_Fp16m32 ||
991       MI->getOpcode() == X86::ISTT_Fp32m32 ||
992       MI->getOpcode() == X86::ISTT_Fp64m32 ||
993       MI->getOpcode() == X86::IST_Fp64m64 ||
994       MI->getOpcode() == X86::ISTT_Fp16m64 ||
995       MI->getOpcode() == X86::ISTT_Fp32m64 ||
996       MI->getOpcode() == X86::ISTT_Fp64m64 ||
997       MI->getOpcode() == X86::IST_Fp64m80 ||
998       MI->getOpcode() == X86::ISTT_Fp16m80 ||
999       MI->getOpcode() == X86::ISTT_Fp32m80 ||
1000       MI->getOpcode() == X86::ISTT_Fp64m80 ||
1001       MI->getOpcode() == X86::ST_FpP80m)) {
1002    duplicateToTop(Reg, ScratchFPReg, I);
1003  } else {
1004    moveToTop(Reg, I);            // Move to the top of the stack...
1005  }
1006
1007  // Convert from the pseudo instruction to the concrete instruction.
1008  MI->RemoveOperand(NumOps-1);    // Remove explicit ST(0) operand
1009  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1010
1011  if (MI->getOpcode() == X86::IST_FP64m ||
1012      MI->getOpcode() == X86::ISTT_FP16m ||
1013      MI->getOpcode() == X86::ISTT_FP32m ||
1014      MI->getOpcode() == X86::ISTT_FP64m ||
1015      MI->getOpcode() == X86::ST_FP80m) {
1016    if (StackTop == 0)
1017      report_fatal_error("Stack empty??");
1018    --StackTop;
1019  } else if (KillsSrc) { // Last use of operand?
1020    popStackAfter(I);
1021  }
1022}
1023
1024
1025/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1026/// replace the value with a newly computed value.  These instructions may have
1027/// non-fp operands after their FP operands.
1028///
1029///  Examples:
1030///     R1 = fchs R2
1031///     R1 = fadd R2, [mem]
1032///
1033void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1034  MachineInstr *MI = I;
1035#ifndef NDEBUG
1036  unsigned NumOps = MI->getDesc().getNumOperands();
1037  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1038#endif
1039
1040  // Is this the last use of the source register?
1041  unsigned Reg = getFPReg(MI->getOperand(1));
1042  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
1043
1044  if (KillsSrc) {
1045    // If this is the last use of the source register, just make sure it's on
1046    // the top of the stack.
1047    moveToTop(Reg, I);
1048    if (StackTop == 0)
1049      report_fatal_error("Stack cannot be empty!");
1050    --StackTop;
1051    pushReg(getFPReg(MI->getOperand(0)));
1052  } else {
1053    // If this is not the last use of the source register, _copy_ it to the top
1054    // of the stack.
1055    duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
1056  }
1057
1058  // Change from the pseudo instruction to the concrete instruction.
1059  MI->RemoveOperand(1);   // Drop the source operand.
1060  MI->RemoveOperand(0);   // Drop the destination operand.
1061  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1062}
1063
1064
1065//===----------------------------------------------------------------------===//
1066// Define tables of various ways to map pseudo instructions
1067//
1068
// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
// Must remain sorted by pseudo opcode; handleTwoArgFP runs ASSERT_SORTED and
// a Lookup() over it.
static const TableEntry ForwardST0Table[] = {
  { X86::ADD_Fp32  , X86::ADD_FST0r },
  { X86::ADD_Fp64  , X86::ADD_FST0r },
  { X86::ADD_Fp80  , X86::ADD_FST0r },
  { X86::DIV_Fp32  , X86::DIV_FST0r },
  { X86::DIV_Fp64  , X86::DIV_FST0r },
  { X86::DIV_Fp80  , X86::DIV_FST0r },
  { X86::MUL_Fp32  , X86::MUL_FST0r },
  { X86::MUL_Fp64  , X86::MUL_FST0r },
  { X86::MUL_Fp80  , X86::MUL_FST0r },
  { X86::SUB_Fp32  , X86::SUB_FST0r },
  { X86::SUB_Fp64  , X86::SUB_FST0r },
  { X86::SUB_Fp80  , X86::SUB_FST0r },
};
1084
// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
// Must remain sorted by pseudo opcode (checked by ASSERT_SORTED in
// handleTwoArgFP).  Commutative ops map to the plain (non-reversed) form.
static const TableEntry ReverseST0Table[] = {
  { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
  { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
  { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
  { X86::DIV_Fp32  , X86::DIVR_FST0r },
  { X86::DIV_Fp64  , X86::DIVR_FST0r },
  { X86::DIV_Fp80  , X86::DIVR_FST0r },
  { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
  { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
  { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
  { X86::SUB_Fp32  , X86::SUBR_FST0r },
  { X86::SUB_Fp64  , X86::SUBR_FST0r },
  { X86::SUB_Fp80  , X86::SUBR_FST0r },
};
1100
// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
// Must remain sorted by pseudo opcode (checked by ASSERT_SORTED in
// handleTwoArgFP).  Commutative ops map to the plain (non-reversed) form.
static const TableEntry ForwardSTiTable[] = {
  { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
  { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
  { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
  { X86::DIV_Fp32  , X86::DIVR_FrST0 },
  { X86::DIV_Fp64  , X86::DIVR_FrST0 },
  { X86::DIV_Fp80  , X86::DIVR_FrST0 },
  { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
  { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
  { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
  { X86::SUB_Fp32  , X86::SUBR_FrST0 },
  { X86::SUB_Fp64  , X86::SUBR_FrST0 },
  { X86::SUB_Fp80  , X86::SUBR_FrST0 },
};
1116
// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
// Must remain sorted by pseudo opcode (checked by ASSERT_SORTED in
// handleTwoArgFP).
static const TableEntry ReverseSTiTable[] = {
  { X86::ADD_Fp32  , X86::ADD_FrST0 },
  { X86::ADD_Fp64  , X86::ADD_FrST0 },
  { X86::ADD_Fp80  , X86::ADD_FrST0 },
  { X86::DIV_Fp32  , X86::DIV_FrST0 },
  { X86::DIV_Fp64  , X86::DIV_FrST0 },
  { X86::DIV_Fp80  , X86::DIV_FrST0 },
  { X86::MUL_Fp32  , X86::MUL_FrST0 },
  { X86::MUL_Fp64  , X86::MUL_FrST0 },
  { X86::MUL_Fp80  , X86::MUL_FrST0 },
  { X86::SUB_Fp32  , X86::SUB_FrST0 },
  { X86::SUB_Fp64  , X86::SUB_FrST0 },
  { X86::SUB_Fp80  , X86::SUB_FrST0 },
};
1132
1133
1134/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1135/// instructions which need to be simplified and possibly transformed.
1136///
1137/// Result: ST(0) = fsub  ST(0), ST(i)
1138///         ST(i) = fsub  ST(0), ST(i)
1139///         ST(0) = fsubr ST(0), ST(i)
1140///         ST(i) = fsubr ST(0), ST(i)
1141///
1142void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1143  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1144  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1145  MachineInstr *MI = I;
1146
1147  unsigned NumOperands = MI->getDesc().getNumOperands();
1148  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1149  unsigned Dest = getFPReg(MI->getOperand(0));
1150  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1151  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1152  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1153  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1154  DebugLoc dl = MI->getDebugLoc();
1155
1156  unsigned TOS = getStackEntry(0);
1157
1158  // One of our operands must be on the top of the stack.  If neither is yet, we
1159  // need to move one.
1160  if (Op0 != TOS && Op1 != TOS) {   // No operand at TOS?
1161    // We can choose to move either operand to the top of the stack.  If one of
1162    // the operands is killed by this instruction, we want that one so that we
1163    // can update right on top of the old version.
1164    if (KillsOp0) {
1165      moveToTop(Op0, I);         // Move dead operand to TOS.
1166      TOS = Op0;
1167    } else if (KillsOp1) {
1168      moveToTop(Op1, I);
1169      TOS = Op1;
1170    } else {
1171      // All of the operands are live after this instruction executes, so we
1172      // cannot update on top of any operand.  Because of this, we must
1173      // duplicate one of the stack elements to the top.  It doesn't matter
1174      // which one we pick.
1175      //
1176      duplicateToTop(Op0, Dest, I);
1177      Op0 = TOS = Dest;
1178      KillsOp0 = true;
1179    }
1180  } else if (!KillsOp0 && !KillsOp1) {
1181    // If we DO have one of our operands at the top of the stack, but we don't
1182    // have a dead operand, we must duplicate one of the operands to a new slot
1183    // on the stack.
1184    duplicateToTop(Op0, Dest, I);
1185    Op0 = TOS = Dest;
1186    KillsOp0 = true;
1187  }
1188
1189  // Now we know that one of our operands is on the top of the stack, and at
1190  // least one of our operands is killed by this instruction.
1191  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1192         "Stack conditions not set up right!");
1193
1194  // We decide which form to use based on what is on the top of the stack, and
1195  // which operand is killed by this instruction.
1196  const TableEntry *InstTable;
1197  bool isForward = TOS == Op0;
1198  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1199  if (updateST0) {
1200    if (isForward)
1201      InstTable = ForwardST0Table;
1202    else
1203      InstTable = ReverseST0Table;
1204  } else {
1205    if (isForward)
1206      InstTable = ForwardSTiTable;
1207    else
1208      InstTable = ReverseSTiTable;
1209  }
1210
1211  int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
1212                      MI->getOpcode());
1213  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1214
1215  // NotTOS - The register which is not on the top of stack...
1216  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1217
1218  // Replace the old instruction with a new instruction
1219  MBB->remove(I++);
1220  I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1221
1222  // If both operands are killed, pop one off of the stack in addition to
1223  // overwriting the other one.
1224  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1225    assert(!updateST0 && "Should have updated other operand!");
1226    popStackAfter(I);   // Pop the top of stack
1227  }
1228
1229  // Update stack information so that we know the destination register is now on
1230  // the stack.
1231  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1232  assert(UpdatedSlot < StackTop && Dest < 7);
1233  Stack[UpdatedSlot]   = Dest;
1234  RegMap[Dest]         = UpdatedSlot;
1235  MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
1236}
1237
1238/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1239/// register arguments and no explicit destinations.
1240///
1241void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1242  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1243  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1244  MachineInstr *MI = I;
1245
1246  unsigned NumOperands = MI->getDesc().getNumOperands();
1247  assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1248  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1249  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1250  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1251  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1252
1253  // Make sure the first operand is on the top of stack, the other one can be
1254  // anywhere.
1255  moveToTop(Op0, I);
1256
1257  // Change from the pseudo instruction to the concrete instruction.
1258  MI->getOperand(0).setReg(getSTReg(Op1));
1259  MI->RemoveOperand(1);
1260  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1261
1262  // If any of the operands are killed by this instruction, free them.
1263  if (KillsOp0) freeStackSlotAfter(I, Op0);
1264  if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1265}
1266
1267/// handleCondMovFP - Handle two address conditional move instructions.  These
1268/// instructions move a st(i) register to st(0) iff a condition is true.  These
1269/// instructions require that the first operand is at the top of the stack, but
1270/// otherwise don't modify the stack at all.
1271void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1272  MachineInstr *MI = I;
1273
1274  unsigned Op0 = getFPReg(MI->getOperand(0));
1275  unsigned Op1 = getFPReg(MI->getOperand(2));
1276  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1277
1278  // The first operand *must* be on the top of the stack.
1279  moveToTop(Op0, I);
1280
1281  // Change the second operand to the stack register that the operand is in.
1282  // Change from the pseudo instruction to the concrete instruction.
1283  MI->RemoveOperand(0);
1284  MI->RemoveOperand(1);
1285  MI->getOperand(0).setReg(getSTReg(Op1));
1286  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1287
1288  // If we kill the second operand, make sure to pop it from the stack.
1289  if (Op0 != Op1 && KillsOp1) {
1290    // Get this value off of the register stack.
1291    freeStackSlotAfter(I, Op1);
1292  }
1293}
1294
1295
1296/// handleSpecialFP - Handle special instructions which behave unlike other
1297/// floating point instructions.  This is primarily intended for use by pseudo
1298/// instructions.
1299///
1300void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1301  MachineInstr *MI = Inst;
1302
1303  if (MI->isCall()) {
1304    handleCall(Inst);
1305    return;
1306  }
1307
1308  switch (MI->getOpcode()) {
1309  default: llvm_unreachable("Unknown SpecialFP instruction!");
1310  case TargetOpcode::COPY: {
1311    // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1312    const MachineOperand &MO1 = MI->getOperand(1);
1313    const MachineOperand &MO0 = MI->getOperand(0);
1314    bool KillsSrc = MI->killsRegister(MO1.getReg());
1315
1316    // FP <- FP copy.
1317    unsigned DstFP = getFPReg(MO0);
1318    unsigned SrcFP = getFPReg(MO1);
1319    assert(isLive(SrcFP) && "Cannot copy dead register");
1320    if (KillsSrc) {
1321      // If the input operand is killed, we can just change the owner of the
1322      // incoming stack slot into the result.
1323      unsigned Slot = getSlot(SrcFP);
1324      Stack[Slot] = DstFP;
1325      RegMap[DstFP] = Slot;
1326    } else {
1327      // For COPY we just duplicate the specified value to a new stack slot.
1328      // This could be made better, but would require substantial changes.
1329      duplicateToTop(SrcFP, DstFP, Inst);
1330    }
1331    break;
1332  }
1333
1334  case TargetOpcode::IMPLICIT_DEF: {
1335    // All FP registers must be explicitly defined, so load a 0 instead.
1336    unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
1337    DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1338    BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::LD_F0));
1339    pushReg(Reg);
1340    break;
1341  }
1342
1343  case TargetOpcode::INLINEASM: {
1344    // The inline asm MachineInstr currently only *uses* FP registers for the
1345    // 'f' constraint.  These should be turned into the current ST(x) register
1346    // in the machine instr.
1347    //
1348    // There are special rules for x87 inline assembly. The compiler must know
1349    // exactly how many registers are popped and pushed implicitly by the asm.
1350    // Otherwise it is not possible to restore the stack state after the inline
1351    // asm.
1352    //
1353    // There are 3 kinds of input operands:
1354    //
1355    // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1356    //    popped input operand must be in a fixed stack slot, and it is either
1357    //    tied to an output operand, or in the clobber list. The MI has ST use
1358    //    and def operands for these inputs.
1359    //
1360    // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1361    //    preserved by the inline asm. The fixed stack slots must be STn-STm
1362    //    following the popped inputs. A fixed input operand cannot be tied to
1363    //    an output or appear in the clobber list. The MI has ST use operands
1364    //    and no defs for these inputs.
1365    //
1366    // 3. Preserved inputs. These inputs use the "f" constraint which is
1367    //    represented as an FP register. The inline asm won't change these
1368    //    stack slots.
1369    //
1370    // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1371    // registers do not count as output operands. The inline asm changes the
1372    // stack as if it popped all the popped inputs and then pushed all the
1373    // output operands.
1374
1375    // Scan the assembly for ST registers used, defined and clobbered. We can
1376    // only tell clobbers from defs by looking at the asm descriptor.
1377    unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
1378    unsigned NumOps = 0;
1379    SmallSet<unsigned, 1> FRegIdx;
1380    unsigned RCID;
1381
1382    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
1383         i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
1384      unsigned Flags = MI->getOperand(i).getImm();
1385
1386      NumOps = InlineAsm::getNumOperandRegisters(Flags);
1387      if (NumOps != 1)
1388        continue;
1389      const MachineOperand &MO = MI->getOperand(i + 1);
1390      if (!MO.isReg())
1391        continue;
1392      unsigned STReg = MO.getReg() - X86::FP0;
1393      if (STReg >= 8)
1394        continue;
1395
1396      // If the flag has a register class constraint, this must be an operand
1397      // with constraint "f". Record its index and continue.
1398      if (InlineAsm::hasRegClassConstraint(Flags, RCID)) {
1399        FRegIdx.insert(i + 1);
1400        continue;
1401      }
1402
1403      switch (InlineAsm::getKind(Flags)) {
1404      case InlineAsm::Kind_RegUse:
1405        STUses |= (1u << STReg);
1406        break;
1407      case InlineAsm::Kind_RegDef:
1408      case InlineAsm::Kind_RegDefEarlyClobber:
1409        STDefs |= (1u << STReg);
1410        if (MO.isDead())
1411          STDeadDefs |= (1u << STReg);
1412        break;
1413      case InlineAsm::Kind_Clobber:
1414        STClobbers |= (1u << STReg);
1415        break;
1416      default:
1417        break;
1418      }
1419    }
1420
1421    if (STUses && !isMask_32(STUses))
1422      MI->emitError("fixed input regs must be last on the x87 stack");
1423    unsigned NumSTUses = countTrailingOnes(STUses);
1424
1425    // Defs must be contiguous from the stack top. ST0-STn.
1426    if (STDefs && !isMask_32(STDefs)) {
1427      MI->emitError("output regs must be last on the x87 stack");
1428      STDefs = NextPowerOf2(STDefs) - 1;
1429    }
1430    unsigned NumSTDefs = countTrailingOnes(STDefs);
1431
1432    // So must the clobbered stack slots. ST0-STm, m >= n.
1433    if (STClobbers && !isMask_32(STDefs | STClobbers))
1434      MI->emitError("clobbers must be last on the x87 stack");
1435
1436    // Popped inputs are the ones that are also clobbered or defined.
1437    unsigned STPopped = STUses & (STDefs | STClobbers);
1438    if (STPopped && !isMask_32(STPopped))
1439      MI->emitError("implicitly popped regs must be last on the x87 stack");
1440    unsigned NumSTPopped = countTrailingOnes(STPopped);
1441
1442    DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1443                 << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
1444
1445#ifndef NDEBUG
1446    // If any input operand uses constraint "f", all output register
1447    // constraints must be early-clobber defs.
1448    for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I)
1449      if (FRegIdx.count(I)) {
1450        assert((1 << getFPReg(MI->getOperand(I)) & STDefs) == 0 &&
1451               "Operands with constraint \"f\" cannot overlap with defs");
1452      }
1453#endif
1454
1455    // Collect all FP registers (register operands with constraints "t", "u",
1456    // and "f") to kill afer the instruction.
1457    unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1458    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1459      MachineOperand &Op = MI->getOperand(i);
1460      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1461        continue;
1462      unsigned FPReg = getFPReg(Op);
1463
1464      // If we kill this operand, make sure to pop it from the stack after the
1465      // asm.  We just remember it for now, and pop them all off at the end in
1466      // a batch.
1467      if (Op.isUse() && Op.isKill())
1468        FPKills |= 1U << FPReg;
1469    }
1470
1471    // Do not include registers that are implicitly popped by defs/clobbers.
1472    FPKills &= ~(STDefs | STClobbers);
1473
1474    // Now we can rearrange the live registers to match what was requested.
1475    unsigned char STUsesArray[8];
1476
1477    for (unsigned I = 0; I < NumSTUses; ++I)
1478      STUsesArray[I] = I;
1479
1480    shuffleStackTop(STUsesArray, NumSTUses, Inst);
1481    DEBUG({dbgs() << "Before asm: "; dumpStack();});
1482
1483    // With the stack layout fixed, rewrite the FP registers.
1484    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1485      MachineOperand &Op = MI->getOperand(i);
1486      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1487        continue;
1488
1489      unsigned FPReg = getFPReg(Op);
1490
1491      if (FRegIdx.count(i))
1492        // Operand with constraint "f".
1493        Op.setReg(getSTReg(FPReg));
1494      else
1495        // Operand with a single register class constraint ("t" or "u").
1496        Op.setReg(X86::ST0 + FPReg);
1497    }
1498
1499    // Simulate the inline asm popping its inputs and pushing its outputs.
1500    StackTop -= NumSTPopped;
1501
1502    for (unsigned i = 0; i < NumSTDefs; ++i)
1503      pushReg(NumSTDefs - i - 1);
1504
1505    // If this asm kills any FP registers (is the last use of them) we must
1506    // explicitly emit pop instructions for them.  Do this now after the asm has
1507    // executed so that the ST(x) numbers are not off (which would happen if we
1508    // did this inline with operand rewriting).
1509    //
1510    // Note: this might be a non-optimal pop sequence.  We might be able to do
1511    // better by trying to pop in stack order or something.
1512    while (FPKills) {
1513      unsigned FPReg = countTrailingZeros(FPKills);
1514      if (isLive(FPReg))
1515        freeStackSlotAfter(Inst, FPReg);
1516      FPKills &= ~(1U << FPReg);
1517    }
1518
1519    // Don't delete the inline asm!
1520    return;
1521  }
1522
1523  case X86::WIN_FTOL_32:
1524  case X86::WIN_FTOL_64: {
1525    // Push the operand into ST0.
1526    MachineOperand &Op = MI->getOperand(0);
1527    assert(Op.isUse() && Op.isReg() &&
1528      Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
1529    unsigned FPReg = getFPReg(Op);
1530    if (Op.isKill())
1531      moveToTop(FPReg, Inst);
1532    else
1533      duplicateToTop(FPReg, ScratchFPReg, Inst);
1534
1535    // Emit the call. This will pop the operand.
1536    BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
1537      .addExternalSymbol("_ftol2")
1538      .addReg(X86::ST0, RegState::ImplicitKill)
1539      .addReg(X86::ECX, RegState::ImplicitDefine)
1540      .addReg(X86::EAX, RegState::Define | RegState::Implicit)
1541      .addReg(X86::EDX, RegState::Define | RegState::Implicit)
1542      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
1543    --StackTop;
1544
1545    break;
1546  }
1547
1548  case X86::RETQ:
1549  case X86::RETL:
1550  case X86::RETIL:
1551  case X86::RETIQ:
1552    // If RET has an FP register use operand, pass the first one in ST(0) and
1553    // the second one in ST(1).
1554
1555    // Find the register operands.
1556    unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1557    unsigned LiveMask = 0;
1558
1559    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1560      MachineOperand &Op = MI->getOperand(i);
1561      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1562        continue;
1563      // FP Register uses must be kills unless there are two uses of the same
1564      // register, in which case only one will be a kill.
1565      assert(Op.isUse() &&
1566             (Op.isKill() ||                        // Marked kill.
1567              getFPReg(Op) == FirstFPRegOp ||       // Second instance.
1568              MI->killsRegister(Op.getReg())) &&    // Later use is marked kill.
1569             "Ret only defs operands, and values aren't live beyond it");
1570
1571      if (FirstFPRegOp == ~0U)
1572        FirstFPRegOp = getFPReg(Op);
1573      else {
1574        assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1575        SecondFPRegOp = getFPReg(Op);
1576      }
1577      LiveMask |= (1 << getFPReg(Op));
1578
1579      // Remove the operand so that later passes don't see it.
1580      MI->RemoveOperand(i);
1581      --i, --e;
1582    }
1583
1584    // We may have been carrying spurious live-ins, so make sure only the returned
1585    // registers are left live.
1586    adjustLiveRegs(LiveMask, MI);
1587    if (!LiveMask) return;  // Quick check to see if any are possible.
1588
1589    // There are only four possibilities here:
1590    // 1) we are returning a single FP value.  In this case, it has to be in
1591    //    ST(0) already, so just declare success by removing the value from the
1592    //    FP Stack.
1593    if (SecondFPRegOp == ~0U) {
1594      // Assert that the top of stack contains the right FP register.
1595      assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1596             "Top of stack not the right register for RET!");
1597
1598      // Ok, everything is good, mark the value as not being on the stack
1599      // anymore so that our assertion about the stack being empty at end of
1600      // block doesn't fire.
1601      StackTop = 0;
1602      return;
1603    }
1604
1605    // Otherwise, we are returning two values:
1606    // 2) If returning the same value for both, we only have one thing in the FP
1607    //    stack.  Consider:  RET FP1, FP1
1608    if (StackTop == 1) {
1609      assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1610             "Stack misconfiguration for RET!");
1611
1612      // Duplicate the TOS so that we return it twice.  Just pick some other FPx
1613      // register to hold it.
1614      unsigned NewReg = ScratchFPReg;
1615      duplicateToTop(FirstFPRegOp, NewReg, MI);
1616      FirstFPRegOp = NewReg;
1617    }
1618
1619    /// Okay we know we have two different FPx operands now:
1620    assert(StackTop == 2 && "Must have two values live!");
1621
1622    /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1623    ///    in ST(1).  In this case, emit an fxch.
1624    if (getStackEntry(0) == SecondFPRegOp) {
1625      assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1626      moveToTop(FirstFPRegOp, MI);
1627    }
1628
1629    /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1630    /// ST(1).  Just remove both from our understanding of the stack and return.
1631    assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1632    assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1633    StackTop = 0;
1634    return;
1635  }
1636
1637  Inst = MBB->erase(Inst);  // Remove the pseudo instruction
1638
1639  // We want to leave I pointing to the previous instruction, but what if we
1640  // just erased the first instruction?
1641  if (Inst == MBB->begin()) {
1642    DEBUG(dbgs() << "Inserting dummy KILL\n");
1643    Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1644  } else
1645    --Inst;
1646}
1647
1648void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1649  const TargetRegisterInfo *TRI =
1650      MBB.getParent()->getSubtarget().getRegisterInfo();
1651  LivePhysRegs LPR(TRI);
1652
1653  LPR.addLiveOuts(&MBB);
1654
1655  for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
1656       I != E; ++I) {
1657    if (I->isDebugValue())
1658      continue;
1659
1660    std::bitset<8> Defs;
1661    SmallVector<MachineOperand *, 2> Uses;
1662    MachineInstr &MI = *I;
1663
1664    for (auto &MO : I->operands()) {
1665      if (!MO.isReg())
1666        continue;
1667
1668      unsigned Reg = MO.getReg() - X86::FP0;
1669
1670      if (Reg >= 8)
1671        continue;
1672
1673      if (MO.isDef()) {
1674        Defs.set(Reg);
1675        if (!LPR.contains(MO.getReg()))
1676          MO.setIsDead();
1677      } else
1678        Uses.push_back(&MO);
1679    }
1680
1681    for (auto *MO : Uses)
1682      if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
1683        MO->setIsKill();
1684
1685    LPR.stepBackward(MI);
1686  }
1687}
1688