X86FloatingPoint.cpp revision 221345
155997Simp//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
255997Simp//
355997Simp//                     The LLVM Compiler Infrastructure
455997Simp//
555997Simp// This file is distributed under the University of Illinois Open Source
655997Simp// License. See LICENSE.TXT for details.
755997Simp//
855997Simp//===----------------------------------------------------------------------===//
955997Simp//
1055997Simp// This file defines the pass which converts floating point instructions from
1155997Simp// pseudo registers into register stack instructions.  This pass uses live
1255997Simp// variable information to indicate where the FPn registers are used and their
1355997Simp// lifetimes.
1455997Simp//
1555997Simp// The x87 hardware tracks liveness of the stack registers, so it is necessary
1655997Simp// to implement exact liveness tracking between basic blocks. The CFG edges are
1755997Simp// partitioned into bundles where the same FP registers must be live in
1855997Simp// identical stack positions. Instructions are inserted at the end of each basic
1955997Simp// block to rearrange the live registers to match the outgoing bundle.
2055997Simp//
2155997Simp// This approach avoids splitting critical edges at the potential cost of more
2255997Simp// live register shuffling instructions when critical edges are present.
2355997Simp//
2455997Simp//===----------------------------------------------------------------------===//
2555997Simp
2655997Simp#define DEBUG_TYPE "x86-codegen"
27119418Sobrien#include "X86.h"
28119418Sobrien#include "X86InstrInfo.h"
29119418Sobrien#include "llvm/ADT/DepthFirstIterator.h"
3055997Simp#include "llvm/ADT/DenseMap.h"
31241591Sjhb#include "llvm/ADT/SmallPtrSet.h"
3255997Simp#include "llvm/ADT/SmallVector.h"
33241591Sjhb#include "llvm/ADT/Statistic.h"
3455997Simp#include "llvm/ADT/STLExtras.h"
35241591Sjhb#include "llvm/CodeGen/EdgeBundles.h"
3655997Simp#include "llvm/CodeGen/MachineFunctionPass.h"
3755997Simp#include "llvm/CodeGen/MachineInstrBuilder.h"
3855997Simp#include "llvm/CodeGen/MachineRegisterInfo.h"
3955997Simp#include "llvm/CodeGen/Passes.h"
4055997Simp#include "llvm/Support/Debug.h"
4155997Simp#include "llvm/Support/ErrorHandling.h"
4255997Simp#include "llvm/Support/raw_ostream.h"
4370782Simp#include "llvm/Target/TargetInstrInfo.h"
4455997Simp#include "llvm/Target/TargetMachine.h"
4570782Simp#include <algorithm>
46129764Simpusing namespace llvm;
4770782Simp
4855997SimpSTATISTIC(NumFXCH, "Number of fxch instructions inserted");
4955997SimpSTATISTIC(NumFP  , "Number of floating point instructions");
5055997Simp
5155997Simpnamespace {
5255997Simp  struct FPS : public MachineFunctionPass {
5355997Simp    static char ID;
5455997Simp    FPS() : MachineFunctionPass(ID) {
5570782Simp      initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
5670782Simp      // This is really only to keep valgrind quiet.
5770782Simp      // The logic in isLive() is too much for it.
5870782Simp      memset(Stack, 0, sizeof(Stack));
5955997Simp      memset(RegMap, 0, sizeof(RegMap));
60162979Simp    }
61147580Simp
62147580Simp    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
63147580Simp      AU.setPreservesCFG();
64147580Simp      AU.addRequired<EdgeBundles>();
65147580Simp      AU.addPreservedID(MachineLoopInfoID);
6670782Simp      AU.addPreservedID(MachineDominatorsID);
6770782Simp      MachineFunctionPass::getAnalysisUsage(AU);
6870782Simp    }
6955997Simp
7055997Simp    virtual bool runOnMachineFunction(MachineFunction &MF);
7155997Simp
7255997Simp    virtual const char *getPassName() const { return "X86 FP Stackifier"; }
7355997Simp
7455997Simp  private:
7555997Simp    const TargetInstrInfo *TII; // Machine instruction info.
7655997Simp
77241591Sjhb    // Two CFG edges are related if they leave the same block, or enter the same
7855997Simp    // block. The transitive closure of an edge under this relation is a
7955997Simp    // LiveBundle. It represents a set of CFG edges where the live FP stack
80296137Sjhibbits    // registers must be allocated identically in the x87 stack.
81296137Sjhibbits    //
8255997Simp    // A LiveBundle is usually all the edges leaving a block, or all the edges
8355997Simp    // entering a block, but it can contain more edges if critical edges are
8455997Simp    // present.
8555997Simp    //
86127135Snjl    // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
8755997Simp    // but the exact mapping of FP registers to stack slots is fixed later.
8855997Simp    struct LiveBundle {
8955997Simp      // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
9055997Simp      unsigned Mask;
9155997Simp
92170872Sscottl      // Number of pre-assigned live registers in FixStack. This is 0 when the
93241591Sjhb      // stack order has not yet been fixed.
94241591Sjhb      unsigned FixCount;
9555997Simp
9655997Simp      // Assigned stack order for live-in registers.
9755997Simp      // FixStack[i] == getStackEntry(i) for all i < FixCount.
9855997Simp      unsigned char FixStack[8];
9955997Simp
10055997Simp      LiveBundle() : Mask(0), FixCount(0) {}
10155997Simp
10255997Simp      // Have the live registers been assigned a stack order yet?
10355997Simp      bool isFixed() const { return !Mask || FixCount; }
10455997Simp    };
10555997Simp
10655997Simp    // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
107241591Sjhb    // with no live FP registers.
108241591Sjhb    SmallVector<LiveBundle, 8> LiveBundles;
10955997Simp
11055997Simp    // The edge bundle analysis provides indices into the LiveBundles vector.
11155997Simp    EdgeBundles *Bundles;
112150392Simp
11370782Simp    // Return a bitmask of FP registers in block's live-in list.
11470782Simp    unsigned calcLiveInMask(MachineBasicBlock *MBB) {
11570782Simp      unsigned Mask = 0;
11670782Simp      for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
11770782Simp           E = MBB->livein_end(); I != E; ++I) {
118113315Simp        unsigned Reg = *I - X86::FP0;
119113315Simp        if (Reg < 8)
120241591Sjhb          Mask |= 1 << Reg;
121241591Sjhb      }
122241591Sjhb      return Mask;
123241591Sjhb    }
12470782Simp
125241591Sjhb    // Partition all the CFG edges into LiveBundles.
12670782Simp    void bundleCFG(MachineFunction &MF);
12770782Simp
12870782Simp    MachineBasicBlock *MBB;     // Current basic block
129150392Simp    unsigned Stack[8];          // FP<n> Registers in each stack slot...
13055997Simp    unsigned RegMap[8];         // Track which stack slot contains each register
13155997Simp    unsigned StackTop;          // The current top of the FP stack.
13255997Simp
133150392Simp    // Set up our stack model to match the incoming registers to MBB.
13455997Simp    void setupBlockStack();
13555997Simp
13655997Simp    // Shuffle live registers to match the expectations of successor blocks.
13755997Simp    void finishBlockStack();
13855997Simp
13955997Simp    void dumpStack() const {
14055997Simp      dbgs() << "Stack contents:";
14155997Simp      for (unsigned i = 0; i != StackTop; ++i) {
14255997Simp        dbgs() << " FP" << Stack[i];
14355997Simp        assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
14455997Simp      }
14555997Simp      dbgs() << "\n";
14655997Simp    }
14755997Simp
14855997Simp    /// getSlot - Return the stack slot number a particular register number is
149241591Sjhb    /// in.
150241591Sjhb    unsigned getSlot(unsigned RegNo) const {
15155997Simp      assert(RegNo < 8 && "Regno out of range!");
15255997Simp      return RegMap[RegNo];
15355997Simp    }
15455997Simp
15555997Simp    /// isLive - Is RegNo currently live in the stack?
15655997Simp    bool isLive(unsigned RegNo) const {
15755997Simp      unsigned Slot = getSlot(RegNo);
15855997Simp      return Slot < StackTop && Stack[Slot] == RegNo;
15955997Simp    }
16055997Simp
16155997Simp    /// getScratchReg - Return an FP register that is not currently in use.
16255997Simp    unsigned getScratchReg() {
16355997Simp      for (int i = 7; i >= 0; --i)
16455997Simp        if (!isLive(i))
16555997Simp          return i;
16655997Simp      llvm_unreachable("Ran out of scratch FP registers");
16755997Simp    }
16855997Simp
16955997Simp    /// getStackEntry - Return the X86::FP<n> register in register ST(i).
17055997Simp    unsigned getStackEntry(unsigned STi) const {
17155997Simp      if (STi >= StackTop)
17255997Simp        report_fatal_error("Access past stack top!");
17355997Simp      return Stack[StackTop-1-STi];
17455997Simp    }
17555997Simp
17655997Simp    /// getSTReg - Return the X86::ST(i) register which contains the specified
17755997Simp    /// FP<RegNo> register.
17855997Simp    unsigned getSTReg(unsigned RegNo) const {
17955997Simp      return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
18055997Simp    }
18155997Simp
18255997Simp    // pushReg - Push the specified FP<n> register onto the stack.
183150392Simp    void pushReg(unsigned Reg) {
184150392Simp      assert(Reg < 8 && "Register number out of range!");
18555997Simp      if (StackTop >= 8)
18670782Simp        report_fatal_error("Stack overflow!");
18755997Simp      Stack[StackTop] = Reg;
18855997Simp      RegMap[Reg] = StackTop++;
18955997Simp    }
19055997Simp
19155997Simp    bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
19255997Simp    void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
19355997Simp      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
19455997Simp      if (isAtTop(RegNo)) return;
19555997Simp
19655997Simp      unsigned STReg = getSTReg(RegNo);
19769960Simp      unsigned RegOnTop = getStackEntry(0);
19855997Simp
199292079Simp      // Swap the slots the regs are in.
200      std::swap(RegMap[RegNo], RegMap[RegOnTop]);
201
202      // Swap stack slot contents.
203      if (RegMap[RegOnTop] >= StackTop)
204        report_fatal_error("Access past stack top!");
205      std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
206
207      // Emit an fxch to update the runtime processors version of the state.
208      BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
209      ++NumFXCH;
210    }
211
212    void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
213      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
214      unsigned STReg = getSTReg(RegNo);
215      pushReg(AsReg);   // New register on top of stack
216
217      BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
218    }
219
220    /// popStackAfter - Pop the current value off of the top of the FP stack
221    /// after the specified instruction.
222    void popStackAfter(MachineBasicBlock::iterator &I);
223
224    /// freeStackSlotAfter - Free the specified register from the register
225    /// stack, so that it is no longer in a register.  If the register is
226    /// currently at the top of the stack, we just pop the current instruction,
227    /// otherwise we store the current top-of-stack into the specified slot,
228    /// then pop the top of stack.
229    void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
230
231    /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
232    /// instruction.
233    MachineBasicBlock::iterator
234    freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
235
236    /// Adjust the live registers to be the set in Mask.
237    void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
238
239    /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is
240    /// st(0), FP reg FixStack[1] is st(1) etc.
241    void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
242                         MachineBasicBlock::iterator I);
243
244    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
245
246    void handleZeroArgFP(MachineBasicBlock::iterator &I);
247    void handleOneArgFP(MachineBasicBlock::iterator &I);
248    void handleOneArgFPRW(MachineBasicBlock::iterator &I);
249    void handleTwoArgFP(MachineBasicBlock::iterator &I);
250    void handleCompareFP(MachineBasicBlock::iterator &I);
251    void handleCondMovFP(MachineBasicBlock::iterator &I);
252    void handleSpecialFP(MachineBasicBlock::iterator &I);
253
254    bool translateCopy(MachineInstr*);
255  };
256  char FPS::ID = 0;
257}
258
259FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
260
261/// getFPReg - Return the X86::FPx register number for the specified operand.
262/// For example, this returns 3 for X86::FP3.
263static unsigned getFPReg(const MachineOperand &MO) {
264  assert(MO.isReg() && "Expected an FP register!");
265  unsigned Reg = MO.getReg();
266  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
267  return Reg - X86::FP0;
268}
269
270/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
271/// register references into FP stack references.
272///
273bool FPS::runOnMachineFunction(MachineFunction &MF) {
274  // We only need to run this pass if there are any FP registers used in this
275  // function.  If it is all integer, there is nothing for us to do!
276  bool FPIsUsed = false;
277
278  assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
279  for (unsigned i = 0; i <= 6; ++i)
280    if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
281      FPIsUsed = true;
282      break;
283    }
284
285  // Early exit.
286  if (!FPIsUsed) return false;
287
288  Bundles = &getAnalysis<EdgeBundles>();
289  TII = MF.getTarget().getInstrInfo();
290
291  // Prepare cross-MBB liveness.
292  bundleCFG(MF);
293
294  StackTop = 0;
295
296  // Process the function in depth first order so that we process at least one
297  // of the predecessors for every reachable block in the function.
298  SmallPtrSet<MachineBasicBlock*, 8> Processed;
299  MachineBasicBlock *Entry = MF.begin();
300
301  bool Changed = false;
302  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
303         I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
304       I != E; ++I)
305    Changed |= processBasicBlock(MF, **I);
306
307  // Process any unreachable blocks in arbitrary order now.
308  if (MF.size() != Processed.size())
309    for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
310      if (Processed.insert(BB))
311        Changed |= processBasicBlock(MF, *BB);
312
313  LiveBundles.clear();
314
315  return Changed;
316}
317
318/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
319/// live-out sets for the FP registers. Consistent means that the set of
320/// registers live-out from a block is identical to the live-in set of all
321/// successors. This is not enforced by the normal live-in lists since
322/// registers may be implicitly defined, or not used by all successors.
323void FPS::bundleCFG(MachineFunction &MF) {
324  assert(LiveBundles.empty() && "Stale data in LiveBundles");
325  LiveBundles.resize(Bundles->getNumBundles());
326
327  // Gather the actual live-in masks for all MBBs.
328  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
329    MachineBasicBlock *MBB = I;
330    const unsigned Mask = calcLiveInMask(MBB);
331    if (!Mask)
332      continue;
333    // Update MBB ingoing bundle mask.
334    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
335  }
336}
337
338/// processBasicBlock - Loop over all of the instructions in the basic block,
339/// transforming FP instructions into their stack form.
340///
341bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
342  bool Changed = false;
343  MBB = &BB;
344
345  setupBlockStack();
346
347  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
348    MachineInstr *MI = I;
349    uint64_t Flags = MI->getDesc().TSFlags;
350
351    unsigned FPInstClass = Flags & X86II::FPTypeMask;
352    if (MI->isInlineAsm())
353      FPInstClass = X86II::SpecialFP;
354
355    if (MI->isCopy() && translateCopy(MI))
356      FPInstClass = X86II::SpecialFP;
357
358    if (FPInstClass == X86II::NotFP)
359      continue;  // Efficiently ignore non-fp insts!
360
361    MachineInstr *PrevMI = 0;
362    if (I != BB.begin())
363      PrevMI = prior(I);
364
365    ++NumFP;  // Keep track of # of pseudo instrs
366    DEBUG(dbgs() << "\nFPInst:\t" << *MI);
367
368    // Get dead variables list now because the MI pointer may be deleted as part
369    // of processing!
370    SmallVector<unsigned, 8> DeadRegs;
371    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
372      const MachineOperand &MO = MI->getOperand(i);
373      if (MO.isReg() && MO.isDead())
374        DeadRegs.push_back(MO.getReg());
375    }
376
377    switch (FPInstClass) {
378    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
379    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
380    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
381    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
382    case X86II::CompareFP:  handleCompareFP(I); break;
383    case X86II::CondMovFP:  handleCondMovFP(I); break;
384    case X86II::SpecialFP:  handleSpecialFP(I); break;
385    default: llvm_unreachable("Unknown FP Type!");
386    }
387
388    // Check to see if any of the values defined by this instruction are dead
389    // after definition.  If so, pop them.
390    for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
391      unsigned Reg = DeadRegs[i];
392      if (Reg >= X86::FP0 && Reg <= X86::FP6) {
393        DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
394        freeStackSlotAfter(I, Reg-X86::FP0);
395      }
396    }
397
398    // Print out all of the instructions expanded to if -debug
399    DEBUG(
400      MachineBasicBlock::iterator PrevI(PrevMI);
401      if (I == PrevI) {
402        dbgs() << "Just deleted pseudo instruction\n";
403      } else {
404        MachineBasicBlock::iterator Start = I;
405        // Rewind to first instruction newly inserted.
406        while (Start != BB.begin() && prior(Start) != PrevI) --Start;
407        dbgs() << "Inserted instructions:\n\t";
408        Start->print(dbgs(), &MF.getTarget());
409        while (++Start != llvm::next(I)) {}
410      }
411      dumpStack();
412    );
413
414    Changed = true;
415  }
416
417  finishBlockStack();
418
419  return Changed;
420}
421
422/// setupBlockStack - Use the live bundles to set up our model of the stack
423/// to match predecessors' live out stack.
424void FPS::setupBlockStack() {
425  DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
426               << " derived from " << MBB->getName() << ".\n");
427  StackTop = 0;
428  // Get the live-in bundle for MBB.
429  const LiveBundle &Bundle =
430    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
431
432  if (!Bundle.Mask) {
433    DEBUG(dbgs() << "Block has no FP live-ins.\n");
434    return;
435  }
436
437  // Depth-first iteration should ensure that we always have an assigned stack.
438  assert(Bundle.isFixed() && "Reached block before any predecessors");
439
440  // Push the fixed live-in registers.
441  for (unsigned i = Bundle.FixCount; i > 0; --i) {
442    MBB->addLiveIn(X86::ST0+i-1);
443    DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
444                 << unsigned(Bundle.FixStack[i-1]) << '\n');
445    pushReg(Bundle.FixStack[i-1]);
446  }
447
448  // Kill off unwanted live-ins. This can happen with a critical edge.
449  // FIXME: We could keep these live registers around as zombies. They may need
450  // to be revived at the end of a short block. It might save a few instrs.
451  adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
452  DEBUG(MBB->dump());
453}
454
455/// finishBlockStack - Revive live-outs that are implicitly defined out of
456/// MBB. Shuffle live registers to match the expected fixed stack of any
457/// predecessors, and ensure that all predecessors are expecting the same
458/// stack.
459void FPS::finishBlockStack() {
460  // The RET handling below takes care of return blocks for us.
461  if (MBB->succ_empty())
462    return;
463
464  DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
465               << " derived from " << MBB->getName() << ".\n");
466
467  // Get MBB's live-out bundle.
468  unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
469  LiveBundle &Bundle = LiveBundles[BundleIdx];
470
471  // We may need to kill and define some registers to match successors.
472  // FIXME: This can probably be combined with the shuffle below.
473  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
474  adjustLiveRegs(Bundle.Mask, Term);
475
476  if (!Bundle.Mask) {
477    DEBUG(dbgs() << "No live-outs.\n");
478    return;
479  }
480
481  // Has the stack order been fixed yet?
482  DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
483  if (Bundle.isFixed()) {
484    DEBUG(dbgs() << "Shuffling stack to match.\n");
485    shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
486  } else {
487    // Not fixed yet, we get to choose.
488    DEBUG(dbgs() << "Fixing stack order now.\n");
489    Bundle.FixCount = StackTop;
490    for (unsigned i = 0; i < StackTop; ++i)
491      Bundle.FixStack[i] = getStackEntry(i);
492  }
493}
494
495
496//===----------------------------------------------------------------------===//
497// Efficient Lookup Table Support
498//===----------------------------------------------------------------------===//
499
500namespace {
501  struct TableEntry {
502    unsigned from;
503    unsigned to;
504    bool operator<(const TableEntry &TE) const { return from < TE.from; }
505    friend bool operator<(const TableEntry &TE, unsigned V) {
506      return TE.from < V;
507    }
508    friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
509                                              const TableEntry &TE) {
510      return V < TE.from;
511    }
512  };
513}
514
515#ifndef NDEBUG
516static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
517  for (unsigned i = 0; i != NumEntries-1; ++i)
518    if (!(Table[i] < Table[i+1])) return false;
519  return true;
520}
521#endif
522
523static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
524  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
525  if (I != Table+N && I->from == Opcode)
526    return I->to;
527  return -1;
528}
529
530#ifdef NDEBUG
531#define ASSERT_SORTED(TABLE)
532#else
533#define ASSERT_SORTED(TABLE)                                              \
534  { static bool TABLE##Checked = false;                                   \
535    if (!TABLE##Checked) {                                                \
536       assert(TableIsSorted(TABLE, array_lengthof(TABLE)) &&              \
537              "All lookup tables must be sorted for efficient access!");  \
538       TABLE##Checked = true;                                             \
539    }                                                                     \
540  }
541#endif
542
543//===----------------------------------------------------------------------===//
544// Register File -> Register Stack Mapping Methods
545//===----------------------------------------------------------------------===//
546
547// OpcodeTable - Sorted map of register instructions to their stack version.
548// The first element is an register file pseudo instruction, the second is the
549// concrete X86 instruction which uses the register stack.
550//
551static const TableEntry OpcodeTable[] = {
552  { X86::ABS_Fp32     , X86::ABS_F     },
553  { X86::ABS_Fp64     , X86::ABS_F     },
554  { X86::ABS_Fp80     , X86::ABS_F     },
555  { X86::ADD_Fp32m    , X86::ADD_F32m  },
556  { X86::ADD_Fp64m    , X86::ADD_F64m  },
557  { X86::ADD_Fp64m32  , X86::ADD_F32m  },
558  { X86::ADD_Fp80m32  , X86::ADD_F32m  },
559  { X86::ADD_Fp80m64  , X86::ADD_F64m  },
560  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
561  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
562  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
563  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
564  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
565  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
566  { X86::CHS_Fp32     , X86::CHS_F     },
567  { X86::CHS_Fp64     , X86::CHS_F     },
568  { X86::CHS_Fp80     , X86::CHS_F     },
569  { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
570  { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
571  { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
572  { X86::CMOVB_Fp32   , X86::CMOVB_F   },
573  { X86::CMOVB_Fp64   , X86::CMOVB_F  },
574  { X86::CMOVB_Fp80   , X86::CMOVB_F  },
575  { X86::CMOVE_Fp32   , X86::CMOVE_F  },
576  { X86::CMOVE_Fp64   , X86::CMOVE_F   },
577  { X86::CMOVE_Fp80   , X86::CMOVE_F   },
578  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
579  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
580  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
581  { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
582  { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
583  { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
584  { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
585  { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
586  { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
587  { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
588  { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
589  { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
590  { X86::CMOVP_Fp32   , X86::CMOVP_F   },
591  { X86::CMOVP_Fp64   , X86::CMOVP_F   },
592  { X86::CMOVP_Fp80   , X86::CMOVP_F   },
593  { X86::COS_Fp32     , X86::COS_F     },
594  { X86::COS_Fp64     , X86::COS_F     },
595  { X86::COS_Fp80     , X86::COS_F     },
596  { X86::DIVR_Fp32m   , X86::DIVR_F32m },
597  { X86::DIVR_Fp64m   , X86::DIVR_F64m },
598  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
599  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
600  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
601  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
602  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
603  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
604  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
605  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
606  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
607  { X86::DIV_Fp32m    , X86::DIV_F32m  },
608  { X86::DIV_Fp64m    , X86::DIV_F64m  },
609  { X86::DIV_Fp64m32  , X86::DIV_F32m  },
610  { X86::DIV_Fp80m32  , X86::DIV_F32m  },
611  { X86::DIV_Fp80m64  , X86::DIV_F64m  },
612  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
613  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
614  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
615  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
616  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
617  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
618  { X86::ILD_Fp16m32  , X86::ILD_F16m  },
619  { X86::ILD_Fp16m64  , X86::ILD_F16m  },
620  { X86::ILD_Fp16m80  , X86::ILD_F16m  },
621  { X86::ILD_Fp32m32  , X86::ILD_F32m  },
622  { X86::ILD_Fp32m64  , X86::ILD_F32m  },
623  { X86::ILD_Fp32m80  , X86::ILD_F32m  },
624  { X86::ILD_Fp64m32  , X86::ILD_F64m  },
625  { X86::ILD_Fp64m64  , X86::ILD_F64m  },
626  { X86::ILD_Fp64m80  , X86::ILD_F64m  },
627  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
628  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
629  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
630  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
631  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
632  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
633  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
634  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
635  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
636  { X86::IST_Fp16m32  , X86::IST_F16m  },
637  { X86::IST_Fp16m64  , X86::IST_F16m  },
638  { X86::IST_Fp16m80  , X86::IST_F16m  },
639  { X86::IST_Fp32m32  , X86::IST_F32m  },
640  { X86::IST_Fp32m64  , X86::IST_F32m  },
641  { X86::IST_Fp32m80  , X86::IST_F32m  },
642  { X86::IST_Fp64m32  , X86::IST_FP64m },
643  { X86::IST_Fp64m64  , X86::IST_FP64m },
644  { X86::IST_Fp64m80  , X86::IST_FP64m },
645  { X86::LD_Fp032     , X86::LD_F0     },
646  { X86::LD_Fp064     , X86::LD_F0     },
647  { X86::LD_Fp080     , X86::LD_F0     },
648  { X86::LD_Fp132     , X86::LD_F1     },
649  { X86::LD_Fp164     , X86::LD_F1     },
650  { X86::LD_Fp180     , X86::LD_F1     },
651  { X86::LD_Fp32m     , X86::LD_F32m   },
652  { X86::LD_Fp32m64   , X86::LD_F32m   },
653  { X86::LD_Fp32m80   , X86::LD_F32m   },
654  { X86::LD_Fp64m     , X86::LD_F64m   },
655  { X86::LD_Fp64m80   , X86::LD_F64m   },
656  { X86::LD_Fp80m     , X86::LD_F80m   },
657  { X86::MUL_Fp32m    , X86::MUL_F32m  },
658  { X86::MUL_Fp64m    , X86::MUL_F64m  },
659  { X86::MUL_Fp64m32  , X86::MUL_F32m  },
660  { X86::MUL_Fp80m32  , X86::MUL_F32m  },
661  { X86::MUL_Fp80m64  , X86::MUL_F64m  },
662  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
663  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
664  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
665  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
666  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
667  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
668  { X86::SIN_Fp32     , X86::SIN_F     },
669  { X86::SIN_Fp64     , X86::SIN_F     },
670  { X86::SIN_Fp80     , X86::SIN_F     },
671  { X86::SQRT_Fp32    , X86::SQRT_F    },
672  { X86::SQRT_Fp64    , X86::SQRT_F    },
673  { X86::SQRT_Fp80    , X86::SQRT_F    },
674  { X86::ST_Fp32m     , X86::ST_F32m   },
675  { X86::ST_Fp64m     , X86::ST_F64m   },
676  { X86::ST_Fp64m32   , X86::ST_F32m   },
677  { X86::ST_Fp80m32   , X86::ST_F32m   },
678  { X86::ST_Fp80m64   , X86::ST_F64m   },
679  { X86::ST_FpP80m    , X86::ST_FP80m  },
680  { X86::SUBR_Fp32m   , X86::SUBR_F32m },
681  { X86::SUBR_Fp64m   , X86::SUBR_F64m },
682  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
683  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
684  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
685  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
686  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
687  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
688  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
689  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
690  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
691  { X86::SUB_Fp32m    , X86::SUB_F32m  },
692  { X86::SUB_Fp64m    , X86::SUB_F64m  },
693  { X86::SUB_Fp64m32  , X86::SUB_F32m  },
694  { X86::SUB_Fp80m32  , X86::SUB_F32m  },
695  { X86::SUB_Fp80m64  , X86::SUB_F64m  },
696  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
697  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
698  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
699  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
700  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
701  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
702  { X86::TST_Fp32     , X86::TST_F     },
703  { X86::TST_Fp64     , X86::TST_F     },
704  { X86::TST_Fp80     , X86::TST_F     },
705  { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
706  { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
707  { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
708  { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
709  { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
710  { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
711};
712
713static unsigned getConcreteOpcode(unsigned Opcode) {
714  ASSERT_SORTED(OpcodeTable);
715  int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
716  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
717  return Opc;
718}
719
720//===----------------------------------------------------------------------===//
721// Helper Methods
722//===----------------------------------------------------------------------===//
723
724// PopTable - Sorted map of instructions to their popping version.  The first
725// element is an instruction, the second is the version which pops.
726//
727static const TableEntry PopTable[] = {
728  { X86::ADD_FrST0 , X86::ADD_FPrST0  },
729
730  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
731  { X86::DIV_FrST0 , X86::DIV_FPrST0  },
732
733  { X86::IST_F16m  , X86::IST_FP16m   },
734  { X86::IST_F32m  , X86::IST_FP32m   },
735
736  { X86::MUL_FrST0 , X86::MUL_FPrST0  },
737
738  { X86::ST_F32m   , X86::ST_FP32m    },
739  { X86::ST_F64m   , X86::ST_FP64m    },
740  { X86::ST_Frr    , X86::ST_FPrr     },
741
742  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
743  { X86::SUB_FrST0 , X86::SUB_FPrST0  },
744
745  { X86::UCOM_FIr  , X86::UCOM_FIPr   },
746
747  { X86::UCOM_FPr  , X86::UCOM_FPPr   },
748  { X86::UCOM_Fr   , X86::UCOM_FPr    },
749};
750
751/// popStackAfter - Pop the current value off of the top of the FP stack after
752/// the specified instruction.  This attempts to be sneaky and combine the pop
753/// into the instruction itself if possible.  The iterator is left pointing to
754/// the last instruction, be it a new pop instruction inserted, or the old
755/// instruction if it was modified in place.
756///
757void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
758  MachineInstr* MI = I;
759  DebugLoc dl = MI->getDebugLoc();
760  ASSERT_SORTED(PopTable);
761  if (StackTop == 0)
762    report_fatal_error("Cannot pop empty stack!");
763  RegMap[Stack[--StackTop]] = ~0;     // Update state
764
765  // Check to see if there is a popping version of this instruction...
766  int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
767  if (Opcode != -1) {
768    I->setDesc(TII->get(Opcode));
769    if (Opcode == X86::UCOM_FPPr)
770      I->RemoveOperand(0);
771  } else {    // Insert an explicit pop
772    I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
773  }
774}
775
776/// freeStackSlotAfter - Free the specified register from the register stack, so
777/// that it is no longer in a register.  If the register is currently at the top
778/// of the stack, we just pop the current instruction, otherwise we store the
779/// current top-of-stack into the specified slot, then pop the top of stack.
780void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
781  if (getStackEntry(0) == FPRegNo) {  // already at the top of stack? easy.
782    popStackAfter(I);
783    return;
784  }
785
786  // Otherwise, store the top of stack into the dead slot, killing the operand
787  // without having to add in an explicit xchg then pop.
788  //
789  I = freeStackSlotBefore(++I, FPRegNo);
790}
791
792/// freeStackSlotBefore - Free the specified register without trying any
793/// folding.
794MachineBasicBlock::iterator
795FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
796  unsigned STReg    = getSTReg(FPRegNo);
797  unsigned OldSlot  = getSlot(FPRegNo);
798  unsigned TopReg   = Stack[StackTop-1];
799  Stack[OldSlot]    = TopReg;
800  RegMap[TopReg]    = OldSlot;
801  RegMap[FPRegNo]   = ~0;
802  Stack[--StackTop] = ~0;
803  return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
804}
805
806/// adjustLiveRegs - Kill and revive registers such that exactly the FP
807/// registers with a bit in Mask are live.
808void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
809  unsigned Defs = Mask;
810  unsigned Kills = 0;
811  for (unsigned i = 0; i < StackTop; ++i) {
812    unsigned RegNo = Stack[i];
813    if (!(Defs & (1 << RegNo)))
814      // This register is live, but we don't want it.
815      Kills |= (1 << RegNo);
816    else
817      // We don't need to imp-def this live register.
818      Defs &= ~(1 << RegNo);
819  }
820  assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
821
822  // Produce implicit-defs for free by using killed registers.
823  while (Kills && Defs) {
824    unsigned KReg = CountTrailingZeros_32(Kills);
825    unsigned DReg = CountTrailingZeros_32(Defs);
826    DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
827    std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
828    std::swap(RegMap[KReg], RegMap[DReg]);
829    Kills &= ~(1 << KReg);
830    Defs &= ~(1 << DReg);
831  }
832
833  // Kill registers by popping.
834  if (Kills && I != MBB->begin()) {
835    MachineBasicBlock::iterator I2 = llvm::prior(I);
836    for (;;) {
837      unsigned KReg = getStackEntry(0);
838      if (!(Kills & (1 << KReg)))
839        break;
840      DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
841      popStackAfter(I2);
842      Kills &= ~(1 << KReg);
843    }
844  }
845
846  // Manually kill the rest.
847  while (Kills) {
848    unsigned KReg = CountTrailingZeros_32(Kills);
849    DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
850    freeStackSlotBefore(I, KReg);
851    Kills &= ~(1 << KReg);
852  }
853
854  // Load zeros for all the imp-defs.
855  while(Defs) {
856    unsigned DReg = CountTrailingZeros_32(Defs);
857    DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
858    BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
859    pushReg(DReg);
860    Defs &= ~(1 << DReg);
861  }
862
863  // Now we should have the correct registers live.
864  DEBUG(dumpStack());
865  assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
866}
867
868/// shuffleStackTop - emit fxch instructions before I to shuffle the top
869/// FixCount entries into the order given by FixStack.
870/// FIXME: Is there a better algorithm than insertion sort?
871void FPS::shuffleStackTop(const unsigned char *FixStack,
872                          unsigned FixCount,
873                          MachineBasicBlock::iterator I) {
874  // Move items into place, starting from the desired stack bottom.
875  while (FixCount--) {
876    // Old register at position FixCount.
877    unsigned OldReg = getStackEntry(FixCount);
878    // Desired register at position FixCount.
879    unsigned Reg = FixStack[FixCount];
880    if (Reg == OldReg)
881      continue;
882    // (Reg st0) (OldReg st0) = (Reg OldReg st0)
883    moveToTop(Reg, I);
884    moveToTop(OldReg, I);
885  }
886  DEBUG(dumpStack());
887}
888
889
890//===----------------------------------------------------------------------===//
891// Instruction transformation implementation
892//===----------------------------------------------------------------------===//
893
894/// handleZeroArgFP - ST(0) = fld0    ST(0) = flds <mem>
895///
896void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
897  MachineInstr *MI = I;
898  unsigned DestReg = getFPReg(MI->getOperand(0));
899
900  // Change from the pseudo instruction to the concrete instruction.
901  MI->RemoveOperand(0);   // Remove the explicit ST(0) operand
902  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
903
904  // Result gets pushed on the stack.
905  pushReg(DestReg);
906}
907
908/// handleOneArgFP - fst <mem>, ST(0)
909///
910void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
911  MachineInstr *MI = I;
912  unsigned NumOps = MI->getDesc().getNumOperands();
913  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
914         "Can only handle fst* & ftst instructions!");
915
916  // Is this the last use of the source register?
917  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
918  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
919
920  // FISTP64m is strange because there isn't a non-popping versions.
921  // If we have one _and_ we don't want to pop the operand, duplicate the value
922  // on the stack instead of moving it.  This ensure that popping the value is
923  // always ok.
924  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
925  //
926  if (!KillsSrc &&
927      (MI->getOpcode() == X86::IST_Fp64m32 ||
928       MI->getOpcode() == X86::ISTT_Fp16m32 ||
929       MI->getOpcode() == X86::ISTT_Fp32m32 ||
930       MI->getOpcode() == X86::ISTT_Fp64m32 ||
931       MI->getOpcode() == X86::IST_Fp64m64 ||
932       MI->getOpcode() == X86::ISTT_Fp16m64 ||
933       MI->getOpcode() == X86::ISTT_Fp32m64 ||
934       MI->getOpcode() == X86::ISTT_Fp64m64 ||
935       MI->getOpcode() == X86::IST_Fp64m80 ||
936       MI->getOpcode() == X86::ISTT_Fp16m80 ||
937       MI->getOpcode() == X86::ISTT_Fp32m80 ||
938       MI->getOpcode() == X86::ISTT_Fp64m80 ||
939       MI->getOpcode() == X86::ST_FpP80m)) {
940    duplicateToTop(Reg, getScratchReg(), I);
941  } else {
942    moveToTop(Reg, I);            // Move to the top of the stack...
943  }
944
945  // Convert from the pseudo instruction to the concrete instruction.
946  MI->RemoveOperand(NumOps-1);    // Remove explicit ST(0) operand
947  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
948
949  if (MI->getOpcode() == X86::IST_FP64m ||
950      MI->getOpcode() == X86::ISTT_FP16m ||
951      MI->getOpcode() == X86::ISTT_FP32m ||
952      MI->getOpcode() == X86::ISTT_FP64m ||
953      MI->getOpcode() == X86::ST_FP80m) {
954    if (StackTop == 0)
955      report_fatal_error("Stack empty??");
956    --StackTop;
957  } else if (KillsSrc) { // Last use of operand?
958    popStackAfter(I);
959  }
960}
961
962
963/// handleOneArgFPRW: Handle instructions that read from the top of stack and
964/// replace the value with a newly computed value.  These instructions may have
965/// non-fp operands after their FP operands.
966///
967///  Examples:
968///     R1 = fchs R2
969///     R1 = fadd R2, [mem]
970///
971void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
972  MachineInstr *MI = I;
973#ifndef NDEBUG
974  unsigned NumOps = MI->getDesc().getNumOperands();
975  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
976#endif
977
978  // Is this the last use of the source register?
979  unsigned Reg = getFPReg(MI->getOperand(1));
980  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
981
982  if (KillsSrc) {
983    // If this is the last use of the source register, just make sure it's on
984    // the top of the stack.
985    moveToTop(Reg, I);
986    if (StackTop == 0)
987      report_fatal_error("Stack cannot be empty!");
988    --StackTop;
989    pushReg(getFPReg(MI->getOperand(0)));
990  } else {
991    // If this is not the last use of the source register, _copy_ it to the top
992    // of the stack.
993    duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
994  }
995
996  // Change from the pseudo instruction to the concrete instruction.
997  MI->RemoveOperand(1);   // Drop the source operand.
998  MI->RemoveOperand(0);   // Drop the destination operand.
999  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1000}
1001
1002
1003//===----------------------------------------------------------------------===//
1004// Define tables of various ways to map pseudo instructions
1005//
1006
1007// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
1008static const TableEntry ForwardST0Table[] = {
1009  { X86::ADD_Fp32  , X86::ADD_FST0r },
1010  { X86::ADD_Fp64  , X86::ADD_FST0r },
1011  { X86::ADD_Fp80  , X86::ADD_FST0r },
1012  { X86::DIV_Fp32  , X86::DIV_FST0r },
1013  { X86::DIV_Fp64  , X86::DIV_FST0r },
1014  { X86::DIV_Fp80  , X86::DIV_FST0r },
1015  { X86::MUL_Fp32  , X86::MUL_FST0r },
1016  { X86::MUL_Fp64  , X86::MUL_FST0r },
1017  { X86::MUL_Fp80  , X86::MUL_FST0r },
1018  { X86::SUB_Fp32  , X86::SUB_FST0r },
1019  { X86::SUB_Fp64  , X86::SUB_FST0r },
1020  { X86::SUB_Fp80  , X86::SUB_FST0r },
1021};
1022
1023// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
1024static const TableEntry ReverseST0Table[] = {
1025  { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
1026  { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
1027  { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
1028  { X86::DIV_Fp32  , X86::DIVR_FST0r },
1029  { X86::DIV_Fp64  , X86::DIVR_FST0r },
1030  { X86::DIV_Fp80  , X86::DIVR_FST0r },
1031  { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
1032  { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
1033  { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
1034  { X86::SUB_Fp32  , X86::SUBR_FST0r },
1035  { X86::SUB_Fp64  , X86::SUBR_FST0r },
1036  { X86::SUB_Fp80  , X86::SUBR_FST0r },
1037};
1038
1039// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
1040static const TableEntry ForwardSTiTable[] = {
1041  { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
1042  { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
1043  { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
1044  { X86::DIV_Fp32  , X86::DIVR_FrST0 },
1045  { X86::DIV_Fp64  , X86::DIVR_FrST0 },
1046  { X86::DIV_Fp80  , X86::DIVR_FrST0 },
1047  { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
1048  { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
1049  { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
1050  { X86::SUB_Fp32  , X86::SUBR_FrST0 },
1051  { X86::SUB_Fp64  , X86::SUBR_FrST0 },
1052  { X86::SUB_Fp80  , X86::SUBR_FrST0 },
1053};
1054
1055// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
1056static const TableEntry ReverseSTiTable[] = {
1057  { X86::ADD_Fp32  , X86::ADD_FrST0 },
1058  { X86::ADD_Fp64  , X86::ADD_FrST0 },
1059  { X86::ADD_Fp80  , X86::ADD_FrST0 },
1060  { X86::DIV_Fp32  , X86::DIV_FrST0 },
1061  { X86::DIV_Fp64  , X86::DIV_FrST0 },
1062  { X86::DIV_Fp80  , X86::DIV_FrST0 },
1063  { X86::MUL_Fp32  , X86::MUL_FrST0 },
1064  { X86::MUL_Fp64  , X86::MUL_FrST0 },
1065  { X86::MUL_Fp80  , X86::MUL_FrST0 },
1066  { X86::SUB_Fp32  , X86::SUB_FrST0 },
1067  { X86::SUB_Fp64  , X86::SUB_FrST0 },
1068  { X86::SUB_Fp80  , X86::SUB_FrST0 },
1069};
1070
1071
1072/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1073/// instructions which need to be simplified and possibly transformed.
1074///
1075/// Result: ST(0) = fsub  ST(0), ST(i)
1076///         ST(i) = fsub  ST(0), ST(i)
1077///         ST(0) = fsubr ST(0), ST(i)
1078///         ST(i) = fsubr ST(0), ST(i)
1079///
1080void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1081  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1082  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1083  MachineInstr *MI = I;
1084
1085  unsigned NumOperands = MI->getDesc().getNumOperands();
1086  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1087  unsigned Dest = getFPReg(MI->getOperand(0));
1088  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1089  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1090  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1091  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1092  DebugLoc dl = MI->getDebugLoc();
1093
1094  unsigned TOS = getStackEntry(0);
1095
1096  // One of our operands must be on the top of the stack.  If neither is yet, we
1097  // need to move one.
1098  if (Op0 != TOS && Op1 != TOS) {   // No operand at TOS?
1099    // We can choose to move either operand to the top of the stack.  If one of
1100    // the operands is killed by this instruction, we want that one so that we
1101    // can update right on top of the old version.
1102    if (KillsOp0) {
1103      moveToTop(Op0, I);         // Move dead operand to TOS.
1104      TOS = Op0;
1105    } else if (KillsOp1) {
1106      moveToTop(Op1, I);
1107      TOS = Op1;
1108    } else {
1109      // All of the operands are live after this instruction executes, so we
1110      // cannot update on top of any operand.  Because of this, we must
1111      // duplicate one of the stack elements to the top.  It doesn't matter
1112      // which one we pick.
1113      //
1114      duplicateToTop(Op0, Dest, I);
1115      Op0 = TOS = Dest;
1116      KillsOp0 = true;
1117    }
1118  } else if (!KillsOp0 && !KillsOp1) {
1119    // If we DO have one of our operands at the top of the stack, but we don't
1120    // have a dead operand, we must duplicate one of the operands to a new slot
1121    // on the stack.
1122    duplicateToTop(Op0, Dest, I);
1123    Op0 = TOS = Dest;
1124    KillsOp0 = true;
1125  }
1126
1127  // Now we know that one of our operands is on the top of the stack, and at
1128  // least one of our operands is killed by this instruction.
1129  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1130         "Stack conditions not set up right!");
1131
1132  // We decide which form to use based on what is on the top of the stack, and
1133  // which operand is killed by this instruction.
1134  const TableEntry *InstTable;
1135  bool isForward = TOS == Op0;
1136  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1137  if (updateST0) {
1138    if (isForward)
1139      InstTable = ForwardST0Table;
1140    else
1141      InstTable = ReverseST0Table;
1142  } else {
1143    if (isForward)
1144      InstTable = ForwardSTiTable;
1145    else
1146      InstTable = ReverseSTiTable;
1147  }
1148
1149  int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
1150                      MI->getOpcode());
1151  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1152
1153  // NotTOS - The register which is not on the top of stack...
1154  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1155
1156  // Replace the old instruction with a new instruction
1157  MBB->remove(I++);
1158  I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1159
1160  // If both operands are killed, pop one off of the stack in addition to
1161  // overwriting the other one.
1162  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1163    assert(!updateST0 && "Should have updated other operand!");
1164    popStackAfter(I);   // Pop the top of stack
1165  }
1166
1167  // Update stack information so that we know the destination register is now on
1168  // the stack.
1169  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1170  assert(UpdatedSlot < StackTop && Dest < 7);
1171  Stack[UpdatedSlot]   = Dest;
1172  RegMap[Dest]         = UpdatedSlot;
1173  MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
1174}
1175
1176/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1177/// register arguments and no explicit destinations.
1178///
1179void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1180  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1181  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1182  MachineInstr *MI = I;
1183
1184  unsigned NumOperands = MI->getDesc().getNumOperands();
1185  assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1186  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1187  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1188  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1189  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1190
1191  // Make sure the first operand is on the top of stack, the other one can be
1192  // anywhere.
1193  moveToTop(Op0, I);
1194
1195  // Change from the pseudo instruction to the concrete instruction.
1196  MI->getOperand(0).setReg(getSTReg(Op1));
1197  MI->RemoveOperand(1);
1198  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1199
1200  // If any of the operands are killed by this instruction, free them.
1201  if (KillsOp0) freeStackSlotAfter(I, Op0);
1202  if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1203}
1204
1205/// handleCondMovFP - Handle two address conditional move instructions.  These
1206/// instructions move a st(i) register to st(0) iff a condition is true.  These
1207/// instructions require that the first operand is at the top of the stack, but
1208/// otherwise don't modify the stack at all.
1209void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1210  MachineInstr *MI = I;
1211
1212  unsigned Op0 = getFPReg(MI->getOperand(0));
1213  unsigned Op1 = getFPReg(MI->getOperand(2));
1214  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1215
1216  // The first operand *must* be on the top of the stack.
1217  moveToTop(Op0, I);
1218
1219  // Change the second operand to the stack register that the operand is in.
1220  // Change from the pseudo instruction to the concrete instruction.
1221  MI->RemoveOperand(0);
1222  MI->RemoveOperand(1);
1223  MI->getOperand(0).setReg(getSTReg(Op1));
1224  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1225
1226  // If we kill the second operand, make sure to pop it from the stack.
1227  if (Op0 != Op1 && KillsOp1) {
1228    // Get this value off of the register stack.
1229    freeStackSlotAfter(I, Op1);
1230  }
1231}
1232
1233
1234/// handleSpecialFP - Handle special instructions which behave unlike other
1235/// floating point instructions.  This is primarily intended for use by pseudo
1236/// instructions.
1237///
1238void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
1239  MachineInstr *MI = I;
1240  switch (MI->getOpcode()) {
1241  default: llvm_unreachable("Unknown SpecialFP instruction!");
1242  case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
1243  case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
1244  case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
1245    assert(StackTop == 0 && "Stack should be empty after a call!");
1246    pushReg(getFPReg(MI->getOperand(0)));
1247    break;
1248  case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type!
1249  case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type!
1250  case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type!
1251    // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm.
1252    // The pattern we expect is:
1253    //  CALL
1254    //  FP1 = FpGET_ST0
1255    //  FP4 = FpGET_ST1
1256    //
1257    // At this point, we've pushed FP1 on the top of stack, so it should be
1258    // present if it isn't dead.  If it was dead, we already emitted a pop to
1259    // remove it from the stack and StackTop = 0.
1260
1261    // Push FP4 as top of stack next.
1262    pushReg(getFPReg(MI->getOperand(0)));
1263
1264    // If StackTop was 0 before we pushed our operand, then ST(0) must have been
1265    // dead.  In this case, the ST(1) value is the only thing that is live, so
1266    // it should be on the TOS (after the pop that was emitted) and is.  Just
1267    // continue in this case.
1268    if (StackTop == 1)
1269      break;
1270
1271    // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
1272    // elements so that our accounting is correct.
1273    unsigned RegOnTop = getStackEntry(0);
1274    unsigned RegNo = getStackEntry(1);
1275
1276    // Swap the slots the regs are in.
1277    std::swap(RegMap[RegNo], RegMap[RegOnTop]);
1278
1279    // Swap stack slot contents.
1280    if (RegMap[RegOnTop] >= StackTop)
1281      report_fatal_error("Access past stack top!");
1282    std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
1283    break;
1284  }
1285  case X86::FpSET_ST0_32:
1286  case X86::FpSET_ST0_64:
1287  case X86::FpSET_ST0_80: {
1288    // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
1289    // arguments that use an st constraint. We expect a sequence of
1290    // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
1291    unsigned Op0 = getFPReg(MI->getOperand(0));
1292
1293    if (!MI->killsRegister(X86::FP0 + Op0)) {
1294      // Duplicate Op0 into a temporary on the stack top.
1295      duplicateToTop(Op0, getScratchReg(), I);
1296    } else {
1297      // Op0 is killed, so just swap it into position.
1298      moveToTop(Op0, I);
1299    }
1300    --StackTop;   // "Forget" we have something on the top of stack!
1301    break;
1302  }
1303  case X86::FpSET_ST1_32:
1304  case X86::FpSET_ST1_64:
1305  case X86::FpSET_ST1_80: {
1306    // Set up st(1) for inline asm. We are assuming that st(0) has already been
1307    // set up by FpSET_ST0, and our StackTop is off by one because of it.
1308    unsigned Op0 = getFPReg(MI->getOperand(0));
1309    // Restore the actual StackTop from before Fp_SET_ST0.
1310    // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
1311    // are not enforcing the constraint.
1312    ++StackTop;
1313    unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
1314    if (!MI->killsRegister(X86::FP0 + Op0)) {
1315      duplicateToTop(Op0, getScratchReg(), I);
1316      moveToTop(RegOnTop, I);
1317    } else if (getSTReg(Op0) != X86::ST1) {
1318      // We have the wrong value at st(1). Shuffle! Untested!
1319      moveToTop(getStackEntry(1), I);
1320      moveToTop(Op0, I);
1321      moveToTop(RegOnTop, I);
1322    }
1323    assert(StackTop >= 2 && "Too few live registers");
1324    StackTop -= 2; // "Forget" both st(0) and st(1).
1325    break;
1326  }
1327  case X86::MOV_Fp3232:
1328  case X86::MOV_Fp3264:
1329  case X86::MOV_Fp6432:
1330  case X86::MOV_Fp6464:
1331  case X86::MOV_Fp3280:
1332  case X86::MOV_Fp6480:
1333  case X86::MOV_Fp8032:
1334  case X86::MOV_Fp8064:
1335  case X86::MOV_Fp8080: {
1336    const MachineOperand &MO1 = MI->getOperand(1);
1337    unsigned SrcReg = getFPReg(MO1);
1338
1339    const MachineOperand &MO0 = MI->getOperand(0);
1340    unsigned DestReg = getFPReg(MO0);
1341    if (MI->killsRegister(X86::FP0+SrcReg)) {
1342      // If the input operand is killed, we can just change the owner of the
1343      // incoming stack slot into the result.
1344      unsigned Slot = getSlot(SrcReg);
1345      assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
1346      Stack[Slot] = DestReg;
1347      RegMap[DestReg] = Slot;
1348
1349    } else {
1350      // For FMOV we just duplicate the specified value to a new stack slot.
1351      // This could be made better, but would require substantial changes.
1352      duplicateToTop(SrcReg, DestReg, I);
1353    }
1354    }
1355    break;
1356  case TargetOpcode::INLINEASM: {
1357    // The inline asm MachineInstr currently only *uses* FP registers for the
1358    // 'f' constraint.  These should be turned into the current ST(x) register
1359    // in the machine instr.  Also, any kills should be explicitly popped after
1360    // the inline asm.
1361    unsigned Kills = 0;
1362    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1363      MachineOperand &Op = MI->getOperand(i);
1364      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1365        continue;
1366      assert(Op.isUse() && "Only handle inline asm uses right now");
1367
1368      unsigned FPReg = getFPReg(Op);
1369      Op.setReg(getSTReg(FPReg));
1370
1371      // If we kill this operand, make sure to pop it from the stack after the
1372      // asm.  We just remember it for now, and pop them all off at the end in
1373      // a batch.
1374      if (Op.isKill())
1375        Kills |= 1U << FPReg;
1376    }
1377
1378    // If this asm kills any FP registers (is the last use of them) we must
1379    // explicitly emit pop instructions for them.  Do this now after the asm has
1380    // executed so that the ST(x) numbers are not off (which would happen if we
1381    // did this inline with operand rewriting).
1382    //
1383    // Note: this might be a non-optimal pop sequence.  We might be able to do
1384    // better by trying to pop in stack order or something.
1385    MachineBasicBlock::iterator InsertPt = MI;
1386    while (Kills) {
1387      unsigned FPReg = CountTrailingZeros_32(Kills);
1388      freeStackSlotAfter(InsertPt, FPReg);
1389      Kills &= ~(1U << FPReg);
1390    }
1391    // Don't delete the inline asm!
1392    return;
1393  }
1394
1395  case X86::RET:
1396  case X86::RETI:
1397    // If RET has an FP register use operand, pass the first one in ST(0) and
1398    // the second one in ST(1).
1399
1400    // Find the register operands.
1401    unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1402    unsigned LiveMask = 0;
1403
1404    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1405      MachineOperand &Op = MI->getOperand(i);
1406      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1407        continue;
1408      // FP Register uses must be kills unless there are two uses of the same
1409      // register, in which case only one will be a kill.
1410      assert(Op.isUse() &&
1411             (Op.isKill() ||                        // Marked kill.
1412              getFPReg(Op) == FirstFPRegOp ||       // Second instance.
1413              MI->killsRegister(Op.getReg())) &&    // Later use is marked kill.
1414             "Ret only defs operands, and values aren't live beyond it");
1415
1416      if (FirstFPRegOp == ~0U)
1417        FirstFPRegOp = getFPReg(Op);
1418      else {
1419        assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1420        SecondFPRegOp = getFPReg(Op);
1421      }
1422      LiveMask |= (1 << getFPReg(Op));
1423
1424      // Remove the operand so that later passes don't see it.
1425      MI->RemoveOperand(i);
1426      --i, --e;
1427    }
1428
1429    // We may have been carrying spurious live-ins, so make sure only the returned
1430    // registers are left live.
1431    adjustLiveRegs(LiveMask, MI);
1432    if (!LiveMask) return;  // Quick check to see if any are possible.
1433
1434    // There are only four possibilities here:
1435    // 1) we are returning a single FP value.  In this case, it has to be in
1436    //    ST(0) already, so just declare success by removing the value from the
1437    //    FP Stack.
1438    if (SecondFPRegOp == ~0U) {
1439      // Assert that the top of stack contains the right FP register.
1440      assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1441             "Top of stack not the right register for RET!");
1442
1443      // Ok, everything is good, mark the value as not being on the stack
1444      // anymore so that our assertion about the stack being empty at end of
1445      // block doesn't fire.
1446      StackTop = 0;
1447      return;
1448    }
1449
1450    // Otherwise, we are returning two values:
1451    // 2) If returning the same value for both, we only have one thing in the FP
1452    //    stack.  Consider:  RET FP1, FP1
1453    if (StackTop == 1) {
1454      assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1455             "Stack misconfiguration for RET!");
1456
1457      // Duplicate the TOS so that we return it twice.  Just pick some other FPx
1458      // register to hold it.
1459      unsigned NewReg = getScratchReg();
1460      duplicateToTop(FirstFPRegOp, NewReg, MI);
1461      FirstFPRegOp = NewReg;
1462    }
1463
1464    /// Okay we know we have two different FPx operands now:
1465    assert(StackTop == 2 && "Must have two values live!");
1466
1467    /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1468    ///    in ST(1).  In this case, emit an fxch.
1469    if (getStackEntry(0) == SecondFPRegOp) {
1470      assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1471      moveToTop(FirstFPRegOp, MI);
1472    }
1473
1474    /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1475    /// ST(1).  Just remove both from our understanding of the stack and return.
1476    assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1477    assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1478    StackTop = 0;
1479    return;
1480  }
1481
1482  I = MBB->erase(I);  // Remove the pseudo instruction
1483
1484  // We want to leave I pointing to the previous instruction, but what if we
1485  // just erased the first instruction?
1486  if (I == MBB->begin()) {
1487    DEBUG(dbgs() << "Inserting dummy KILL\n");
1488    I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
1489  } else
1490    --I;
1491}
1492
1493// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
1494bool FPS::translateCopy(MachineInstr *MI) {
1495  unsigned DstReg = MI->getOperand(0).getReg();
1496  unsigned SrcReg = MI->getOperand(1).getReg();
1497
1498  if (DstReg == X86::ST0) {
1499    MI->setDesc(TII->get(X86::FpSET_ST0_80));
1500    MI->RemoveOperand(0);
1501    return true;
1502  }
1503  if (DstReg == X86::ST1) {
1504    MI->setDesc(TII->get(X86::FpSET_ST1_80));
1505    MI->RemoveOperand(0);
1506    return true;
1507  }
1508  if (SrcReg == X86::ST0) {
1509    MI->setDesc(TII->get(X86::FpGET_ST0_80));
1510    return true;
1511  }
1512  if (SrcReg == X86::ST1) {
1513    MI->setDesc(TII->get(X86::FpGET_ST1_80));
1514    return true;
1515  }
1516  if (X86::RFP80RegClass.contains(DstReg, SrcReg)) {
1517    MI->setDesc(TII->get(X86::MOV_Fp8080));
1518    return true;
1519  }
1520  return false;
1521}
1522