X86FloatingPoint.cpp revision 212904
1193323Sed//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file defines the pass which converts floating point instructions from
11212904Sdim// pseudo registers into register stack instructions.  This pass uses live
12193323Sed// variable information to indicate where the FPn registers are used and their
13193323Sed// lifetimes.
14193323Sed//
15212904Sdim// The x87 hardware tracks liveness of the stack registers, so it is necessary
16212904Sdim// to implement exact liveness tracking between basic blocks. The CFG edges are
17212904Sdim// partitioned into bundles where the same FP registers must be live in
18212904Sdim// identical stack positions. Instructions are inserted at the end of each basic
19212904Sdim// block to rearrange the live registers to match the outgoing bundle.
20193323Sed//
21212904Sdim// This approach avoids splitting critical edges at the potential cost of more
22212904Sdim// live register shuffling instructions when critical edges are present.
23193323Sed//
24193323Sed//===----------------------------------------------------------------------===//
25193323Sed
26193323Sed#define DEBUG_TYPE "x86-codegen"
27193323Sed#include "X86.h"
28193323Sed#include "X86InstrInfo.h"
29198090Srdivacky#include "llvm/ADT/DepthFirstIterator.h"
30212904Sdim#include "llvm/ADT/DenseMap.h"
31198090Srdivacky#include "llvm/ADT/SmallPtrSet.h"
32198090Srdivacky#include "llvm/ADT/SmallVector.h"
33198090Srdivacky#include "llvm/ADT/Statistic.h"
34198090Srdivacky#include "llvm/ADT/STLExtras.h"
35193323Sed#include "llvm/CodeGen/MachineFunctionPass.h"
36193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h"
37193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h"
38193323Sed#include "llvm/CodeGen/Passes.h"
39198090Srdivacky#include "llvm/Support/Debug.h"
40198090Srdivacky#include "llvm/Support/ErrorHandling.h"
41198090Srdivacky#include "llvm/Support/raw_ostream.h"
42193323Sed#include "llvm/Target/TargetInstrInfo.h"
43193323Sed#include "llvm/Target/TargetMachine.h"
44193323Sed#include <algorithm>
45193323Sedusing namespace llvm;
46193323Sed
47193323SedSTATISTIC(NumFXCH, "Number of fxch instructions inserted");
48193323SedSTATISTIC(NumFP  , "Number of floating point instructions");
49193323Sed
50193323Sednamespace {
51198892Srdivacky  struct FPS : public MachineFunctionPass {
52193323Sed    static char ID;
53212904Sdim    FPS() : MachineFunctionPass(ID) {
54212904Sdim      // This is really only to keep valgrind quiet.
55212904Sdim      // The logic in isLive() is too much for it.
56212904Sdim      memset(Stack, 0, sizeof(Stack));
57212904Sdim      memset(RegMap, 0, sizeof(RegMap));
58212904Sdim    }
59193323Sed
60193323Sed    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
61198090Srdivacky      AU.setPreservesCFG();
62193323Sed      AU.addPreservedID(MachineLoopInfoID);
63193323Sed      AU.addPreservedID(MachineDominatorsID);
64193323Sed      MachineFunctionPass::getAnalysisUsage(AU);
65193323Sed    }
66193323Sed
67193323Sed    virtual bool runOnMachineFunction(MachineFunction &MF);
68193323Sed
69193323Sed    virtual const char *getPassName() const { return "X86 FP Stackifier"; }
70193323Sed
71193323Sed  private:
72193323Sed    const TargetInstrInfo *TII; // Machine instruction info.
73212904Sdim
74212904Sdim    // Two CFG edges are related if they leave the same block, or enter the same
75212904Sdim    // block. The transitive closure of an edge under this relation is a
76212904Sdim    // LiveBundle. It represents a set of CFG edges where the live FP stack
77212904Sdim    // registers must be allocated identically in the x87 stack.
78212904Sdim    //
79212904Sdim    // A LiveBundle is usually all the edges leaving a block, or all the edges
80212904Sdim    // entering a block, but it can contain more edges if critical edges are
81212904Sdim    // present.
82212904Sdim    //
83212904Sdim    // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
84212904Sdim    // but the exact mapping of FP registers to stack slots is fixed later.
85212904Sdim    struct LiveBundle {
86212904Sdim      // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
87212904Sdim      unsigned Mask;
88212904Sdim
89212904Sdim      // Number of pre-assigned live registers in FixStack. This is 0 when the
90212904Sdim      // stack order has not yet been fixed.
91212904Sdim      unsigned FixCount;
92212904Sdim
93212904Sdim      // Assigned stack order for live-in registers.
94212904Sdim      // FixStack[i] == getStackEntry(i) for all i < FixCount.
95212904Sdim      unsigned char FixStack[8];
96212904Sdim
97212904Sdim      LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}
98212904Sdim
99212904Sdim      // Have the live registers been assigned a stack order yet?
100212904Sdim      bool isFixed() const { return !Mask || FixCount; }
101212904Sdim    };
102212904Sdim
103212904Sdim    // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
104212904Sdim    // with no live FP registers.
105212904Sdim    SmallVector<LiveBundle, 8> LiveBundles;
106212904Sdim
107212904Sdim    // Map each MBB in the current function to an (ingoing, outgoing) index into
108212904Sdim    // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
109212904Sdim    // and are not actually stored in the map.
110212904Sdim    DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;
111212904Sdim
112212904Sdim    // Return a bitmask of FP registers in block's live-in list.
113212904Sdim    unsigned calcLiveInMask(MachineBasicBlock *MBB) {
114212904Sdim      unsigned Mask = 0;
115212904Sdim      for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
116212904Sdim           E = MBB->livein_end(); I != E; ++I) {
117212904Sdim        unsigned Reg = *I - X86::FP0;
118212904Sdim        if (Reg < 8)
119212904Sdim          Mask |= 1 << Reg;
120212904Sdim      }
121212904Sdim      return Mask;
122212904Sdim    }
123212904Sdim
124212904Sdim    // Partition all the CFG edges into LiveBundles.
125212904Sdim    void bundleCFG(MachineFunction &MF);
126212904Sdim
127193323Sed    MachineBasicBlock *MBB;     // Current basic block
128193323Sed    unsigned Stack[8];          // FP<n> Registers in each stack slot...
129193323Sed    unsigned RegMap[8];         // Track which stack slot contains each register
130193323Sed    unsigned StackTop;          // The current top of the FP stack.
131193323Sed
132212904Sdim    // Set up our stack model to match the incoming registers to MBB.
133212904Sdim    void setupBlockStack();
134212904Sdim
135212904Sdim    // Shuffle live registers to match the expectations of successor blocks.
136212904Sdim    void finishBlockStack();
137212904Sdim
138193323Sed    void dumpStack() const {
139202375Srdivacky      dbgs() << "Stack contents:";
140193323Sed      for (unsigned i = 0; i != StackTop; ++i) {
141202375Srdivacky        dbgs() << " FP" << Stack[i];
142193323Sed        assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
143193323Sed      }
144202375Srdivacky      dbgs() << "\n";
145193323Sed    }
146212904Sdim
147212904Sdim    /// getSlot - Return the stack slot number a particular register number is
148212904Sdim    /// in.
149193323Sed    unsigned getSlot(unsigned RegNo) const {
150193323Sed      assert(RegNo < 8 && "Regno out of range!");
151193323Sed      return RegMap[RegNo];
152193323Sed    }
153193323Sed
154212904Sdim    /// isLive - Is RegNo currently live in the stack?
155212904Sdim    bool isLive(unsigned RegNo) const {
156212904Sdim      unsigned Slot = getSlot(RegNo);
157212904Sdim      return Slot < StackTop && Stack[Slot] == RegNo;
158212904Sdim    }
159212904Sdim
160212904Sdim    /// getScratchReg - Return an FP register that is not currently in use.
161212904Sdim    unsigned getScratchReg() {
162212904Sdim      for (int i = 7; i >= 0; --i)
163212904Sdim        if (!isLive(i))
164212904Sdim          return i;
165212904Sdim      llvm_unreachable("Ran out of scratch FP registers");
166212904Sdim    }
167212904Sdim
168212904Sdim    /// getStackEntry - Return the X86::FP<n> register in register ST(i).
169193323Sed    unsigned getStackEntry(unsigned STi) const {
170193323Sed      assert(STi < StackTop && "Access past stack top!");
171193323Sed      return Stack[StackTop-1-STi];
172193323Sed    }
173193323Sed
174212904Sdim    /// getSTReg - Return the X86::ST(i) register which contains the specified
175212904Sdim    /// FP<RegNo> register.
176193323Sed    unsigned getSTReg(unsigned RegNo) const {
177193323Sed      return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
178193323Sed    }
179193323Sed
180193323Sed    // pushReg - Push the specified FP<n> register onto the stack.
181193323Sed    void pushReg(unsigned Reg) {
182193323Sed      assert(Reg < 8 && "Register number out of range!");
183193323Sed      assert(StackTop < 8 && "Stack overflow!");
184193323Sed      Stack[StackTop] = Reg;
185193323Sed      RegMap[Reg] = StackTop++;
186193323Sed    }
187193323Sed
188193323Sed    bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
189193323Sed    void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
190212904Sdim      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
191193323Sed      if (isAtTop(RegNo)) return;
192212904Sdim
193193323Sed      unsigned STReg = getSTReg(RegNo);
194193323Sed      unsigned RegOnTop = getStackEntry(0);
195193323Sed
196193323Sed      // Swap the slots the regs are in.
197193323Sed      std::swap(RegMap[RegNo], RegMap[RegOnTop]);
198193323Sed
199193323Sed      // Swap stack slot contents.
200193323Sed      assert(RegMap[RegOnTop] < StackTop);
201193323Sed      std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
202193323Sed
203193323Sed      // Emit an fxch to update the runtime processors version of the state.
204193323Sed      BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
205210299Sed      ++NumFXCH;
206193323Sed    }
207193323Sed
208193323Sed    void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
209212904Sdim      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
210193323Sed      unsigned STReg = getSTReg(RegNo);
211193323Sed      pushReg(AsReg);   // New register on top of stack
212193323Sed
213193323Sed      BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
214193323Sed    }
215193323Sed
216212904Sdim    /// popStackAfter - Pop the current value off of the top of the FP stack
217212904Sdim    /// after the specified instruction.
218193323Sed    void popStackAfter(MachineBasicBlock::iterator &I);
219193323Sed
220212904Sdim    /// freeStackSlotAfter - Free the specified register from the register
221212904Sdim    /// stack, so that it is no longer in a register.  If the register is
222212904Sdim    /// currently at the top of the stack, we just pop the current instruction,
223212904Sdim    /// otherwise we store the current top-of-stack into the specified slot,
224212904Sdim    /// then pop the top of stack.
225193323Sed    void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
226193323Sed
227212904Sdim    /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
228212904Sdim    /// instruction.
229212904Sdim    MachineBasicBlock::iterator
230212904Sdim    freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
231212904Sdim
232212904Sdim    /// Adjust the live registers to be the set in Mask.
233212904Sdim    void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
234212904Sdim
235212904Sdim    /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is
236212904Sdim    /// st(0), FP reg FixStack[1] is st(1) etc.
237212904Sdim    void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
238212904Sdim                         MachineBasicBlock::iterator I);
239212904Sdim
240193323Sed    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
241193323Sed
242193323Sed    void handleZeroArgFP(MachineBasicBlock::iterator &I);
243193323Sed    void handleOneArgFP(MachineBasicBlock::iterator &I);
244193323Sed    void handleOneArgFPRW(MachineBasicBlock::iterator &I);
245193323Sed    void handleTwoArgFP(MachineBasicBlock::iterator &I);
246193323Sed    void handleCompareFP(MachineBasicBlock::iterator &I);
247193323Sed    void handleCondMovFP(MachineBasicBlock::iterator &I);
248193323Sed    void handleSpecialFP(MachineBasicBlock::iterator &I);
249210299Sed
250210299Sed    bool translateCopy(MachineInstr*);
251193323Sed  };
252193323Sed  char FPS::ID = 0;
253193323Sed}
254193323Sed
255193323SedFunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
256193323Sed
257193323Sed/// getFPReg - Return the X86::FPx register number for the specified operand.
258193323Sed/// For example, this returns 3 for X86::FP3.
259193323Sedstatic unsigned getFPReg(const MachineOperand &MO) {
260193323Sed  assert(MO.isReg() && "Expected an FP register!");
261193323Sed  unsigned Reg = MO.getReg();
262193323Sed  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
263193323Sed  return Reg - X86::FP0;
264193323Sed}
265193323Sed
266193323Sed/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
267193323Sed/// register references into FP stack references.
268193323Sed///
269193323Sedbool FPS::runOnMachineFunction(MachineFunction &MF) {
270193323Sed  // We only need to run this pass if there are any FP registers used in this
271193323Sed  // function.  If it is all integer, there is nothing for us to do!
272193323Sed  bool FPIsUsed = false;
273193323Sed
274193323Sed  assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
275193323Sed  for (unsigned i = 0; i <= 6; ++i)
276193323Sed    if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
277193323Sed      FPIsUsed = true;
278193323Sed      break;
279193323Sed    }
280193323Sed
281193323Sed  // Early exit.
282193323Sed  if (!FPIsUsed) return false;
283193323Sed
284193323Sed  TII = MF.getTarget().getInstrInfo();
285212904Sdim
286212904Sdim  // Prepare cross-MBB liveness.
287212904Sdim  bundleCFG(MF);
288212904Sdim
289193323Sed  StackTop = 0;
290193323Sed
291193323Sed  // Process the function in depth first order so that we process at least one
292193323Sed  // of the predecessors for every reachable block in the function.
293193323Sed  SmallPtrSet<MachineBasicBlock*, 8> Processed;
294193323Sed  MachineBasicBlock *Entry = MF.begin();
295193323Sed
296193323Sed  bool Changed = false;
297193323Sed  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
298193323Sed         I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
299193323Sed       I != E; ++I)
300193323Sed    Changed |= processBasicBlock(MF, **I);
301193323Sed
302198090Srdivacky  // Process any unreachable blocks in arbitrary order now.
303212904Sdim  if (MF.size() != Processed.size())
304212904Sdim    for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
305212904Sdim      if (Processed.insert(BB))
306212904Sdim        Changed |= processBasicBlock(MF, *BB);
307198090Srdivacky
308212904Sdim  BlockBundle.clear();
309212904Sdim  LiveBundles.clear();
310212904Sdim
311193323Sed  return Changed;
312193323Sed}
313193323Sed
314212904Sdim/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
315212904Sdim/// live-out sets for the FP registers. Consistent means that the set of
316212904Sdim/// registers live-out from a block is identical to the live-in set of all
317212904Sdim/// successors. This is not enforced by the normal live-in lists since
318212904Sdim/// registers may be implicitly defined, or not used by all successors.
319212904Sdimvoid FPS::bundleCFG(MachineFunction &MF) {
320212904Sdim  assert(LiveBundles.empty() && "Stale data in LiveBundles");
321212904Sdim  assert(BlockBundle.empty() && "Stale data in BlockBundle");
322212904Sdim  SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;
323212904Sdim
324212904Sdim  // LiveBundle[0] is the empty live-in set.
325212904Sdim  LiveBundles.resize(1);
326212904Sdim
327212904Sdim  // First gather the actual live-in masks for all MBBs.
328212904Sdim  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
329212904Sdim    MachineBasicBlock *MBB = I;
330212904Sdim    const unsigned Mask = calcLiveInMask(MBB);
331212904Sdim    if (!Mask)
332212904Sdim      continue;
333212904Sdim    // Ingoing bundle index.
334212904Sdim    unsigned &Idx = BlockBundle[MBB].first;
335212904Sdim    // Already assigned an ingoing bundle?
336212904Sdim    if (Idx)
337212904Sdim      continue;
338212904Sdim    // Allocate a new LiveBundle struct for this block's live-ins.
339212904Sdim    const unsigned BundleIdx = Idx = LiveBundles.size();
340212904Sdim    DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
341212904Sdim                 << MBB->getNumber());
342212904Sdim    LiveBundles.push_back(Mask);
343212904Sdim    LiveBundle &Bundle = LiveBundles.back();
344212904Sdim
345212904Sdim    // Make sure all predecessors have the same live-out set.
346212904Sdim    PropUp.insert(MBB);
347212904Sdim
348212904Sdim    // Keep pushing liveness up and down the CFG until convergence.
349212904Sdim    // Only critical edges cause iteration here, but when they do, multiple
350212904Sdim    // blocks can be assigned to the same LiveBundle index.
351212904Sdim    do {
352212904Sdim      // Assign BundleIdx as liveout from predecessors in PropUp.
353212904Sdim      for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(),
354212904Sdim           E = PropUp.end(); I != E; ++I) {
355212904Sdim        MachineBasicBlock *MBB = *I;
356212904Sdim        for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
357212904Sdim             LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
358212904Sdim          MachineBasicBlock *PredMBB = *LinkI;
359212904Sdim          // PredMBB's liveout bundle should be set to LIIdx.
360212904Sdim          unsigned &Idx = BlockBundle[PredMBB].second;
361212904Sdim          if (Idx) {
362212904Sdim            assert(Idx == BundleIdx && "Inconsistent CFG");
363212904Sdim            continue;
364212904Sdim          }
365212904Sdim          Idx = BundleIdx;
366212904Sdim          DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
367212904Sdim          // Propagate to siblings.
368212904Sdim          if (PredMBB->succ_size() > 1)
369212904Sdim            PropDown.insert(PredMBB);
370212904Sdim        }
371212904Sdim      }
372212904Sdim      PropUp.clear();
373212904Sdim
374212904Sdim      // Assign BundleIdx as livein to successors in PropDown.
375212904Sdim      for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(),
376212904Sdim           E = PropDown.end(); I != E; ++I) {
377212904Sdim        MachineBasicBlock *MBB = *I;
378212904Sdim        for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
379212904Sdim             LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
380212904Sdim          MachineBasicBlock *SuccMBB = *LinkI;
381212904Sdim          // LinkMBB's livein bundle should be set to BundleIdx.
382212904Sdim          unsigned &Idx = BlockBundle[SuccMBB].first;
383212904Sdim          if (Idx) {
384212904Sdim            assert(Idx == BundleIdx && "Inconsistent CFG");
385212904Sdim            continue;
386212904Sdim          }
387212904Sdim          Idx = BundleIdx;
388212904Sdim          DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
389212904Sdim          // Propagate to siblings.
390212904Sdim          if (SuccMBB->pred_size() > 1)
391212904Sdim            PropUp.insert(SuccMBB);
392212904Sdim          // Also accumulate the bundle liveness mask from the liveins here.
393212904Sdim          Bundle.Mask |= calcLiveInMask(SuccMBB);
394212904Sdim        }
395212904Sdim      }
396212904Sdim      PropDown.clear();
397212904Sdim    } while (!PropUp.empty());
398212904Sdim    DEBUG({
399212904Sdim      dbgs() << " live:";
400212904Sdim      for (unsigned i = 0; i < 8; ++i)
401212904Sdim        if (Bundle.Mask & (1<<i))
402212904Sdim          dbgs() << " %FP" << i;
403212904Sdim      dbgs() << '\n';
404212904Sdim    });
405212904Sdim  }
406212904Sdim}
407212904Sdim
408193323Sed/// processBasicBlock - Loop over all of the instructions in the basic block,
409193323Sed/// transforming FP instructions into their stack form.
410193323Sed///
411193323Sedbool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
412193323Sed  bool Changed = false;
413193323Sed  MBB = &BB;
414193323Sed
415212904Sdim  setupBlockStack();
416212904Sdim
417193323Sed  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
418193323Sed    MachineInstr *MI = I;
419210299Sed    uint64_t Flags = MI->getDesc().TSFlags;
420212904Sdim
421193323Sed    unsigned FPInstClass = Flags & X86II::FPTypeMask;
422203954Srdivacky    if (MI->isInlineAsm())
423193323Sed      FPInstClass = X86II::SpecialFP;
424210299Sed
425210299Sed    if (MI->isCopy() && translateCopy(MI))
426210299Sed      FPInstClass = X86II::SpecialFP;
427210299Sed
428193323Sed    if (FPInstClass == X86II::NotFP)
429193323Sed      continue;  // Efficiently ignore non-fp insts!
430193323Sed
431193323Sed    MachineInstr *PrevMI = 0;
432193323Sed    if (I != BB.begin())
433193323Sed      PrevMI = prior(I);
434193323Sed
435193323Sed    ++NumFP;  // Keep track of # of pseudo instrs
436202375Srdivacky    DEBUG(dbgs() << "\nFPInst:\t" << *MI);
437193323Sed
438193323Sed    // Get dead variables list now because the MI pointer may be deleted as part
439193323Sed    // of processing!
440193323Sed    SmallVector<unsigned, 8> DeadRegs;
441193323Sed    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
442193323Sed      const MachineOperand &MO = MI->getOperand(i);
443193323Sed      if (MO.isReg() && MO.isDead())
444193323Sed        DeadRegs.push_back(MO.getReg());
445193323Sed    }
446193323Sed
447193323Sed    switch (FPInstClass) {
448193323Sed    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
449193323Sed    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
450193323Sed    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
451193323Sed    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
452193323Sed    case X86II::CompareFP:  handleCompareFP(I); break;
453193323Sed    case X86II::CondMovFP:  handleCondMovFP(I); break;
454193323Sed    case X86II::SpecialFP:  handleSpecialFP(I); break;
455198090Srdivacky    default: llvm_unreachable("Unknown FP Type!");
456193323Sed    }
457193323Sed
458193323Sed    // Check to see if any of the values defined by this instruction are dead
459193323Sed    // after definition.  If so, pop them.
460193323Sed    for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
461193323Sed      unsigned Reg = DeadRegs[i];
462193323Sed      if (Reg >= X86::FP0 && Reg <= X86::FP6) {
463202375Srdivacky        DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
464193323Sed        freeStackSlotAfter(I, Reg-X86::FP0);
465193323Sed      }
466193323Sed    }
467193323Sed
468193323Sed    // Print out all of the instructions expanded to if -debug
469193323Sed    DEBUG(
470193323Sed      MachineBasicBlock::iterator PrevI(PrevMI);
471193323Sed      if (I == PrevI) {
472202375Srdivacky        dbgs() << "Just deleted pseudo instruction\n";
473193323Sed      } else {
474193323Sed        MachineBasicBlock::iterator Start = I;
475193323Sed        // Rewind to first instruction newly inserted.
476193323Sed        while (Start != BB.begin() && prior(Start) != PrevI) --Start;
477202375Srdivacky        dbgs() << "Inserted instructions:\n\t";
478202375Srdivacky        Start->print(dbgs(), &MF.getTarget());
479200581Srdivacky        while (++Start != llvm::next(I)) {}
480193323Sed      }
481193323Sed      dumpStack();
482193323Sed    );
483193323Sed
484193323Sed    Changed = true;
485193323Sed  }
486193323Sed
487212904Sdim  finishBlockStack();
488212904Sdim
489193323Sed  return Changed;
490193323Sed}
491193323Sed
492212904Sdim/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
493212904Sdim/// to match predecessors' live out stack.
494212904Sdimvoid FPS::setupBlockStack() {
495212904Sdim  DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
496212904Sdim               << " derived from " << MBB->getName() << ".\n");
497212904Sdim  StackTop = 0;
498212904Sdim  const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];
499212904Sdim
500212904Sdim  if (!Bundle.Mask) {
501212904Sdim    DEBUG(dbgs() << "Block has no FP live-ins.\n");
502212904Sdim    return;
503212904Sdim  }
504212904Sdim
505212904Sdim  // Depth-first iteration should ensure that we always have an assigned stack.
506212904Sdim  assert(Bundle.isFixed() && "Reached block before any predecessors");
507212904Sdim
508212904Sdim  // Push the fixed live-in registers.
509212904Sdim  for (unsigned i = Bundle.FixCount; i > 0; --i) {
510212904Sdim    MBB->addLiveIn(X86::ST0+i-1);
511212904Sdim    DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
512212904Sdim                 << unsigned(Bundle.FixStack[i-1]) << '\n');
513212904Sdim    pushReg(Bundle.FixStack[i-1]);
514212904Sdim  }
515212904Sdim
516212904Sdim  // Kill off unwanted live-ins. This can happen with a critical edge.
517212904Sdim  // FIXME: We could keep these live registers around as zombies. They may need
518212904Sdim  // to be revived at the end of a short block. It might save a few instrs.
519212904Sdim  adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
520212904Sdim  DEBUG(MBB->dump());
521212904Sdim}
522212904Sdim
523212904Sdim/// finishBlockStack - Revive live-outs that are implicitly defined out of
524212904Sdim/// MBB. Shuffle live registers to match the expected fixed stack of any
525212904Sdim/// predecessors, and ensure that all predecessors are expecting the same
526212904Sdim/// stack.
527212904Sdimvoid FPS::finishBlockStack() {
528212904Sdim  // The RET handling below takes care of return blocks for us.
529212904Sdim  if (MBB->succ_empty())
530212904Sdim    return;
531212904Sdim
532212904Sdim  DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
533212904Sdim               << " derived from " << MBB->getName() << ".\n");
534212904Sdim
535212904Sdim  unsigned BundleIdx = BlockBundle.lookup(MBB).second;
536212904Sdim  LiveBundle &Bundle = LiveBundles[BundleIdx];
537212904Sdim
538212904Sdim  // We may need to kill and define some registers to match successors.
539212904Sdim  // FIXME: This can probably be combined with the shuffle below.
540212904Sdim  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
541212904Sdim  adjustLiveRegs(Bundle.Mask, Term);
542212904Sdim
543212904Sdim  if (!Bundle.Mask) {
544212904Sdim    DEBUG(dbgs() << "No live-outs.\n");
545212904Sdim    return;
546212904Sdim  }
547212904Sdim
548212904Sdim  // Has the stack order been fixed yet?
549212904Sdim  DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
550212904Sdim  if (Bundle.isFixed()) {
551212904Sdim    DEBUG(dbgs() << "Shuffling stack to match.\n");
552212904Sdim    shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
553212904Sdim  } else {
554212904Sdim    // Not fixed yet, we get to choose.
555212904Sdim    DEBUG(dbgs() << "Fixing stack order now.\n");
556212904Sdim    Bundle.FixCount = StackTop;
557212904Sdim    for (unsigned i = 0; i < StackTop; ++i)
558212904Sdim      Bundle.FixStack[i] = getStackEntry(i);
559212904Sdim  }
560212904Sdim}
561212904Sdim
562212904Sdim
563193323Sed//===----------------------------------------------------------------------===//
564193323Sed// Efficient Lookup Table Support
565193323Sed//===----------------------------------------------------------------------===//
566193323Sed
567193323Sednamespace {
568193323Sed  struct TableEntry {
569193323Sed    unsigned from;
570193323Sed    unsigned to;
571193323Sed    bool operator<(const TableEntry &TE) const { return from < TE.from; }
572193323Sed    friend bool operator<(const TableEntry &TE, unsigned V) {
573193323Sed      return TE.from < V;
574193323Sed    }
575212904Sdim    friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) {
576193323Sed      return V < TE.from;
577193323Sed    }
578193323Sed  };
579193323Sed}
580193323Sed
581193323Sed#ifndef NDEBUG
582193323Sedstatic bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
583193323Sed  for (unsigned i = 0; i != NumEntries-1; ++i)
584193323Sed    if (!(Table[i] < Table[i+1])) return false;
585193323Sed  return true;
586193323Sed}
587193323Sed#endif
588193323Sed
589193323Sedstatic int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
590193323Sed  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
591193323Sed  if (I != Table+N && I->from == Opcode)
592193323Sed    return I->to;
593193323Sed  return -1;
594193323Sed}
595193323Sed
596193323Sed#ifdef NDEBUG
597193323Sed#define ASSERT_SORTED(TABLE)
598193323Sed#else
599193323Sed#define ASSERT_SORTED(TABLE)                                              \
600193323Sed  { static bool TABLE##Checked = false;                                   \
601193323Sed    if (!TABLE##Checked) {                                                \
602193323Sed       assert(TableIsSorted(TABLE, array_lengthof(TABLE)) &&              \
603193323Sed              "All lookup tables must be sorted for efficient access!");  \
604193323Sed       TABLE##Checked = true;                                             \
605193323Sed    }                                                                     \
606193323Sed  }
607193323Sed#endif
608193323Sed
609193323Sed//===----------------------------------------------------------------------===//
610193323Sed// Register File -> Register Stack Mapping Methods
611193323Sed//===----------------------------------------------------------------------===//
612193323Sed
613193323Sed// OpcodeTable - Sorted map of register instructions to their stack version.
614193323Sed// The first element is an register file pseudo instruction, the second is the
615193323Sed// concrete X86 instruction which uses the register stack.
616193323Sed//
617193323Sedstatic const TableEntry OpcodeTable[] = {
618193323Sed  { X86::ABS_Fp32     , X86::ABS_F     },
619193323Sed  { X86::ABS_Fp64     , X86::ABS_F     },
620193323Sed  { X86::ABS_Fp80     , X86::ABS_F     },
621193323Sed  { X86::ADD_Fp32m    , X86::ADD_F32m  },
622193323Sed  { X86::ADD_Fp64m    , X86::ADD_F64m  },
623193323Sed  { X86::ADD_Fp64m32  , X86::ADD_F32m  },
624193323Sed  { X86::ADD_Fp80m32  , X86::ADD_F32m  },
625193323Sed  { X86::ADD_Fp80m64  , X86::ADD_F64m  },
626193323Sed  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
627193323Sed  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
628193323Sed  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
629193323Sed  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
630193323Sed  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
631193323Sed  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
632193323Sed  { X86::CHS_Fp32     , X86::CHS_F     },
633193323Sed  { X86::CHS_Fp64     , X86::CHS_F     },
634193323Sed  { X86::CHS_Fp80     , X86::CHS_F     },
635193323Sed  { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
636193323Sed  { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
637193323Sed  { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
638193323Sed  { X86::CMOVB_Fp32   , X86::CMOVB_F   },
639193323Sed  { X86::CMOVB_Fp64   , X86::CMOVB_F  },
640193323Sed  { X86::CMOVB_Fp80   , X86::CMOVB_F  },
641193323Sed  { X86::CMOVE_Fp32   , X86::CMOVE_F  },
642193323Sed  { X86::CMOVE_Fp64   , X86::CMOVE_F   },
643193323Sed  { X86::CMOVE_Fp80   , X86::CMOVE_F   },
644193323Sed  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
645193323Sed  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
646193323Sed  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
647193323Sed  { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
648193323Sed  { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
649193323Sed  { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
650193323Sed  { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
651193323Sed  { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
652193323Sed  { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
653193323Sed  { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
654193323Sed  { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
655193323Sed  { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
656193323Sed  { X86::CMOVP_Fp32   , X86::CMOVP_F   },
657193323Sed  { X86::CMOVP_Fp64   , X86::CMOVP_F   },
658193323Sed  { X86::CMOVP_Fp80   , X86::CMOVP_F   },
659193323Sed  { X86::COS_Fp32     , X86::COS_F     },
660193323Sed  { X86::COS_Fp64     , X86::COS_F     },
661193323Sed  { X86::COS_Fp80     , X86::COS_F     },
662193323Sed  { X86::DIVR_Fp32m   , X86::DIVR_F32m },
663193323Sed  { X86::DIVR_Fp64m   , X86::DIVR_F64m },
664193323Sed  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
665193323Sed  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
666193323Sed  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
667193323Sed  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
668193323Sed  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
669193323Sed  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
670193323Sed  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
671193323Sed  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
672193323Sed  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
673193323Sed  { X86::DIV_Fp32m    , X86::DIV_F32m  },
674193323Sed  { X86::DIV_Fp64m    , X86::DIV_F64m  },
675193323Sed  { X86::DIV_Fp64m32  , X86::DIV_F32m  },
676193323Sed  { X86::DIV_Fp80m32  , X86::DIV_F32m  },
677193323Sed  { X86::DIV_Fp80m64  , X86::DIV_F64m  },
678193323Sed  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
679193323Sed  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
680193323Sed  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
681193323Sed  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
682193323Sed  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
683193323Sed  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
684193323Sed  { X86::ILD_Fp16m32  , X86::ILD_F16m  },
685193323Sed  { X86::ILD_Fp16m64  , X86::ILD_F16m  },
686193323Sed  { X86::ILD_Fp16m80  , X86::ILD_F16m  },
687193323Sed  { X86::ILD_Fp32m32  , X86::ILD_F32m  },
688193323Sed  { X86::ILD_Fp32m64  , X86::ILD_F32m  },
689193323Sed  { X86::ILD_Fp32m80  , X86::ILD_F32m  },
690193323Sed  { X86::ILD_Fp64m32  , X86::ILD_F64m  },
691193323Sed  { X86::ILD_Fp64m64  , X86::ILD_F64m  },
692193323Sed  { X86::ILD_Fp64m80  , X86::ILD_F64m  },
693193323Sed  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
694193323Sed  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
695193323Sed  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
696193323Sed  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
697193323Sed  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
698193323Sed  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
699193323Sed  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
700193323Sed  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
701193323Sed  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
702193323Sed  { X86::IST_Fp16m32  , X86::IST_F16m  },
703193323Sed  { X86::IST_Fp16m64  , X86::IST_F16m  },
704193323Sed  { X86::IST_Fp16m80  , X86::IST_F16m  },
705193323Sed  { X86::IST_Fp32m32  , X86::IST_F32m  },
706193323Sed  { X86::IST_Fp32m64  , X86::IST_F32m  },
707193323Sed  { X86::IST_Fp32m80  , X86::IST_F32m  },
708193323Sed  { X86::IST_Fp64m32  , X86::IST_FP64m },
709193323Sed  { X86::IST_Fp64m64  , X86::IST_FP64m },
710193323Sed  { X86::IST_Fp64m80  , X86::IST_FP64m },
711193323Sed  { X86::LD_Fp032     , X86::LD_F0     },
712193323Sed  { X86::LD_Fp064     , X86::LD_F0     },
713193323Sed  { X86::LD_Fp080     , X86::LD_F0     },
714193323Sed  { X86::LD_Fp132     , X86::LD_F1     },
715193323Sed  { X86::LD_Fp164     , X86::LD_F1     },
716193323Sed  { X86::LD_Fp180     , X86::LD_F1     },
717193323Sed  { X86::LD_Fp32m     , X86::LD_F32m   },
718193323Sed  { X86::LD_Fp32m64   , X86::LD_F32m   },
719193323Sed  { X86::LD_Fp32m80   , X86::LD_F32m   },
720193323Sed  { X86::LD_Fp64m     , X86::LD_F64m   },
721193323Sed  { X86::LD_Fp64m80   , X86::LD_F64m   },
722193323Sed  { X86::LD_Fp80m     , X86::LD_F80m   },
723193323Sed  { X86::MUL_Fp32m    , X86::MUL_F32m  },
724193323Sed  { X86::MUL_Fp64m    , X86::MUL_F64m  },
725193323Sed  { X86::MUL_Fp64m32  , X86::MUL_F32m  },
726193323Sed  { X86::MUL_Fp80m32  , X86::MUL_F32m  },
727193323Sed  { X86::MUL_Fp80m64  , X86::MUL_F64m  },
728193323Sed  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
729193323Sed  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
730193323Sed  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
731193323Sed  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
732193323Sed  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
733193323Sed  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
734193323Sed  { X86::SIN_Fp32     , X86::SIN_F     },
735193323Sed  { X86::SIN_Fp64     , X86::SIN_F     },
736193323Sed  { X86::SIN_Fp80     , X86::SIN_F     },
737193323Sed  { X86::SQRT_Fp32    , X86::SQRT_F    },
738193323Sed  { X86::SQRT_Fp64    , X86::SQRT_F    },
739193323Sed  { X86::SQRT_Fp80    , X86::SQRT_F    },
740193323Sed  { X86::ST_Fp32m     , X86::ST_F32m   },
741193323Sed  { X86::ST_Fp64m     , X86::ST_F64m   },
742193323Sed  { X86::ST_Fp64m32   , X86::ST_F32m   },
743193323Sed  { X86::ST_Fp80m32   , X86::ST_F32m   },
744193323Sed  { X86::ST_Fp80m64   , X86::ST_F64m   },
745193323Sed  { X86::ST_FpP80m    , X86::ST_FP80m  },
746193323Sed  { X86::SUBR_Fp32m   , X86::SUBR_F32m },
747193323Sed  { X86::SUBR_Fp64m   , X86::SUBR_F64m },
748193323Sed  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
749193323Sed  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
750193323Sed  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
751193323Sed  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
752193323Sed  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
753193323Sed  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
754193323Sed  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
755193323Sed  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
756193323Sed  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
757193323Sed  { X86::SUB_Fp32m    , X86::SUB_F32m  },
758193323Sed  { X86::SUB_Fp64m    , X86::SUB_F64m  },
759193323Sed  { X86::SUB_Fp64m32  , X86::SUB_F32m  },
760193323Sed  { X86::SUB_Fp80m32  , X86::SUB_F32m  },
761193323Sed  { X86::SUB_Fp80m64  , X86::SUB_F64m  },
762193323Sed  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
763193323Sed  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
764193323Sed  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
765193323Sed  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
766193323Sed  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
767193323Sed  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
768193323Sed  { X86::TST_Fp32     , X86::TST_F     },
769193323Sed  { X86::TST_Fp64     , X86::TST_F     },
770193323Sed  { X86::TST_Fp80     , X86::TST_F     },
771193323Sed  { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
772193323Sed  { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
773193323Sed  { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
774193323Sed  { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
775193323Sed  { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
776193323Sed  { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
777193323Sed};
778193323Sed
779193323Sedstatic unsigned getConcreteOpcode(unsigned Opcode) {
780193323Sed  ASSERT_SORTED(OpcodeTable);
781193323Sed  int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
782193323Sed  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
783193323Sed  return Opc;
784193323Sed}
785193323Sed
786193323Sed//===----------------------------------------------------------------------===//
787193323Sed// Helper Methods
788193323Sed//===----------------------------------------------------------------------===//
789193323Sed
790193323Sed// PopTable - Sorted map of instructions to their popping version.  The first
791193323Sed// element is an instruction, the second is the version which pops.
792193323Sed//
793193323Sedstatic const TableEntry PopTable[] = {
794193323Sed  { X86::ADD_FrST0 , X86::ADD_FPrST0  },
795193323Sed
796193323Sed  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
797193323Sed  { X86::DIV_FrST0 , X86::DIV_FPrST0  },
798193323Sed
799193323Sed  { X86::IST_F16m  , X86::IST_FP16m   },
800193323Sed  { X86::IST_F32m  , X86::IST_FP32m   },
801193323Sed
802193323Sed  { X86::MUL_FrST0 , X86::MUL_FPrST0  },
803193323Sed
804193323Sed  { X86::ST_F32m   , X86::ST_FP32m    },
805193323Sed  { X86::ST_F64m   , X86::ST_FP64m    },
806193323Sed  { X86::ST_Frr    , X86::ST_FPrr     },
807193323Sed
808193323Sed  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
809193323Sed  { X86::SUB_FrST0 , X86::SUB_FPrST0  },
810193323Sed
811193323Sed  { X86::UCOM_FIr  , X86::UCOM_FIPr   },
812193323Sed
813193323Sed  { X86::UCOM_FPr  , X86::UCOM_FPPr   },
814193323Sed  { X86::UCOM_Fr   , X86::UCOM_FPr    },
815193323Sed};
816193323Sed
817193323Sed/// popStackAfter - Pop the current value off of the top of the FP stack after
818193323Sed/// the specified instruction.  This attempts to be sneaky and combine the pop
819193323Sed/// into the instruction itself if possible.  The iterator is left pointing to
820193323Sed/// the last instruction, be it a new pop instruction inserted, or the old
821193323Sed/// instruction if it was modified in place.
822193323Sed///
823193323Sedvoid FPS::popStackAfter(MachineBasicBlock::iterator &I) {
824193323Sed  MachineInstr* MI = I;
825193323Sed  DebugLoc dl = MI->getDebugLoc();
826193323Sed  ASSERT_SORTED(PopTable);
827193323Sed  assert(StackTop > 0 && "Cannot pop empty stack!");
828193323Sed  RegMap[Stack[--StackTop]] = ~0;     // Update state
829193323Sed
830193323Sed  // Check to see if there is a popping version of this instruction...
831193323Sed  int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
832193323Sed  if (Opcode != -1) {
833193323Sed    I->setDesc(TII->get(Opcode));
834193323Sed    if (Opcode == X86::UCOM_FPPr)
835193323Sed      I->RemoveOperand(0);
836193323Sed  } else {    // Insert an explicit pop
837193323Sed    I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
838193323Sed  }
839193323Sed}
840193323Sed
841193323Sed/// freeStackSlotAfter - Free the specified register from the register stack, so
842193323Sed/// that it is no longer in a register.  If the register is currently at the top
843193323Sed/// of the stack, we just pop the current instruction, otherwise we store the
844193323Sed/// current top-of-stack into the specified slot, then pop the top of stack.
845193323Sedvoid FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
846193323Sed  if (getStackEntry(0) == FPRegNo) {  // already at the top of stack? easy.
847193323Sed    popStackAfter(I);
848193323Sed    return;
849193323Sed  }
850193323Sed
851193323Sed  // Otherwise, store the top of stack into the dead slot, killing the operand
852193323Sed  // without having to add in an explicit xchg then pop.
853193323Sed  //
854212904Sdim  I = freeStackSlotBefore(++I, FPRegNo);
855212904Sdim}
856212904Sdim
857212904Sdim/// freeStackSlotBefore - Free the specified register without trying any
858212904Sdim/// folding.
859212904SdimMachineBasicBlock::iterator
860212904SdimFPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
861193323Sed  unsigned STReg    = getSTReg(FPRegNo);
862193323Sed  unsigned OldSlot  = getSlot(FPRegNo);
863193323Sed  unsigned TopReg   = Stack[StackTop-1];
864193323Sed  Stack[OldSlot]    = TopReg;
865193323Sed  RegMap[TopReg]    = OldSlot;
866193323Sed  RegMap[FPRegNo]   = ~0;
867193323Sed  Stack[--StackTop] = ~0;
868212904Sdim  return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
869193323Sed}
870193323Sed
871212904Sdim/// adjustLiveRegs - Kill and revive registers such that exactly the FP
872212904Sdim/// registers with a bit in Mask are live.
873212904Sdimvoid FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
874212904Sdim  unsigned Defs = Mask;
875212904Sdim  unsigned Kills = 0;
876212904Sdim  for (unsigned i = 0; i < StackTop; ++i) {
877212904Sdim    unsigned RegNo = Stack[i];
878212904Sdim    if (!(Defs & (1 << RegNo)))
879212904Sdim      // This register is live, but we don't want it.
880212904Sdim      Kills |= (1 << RegNo);
881212904Sdim    else
882212904Sdim      // We don't need to imp-def this live register.
883212904Sdim      Defs &= ~(1 << RegNo);
884212904Sdim  }
885212904Sdim  assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
886193323Sed
887212904Sdim  // Produce implicit-defs for free by using killed registers.
888212904Sdim  while (Kills && Defs) {
889212904Sdim    unsigned KReg = CountTrailingZeros_32(Kills);
890212904Sdim    unsigned DReg = CountTrailingZeros_32(Defs);
891212904Sdim    DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
892212904Sdim    std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
893212904Sdim    std::swap(RegMap[KReg], RegMap[DReg]);
894212904Sdim    Kills &= ~(1 << KReg);
895212904Sdim    Defs &= ~(1 << DReg);
896212904Sdim  }
897212904Sdim
898212904Sdim  // Kill registers by popping.
899212904Sdim  if (Kills && I != MBB->begin()) {
900212904Sdim    MachineBasicBlock::iterator I2 = llvm::prior(I);
901212904Sdim    for (;;) {
902212904Sdim      unsigned KReg = getStackEntry(0);
903212904Sdim      if (!(Kills & (1 << KReg)))
904212904Sdim        break;
905212904Sdim      DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
906212904Sdim      popStackAfter(I2);
907212904Sdim      Kills &= ~(1 << KReg);
908212904Sdim    }
909212904Sdim  }
910212904Sdim
911212904Sdim  // Manually kill the rest.
912212904Sdim  while (Kills) {
913212904Sdim    unsigned KReg = CountTrailingZeros_32(Kills);
914212904Sdim    DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
915212904Sdim    freeStackSlotBefore(I, KReg);
916212904Sdim    Kills &= ~(1 << KReg);
917212904Sdim  }
918212904Sdim
919212904Sdim  // Load zeros for all the imp-defs.
920212904Sdim  while(Defs) {
921212904Sdim    unsigned DReg = CountTrailingZeros_32(Defs);
922212904Sdim    DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
923212904Sdim    BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
924212904Sdim    pushReg(DReg);
925212904Sdim    Defs &= ~(1 << DReg);
926212904Sdim  }
927212904Sdim
928212904Sdim  // Now we should have the correct registers live.
929212904Sdim  DEBUG(dumpStack());
930212904Sdim  assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
931212904Sdim}
932212904Sdim
933212904Sdim/// shuffleStackTop - emit fxch instructions before I to shuffle the top
934212904Sdim/// FixCount entries into the order given by FixStack.
935212904Sdim/// FIXME: Is there a better algorithm than insertion sort?
936212904Sdimvoid FPS::shuffleStackTop(const unsigned char *FixStack,
937212904Sdim                          unsigned FixCount,
938212904Sdim                          MachineBasicBlock::iterator I) {
939212904Sdim  // Move items into place, starting from the desired stack bottom.
940212904Sdim  while (FixCount--) {
941212904Sdim    // Old register at position FixCount.
942212904Sdim    unsigned OldReg = getStackEntry(FixCount);
943212904Sdim    // Desired register at position FixCount.
944212904Sdim    unsigned Reg = FixStack[FixCount];
945212904Sdim    if (Reg == OldReg)
946212904Sdim      continue;
947212904Sdim    // (Reg st0) (OldReg st0) = (Reg OldReg st0)
948212904Sdim    moveToTop(Reg, I);
949212904Sdim    moveToTop(OldReg, I);
950212904Sdim  }
951212904Sdim  DEBUG(dumpStack());
952212904Sdim}
953212904Sdim
954212904Sdim
955193323Sed//===----------------------------------------------------------------------===//
956193323Sed// Instruction transformation implementation
957193323Sed//===----------------------------------------------------------------------===//
958193323Sed
959193323Sed/// handleZeroArgFP - ST(0) = fld0    ST(0) = flds <mem>
960193323Sed///
961193323Sedvoid FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
962193323Sed  MachineInstr *MI = I;
963193323Sed  unsigned DestReg = getFPReg(MI->getOperand(0));
964193323Sed
965193323Sed  // Change from the pseudo instruction to the concrete instruction.
966193323Sed  MI->RemoveOperand(0);   // Remove the explicit ST(0) operand
967193323Sed  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
968193323Sed
969193323Sed  // Result gets pushed on the stack.
970193323Sed  pushReg(DestReg);
971193323Sed}
972193323Sed
973193323Sed/// handleOneArgFP - fst <mem>, ST(0)
974193323Sed///
975193323Sedvoid FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
976193323Sed  MachineInstr *MI = I;
977193323Sed  unsigned NumOps = MI->getDesc().getNumOperands();
978210299Sed  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
979193323Sed         "Can only handle fst* & ftst instructions!");
980193323Sed
981193323Sed  // Is this the last use of the source register?
982193323Sed  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
983193323Sed  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
984193323Sed
985193323Sed  // FISTP64m is strange because there isn't a non-popping versions.
986193323Sed  // If we have one _and_ we don't want to pop the operand, duplicate the value
987193323Sed  // on the stack instead of moving it.  This ensure that popping the value is
988193323Sed  // always ok.
989193323Sed  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
990193323Sed  //
991193323Sed  if (!KillsSrc &&
992193323Sed      (MI->getOpcode() == X86::IST_Fp64m32 ||
993193323Sed       MI->getOpcode() == X86::ISTT_Fp16m32 ||
994193323Sed       MI->getOpcode() == X86::ISTT_Fp32m32 ||
995193323Sed       MI->getOpcode() == X86::ISTT_Fp64m32 ||
996193323Sed       MI->getOpcode() == X86::IST_Fp64m64 ||
997193323Sed       MI->getOpcode() == X86::ISTT_Fp16m64 ||
998193323Sed       MI->getOpcode() == X86::ISTT_Fp32m64 ||
999193323Sed       MI->getOpcode() == X86::ISTT_Fp64m64 ||
1000193323Sed       MI->getOpcode() == X86::IST_Fp64m80 ||
1001193323Sed       MI->getOpcode() == X86::ISTT_Fp16m80 ||
1002193323Sed       MI->getOpcode() == X86::ISTT_Fp32m80 ||
1003193323Sed       MI->getOpcode() == X86::ISTT_Fp64m80 ||
1004193323Sed       MI->getOpcode() == X86::ST_FpP80m)) {
1005212904Sdim    duplicateToTop(Reg, getScratchReg(), I);
1006193323Sed  } else {
1007193323Sed    moveToTop(Reg, I);            // Move to the top of the stack...
1008193323Sed  }
1009193323Sed
1010193323Sed  // Convert from the pseudo instruction to the concrete instruction.
1011193323Sed  MI->RemoveOperand(NumOps-1);    // Remove explicit ST(0) operand
1012193323Sed  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1013193323Sed
1014193323Sed  if (MI->getOpcode() == X86::IST_FP64m ||
1015193323Sed      MI->getOpcode() == X86::ISTT_FP16m ||
1016193323Sed      MI->getOpcode() == X86::ISTT_FP32m ||
1017193323Sed      MI->getOpcode() == X86::ISTT_FP64m ||
1018193323Sed      MI->getOpcode() == X86::ST_FP80m) {
1019193323Sed    assert(StackTop > 0 && "Stack empty??");
1020193323Sed    --StackTop;
1021193323Sed  } else if (KillsSrc) { // Last use of operand?
1022193323Sed    popStackAfter(I);
1023193323Sed  }
1024193323Sed}
1025193323Sed
1026193323Sed
1027193323Sed/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1028193323Sed/// replace the value with a newly computed value.  These instructions may have
1029193323Sed/// non-fp operands after their FP operands.
1030193323Sed///
1031193323Sed///  Examples:
1032193323Sed///     R1 = fchs R2
1033193323Sed///     R1 = fadd R2, [mem]
1034193323Sed///
1035193323Sedvoid FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1036193323Sed  MachineInstr *MI = I;
1037193323Sed#ifndef NDEBUG
1038193323Sed  unsigned NumOps = MI->getDesc().getNumOperands();
1039193323Sed  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1040193323Sed#endif
1041193323Sed
1042193323Sed  // Is this the last use of the source register?
1043193323Sed  unsigned Reg = getFPReg(MI->getOperand(1));
1044193323Sed  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
1045193323Sed
1046193323Sed  if (KillsSrc) {
1047193323Sed    // If this is the last use of the source register, just make sure it's on
1048193323Sed    // the top of the stack.
1049193323Sed    moveToTop(Reg, I);
1050193323Sed    assert(StackTop > 0 && "Stack cannot be empty!");
1051193323Sed    --StackTop;
1052193323Sed    pushReg(getFPReg(MI->getOperand(0)));
1053193323Sed  } else {
1054193323Sed    // If this is not the last use of the source register, _copy_ it to the top
1055193323Sed    // of the stack.
1056193323Sed    duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
1057193323Sed  }
1058193323Sed
1059193323Sed  // Change from the pseudo instruction to the concrete instruction.
1060193323Sed  MI->RemoveOperand(1);   // Drop the source operand.
1061193323Sed  MI->RemoveOperand(0);   // Drop the destination operand.
1062193323Sed  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1063193323Sed}
1064193323Sed
1065193323Sed
1066193323Sed//===----------------------------------------------------------------------===//
1067193323Sed// Define tables of various ways to map pseudo instructions
1068193323Sed//
1069193323Sed
1070193323Sed// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
1071193323Sedstatic const TableEntry ForwardST0Table[] = {
1072193323Sed  { X86::ADD_Fp32  , X86::ADD_FST0r },
1073193323Sed  { X86::ADD_Fp64  , X86::ADD_FST0r },
1074193323Sed  { X86::ADD_Fp80  , X86::ADD_FST0r },
1075193323Sed  { X86::DIV_Fp32  , X86::DIV_FST0r },
1076193323Sed  { X86::DIV_Fp64  , X86::DIV_FST0r },
1077193323Sed  { X86::DIV_Fp80  , X86::DIV_FST0r },
1078193323Sed  { X86::MUL_Fp32  , X86::MUL_FST0r },
1079193323Sed  { X86::MUL_Fp64  , X86::MUL_FST0r },
1080193323Sed  { X86::MUL_Fp80  , X86::MUL_FST0r },
1081193323Sed  { X86::SUB_Fp32  , X86::SUB_FST0r },
1082193323Sed  { X86::SUB_Fp64  , X86::SUB_FST0r },
1083193323Sed  { X86::SUB_Fp80  , X86::SUB_FST0r },
1084193323Sed};
1085193323Sed
1086193323Sed// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
1087193323Sedstatic const TableEntry ReverseST0Table[] = {
1088193323Sed  { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
1089193323Sed  { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
1090193323Sed  { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
1091193323Sed  { X86::DIV_Fp32  , X86::DIVR_FST0r },
1092193323Sed  { X86::DIV_Fp64  , X86::DIVR_FST0r },
1093193323Sed  { X86::DIV_Fp80  , X86::DIVR_FST0r },
1094193323Sed  { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
1095193323Sed  { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
1096193323Sed  { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
1097193323Sed  { X86::SUB_Fp32  , X86::SUBR_FST0r },
1098193323Sed  { X86::SUB_Fp64  , X86::SUBR_FST0r },
1099193323Sed  { X86::SUB_Fp80  , X86::SUBR_FST0r },
1100193323Sed};
1101193323Sed
1102193323Sed// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
1103193323Sedstatic const TableEntry ForwardSTiTable[] = {
1104193323Sed  { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
1105193323Sed  { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
1106193323Sed  { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
1107193323Sed  { X86::DIV_Fp32  , X86::DIVR_FrST0 },
1108193323Sed  { X86::DIV_Fp64  , X86::DIVR_FrST0 },
1109193323Sed  { X86::DIV_Fp80  , X86::DIVR_FrST0 },
1110193323Sed  { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
1111193323Sed  { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
1112193323Sed  { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
1113193323Sed  { X86::SUB_Fp32  , X86::SUBR_FrST0 },
1114193323Sed  { X86::SUB_Fp64  , X86::SUBR_FrST0 },
1115193323Sed  { X86::SUB_Fp80  , X86::SUBR_FrST0 },
1116193323Sed};
1117193323Sed
1118193323Sed// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
1119193323Sedstatic const TableEntry ReverseSTiTable[] = {
1120193323Sed  { X86::ADD_Fp32  , X86::ADD_FrST0 },
1121193323Sed  { X86::ADD_Fp64  , X86::ADD_FrST0 },
1122193323Sed  { X86::ADD_Fp80  , X86::ADD_FrST0 },
1123193323Sed  { X86::DIV_Fp32  , X86::DIV_FrST0 },
1124193323Sed  { X86::DIV_Fp64  , X86::DIV_FrST0 },
1125193323Sed  { X86::DIV_Fp80  , X86::DIV_FrST0 },
1126193323Sed  { X86::MUL_Fp32  , X86::MUL_FrST0 },
1127193323Sed  { X86::MUL_Fp64  , X86::MUL_FrST0 },
1128193323Sed  { X86::MUL_Fp80  , X86::MUL_FrST0 },
1129193323Sed  { X86::SUB_Fp32  , X86::SUB_FrST0 },
1130193323Sed  { X86::SUB_Fp64  , X86::SUB_FrST0 },
1131193323Sed  { X86::SUB_Fp80  , X86::SUB_FrST0 },
1132193323Sed};
1133193323Sed
1134193323Sed
1135193323Sed/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1136193323Sed/// instructions which need to be simplified and possibly transformed.
1137193323Sed///
1138193323Sed/// Result: ST(0) = fsub  ST(0), ST(i)
1139193323Sed///         ST(i) = fsub  ST(0), ST(i)
1140193323Sed///         ST(0) = fsubr ST(0), ST(i)
1141193323Sed///         ST(i) = fsubr ST(0), ST(i)
1142193323Sed///
1143193323Sedvoid FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1144193323Sed  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1145193323Sed  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1146193323Sed  MachineInstr *MI = I;
1147193323Sed
1148193323Sed  unsigned NumOperands = MI->getDesc().getNumOperands();
1149193323Sed  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1150193323Sed  unsigned Dest = getFPReg(MI->getOperand(0));
1151193323Sed  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1152193323Sed  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1153193323Sed  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1154193323Sed  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1155193323Sed  DebugLoc dl = MI->getDebugLoc();
1156193323Sed
1157193323Sed  unsigned TOS = getStackEntry(0);
1158193323Sed
1159193323Sed  // One of our operands must be on the top of the stack.  If neither is yet, we
1160193323Sed  // need to move one.
1161193323Sed  if (Op0 != TOS && Op1 != TOS) {   // No operand at TOS?
1162193323Sed    // We can choose to move either operand to the top of the stack.  If one of
1163193323Sed    // the operands is killed by this instruction, we want that one so that we
1164193323Sed    // can update right on top of the old version.
1165193323Sed    if (KillsOp0) {
1166193323Sed      moveToTop(Op0, I);         // Move dead operand to TOS.
1167193323Sed      TOS = Op0;
1168193323Sed    } else if (KillsOp1) {
1169193323Sed      moveToTop(Op1, I);
1170193323Sed      TOS = Op1;
1171193323Sed    } else {
1172193323Sed      // All of the operands are live after this instruction executes, so we
1173193323Sed      // cannot update on top of any operand.  Because of this, we must
1174193323Sed      // duplicate one of the stack elements to the top.  It doesn't matter
1175193323Sed      // which one we pick.
1176193323Sed      //
1177193323Sed      duplicateToTop(Op0, Dest, I);
1178193323Sed      Op0 = TOS = Dest;
1179193323Sed      KillsOp0 = true;
1180193323Sed    }
1181193323Sed  } else if (!KillsOp0 && !KillsOp1) {
1182193323Sed    // If we DO have one of our operands at the top of the stack, but we don't
1183193323Sed    // have a dead operand, we must duplicate one of the operands to a new slot
1184193323Sed    // on the stack.
1185193323Sed    duplicateToTop(Op0, Dest, I);
1186193323Sed    Op0 = TOS = Dest;
1187193323Sed    KillsOp0 = true;
1188193323Sed  }
1189193323Sed
1190193323Sed  // Now we know that one of our operands is on the top of the stack, and at
1191193323Sed  // least one of our operands is killed by this instruction.
1192193323Sed  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1193193323Sed         "Stack conditions not set up right!");
1194193323Sed
1195193323Sed  // We decide which form to use based on what is on the top of the stack, and
1196193323Sed  // which operand is killed by this instruction.
1197193323Sed  const TableEntry *InstTable;
1198193323Sed  bool isForward = TOS == Op0;
1199193323Sed  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1200193323Sed  if (updateST0) {
1201193323Sed    if (isForward)
1202193323Sed      InstTable = ForwardST0Table;
1203193323Sed    else
1204193323Sed      InstTable = ReverseST0Table;
1205193323Sed  } else {
1206193323Sed    if (isForward)
1207193323Sed      InstTable = ForwardSTiTable;
1208193323Sed    else
1209193323Sed      InstTable = ReverseSTiTable;
1210193323Sed  }
1211193323Sed
1212193323Sed  int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
1213193323Sed                      MI->getOpcode());
1214193323Sed  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1215193323Sed
1216193323Sed  // NotTOS - The register which is not on the top of stack...
1217193323Sed  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1218193323Sed
1219193323Sed  // Replace the old instruction with a new instruction
1220193323Sed  MBB->remove(I++);
1221193323Sed  I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1222193323Sed
1223193323Sed  // If both operands are killed, pop one off of the stack in addition to
1224193323Sed  // overwriting the other one.
1225193323Sed  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1226193323Sed    assert(!updateST0 && "Should have updated other operand!");
1227193323Sed    popStackAfter(I);   // Pop the top of stack
1228193323Sed  }
1229193323Sed
1230193323Sed  // Update stack information so that we know the destination register is now on
1231193323Sed  // the stack.
1232193323Sed  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1233193323Sed  assert(UpdatedSlot < StackTop && Dest < 7);
1234193323Sed  Stack[UpdatedSlot]   = Dest;
1235193323Sed  RegMap[Dest]         = UpdatedSlot;
1236193323Sed  MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
1237193323Sed}
1238193323Sed
1239193323Sed/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1240193323Sed/// register arguments and no explicit destinations.
1241193323Sed///
1242193323Sedvoid FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1243193323Sed  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1244193323Sed  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1245193323Sed  MachineInstr *MI = I;
1246193323Sed
1247193323Sed  unsigned NumOperands = MI->getDesc().getNumOperands();
1248193323Sed  assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1249193323Sed  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
1250193323Sed  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
1251193323Sed  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
1252193323Sed  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1253193323Sed
1254193323Sed  // Make sure the first operand is on the top of stack, the other one can be
1255193323Sed  // anywhere.
1256193323Sed  moveToTop(Op0, I);
1257193323Sed
1258193323Sed  // Change from the pseudo instruction to the concrete instruction.
1259193323Sed  MI->getOperand(0).setReg(getSTReg(Op1));
1260193323Sed  MI->RemoveOperand(1);
1261193323Sed  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1262193323Sed
1263193323Sed  // If any of the operands are killed by this instruction, free them.
1264193323Sed  if (KillsOp0) freeStackSlotAfter(I, Op0);
1265193323Sed  if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1266193323Sed}
1267193323Sed
1268193323Sed/// handleCondMovFP - Handle two address conditional move instructions.  These
1269193323Sed/// instructions move a st(i) register to st(0) iff a condition is true.  These
1270193323Sed/// instructions require that the first operand is at the top of the stack, but
1271193323Sed/// otherwise don't modify the stack at all.
1272193323Sedvoid FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1273193323Sed  MachineInstr *MI = I;
1274193323Sed
1275193323Sed  unsigned Op0 = getFPReg(MI->getOperand(0));
1276193323Sed  unsigned Op1 = getFPReg(MI->getOperand(2));
1277193323Sed  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
1278193323Sed
1279193323Sed  // The first operand *must* be on the top of the stack.
1280193323Sed  moveToTop(Op0, I);
1281193323Sed
1282193323Sed  // Change the second operand to the stack register that the operand is in.
1283193323Sed  // Change from the pseudo instruction to the concrete instruction.
1284193323Sed  MI->RemoveOperand(0);
1285193323Sed  MI->RemoveOperand(1);
1286193323Sed  MI->getOperand(0).setReg(getSTReg(Op1));
1287193323Sed  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
1288193323Sed
1289193323Sed  // If we kill the second operand, make sure to pop it from the stack.
1290193323Sed  if (Op0 != Op1 && KillsOp1) {
1291193323Sed    // Get this value off of the register stack.
1292193323Sed    freeStackSlotAfter(I, Op1);
1293193323Sed  }
1294193323Sed}
1295193323Sed
1296193323Sed
1297193323Sed/// handleSpecialFP - Handle special instructions which behave unlike other
1298193323Sed/// floating point instructions.  This is primarily intended for use by pseudo
1299193323Sed/// instructions.
1300193323Sed///
1301193323Sedvoid FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
1302193323Sed  MachineInstr *MI = I;
1303193323Sed  DebugLoc dl = MI->getDebugLoc();
1304193323Sed  switch (MI->getOpcode()) {
1305198090Srdivacky  default: llvm_unreachable("Unknown SpecialFP instruction!");
1306193323Sed  case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
1307193323Sed  case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
1308193323Sed  case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
1309193323Sed    assert(StackTop == 0 && "Stack should be empty after a call!");
1310193323Sed    pushReg(getFPReg(MI->getOperand(0)));
1311193323Sed    break;
1312193323Sed  case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type!
1313193323Sed  case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type!
1314193323Sed  case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type!
1315193323Sed    // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm.
1316193323Sed    // The pattern we expect is:
1317193323Sed    //  CALL
1318193323Sed    //  FP1 = FpGET_ST0
1319193323Sed    //  FP4 = FpGET_ST1
1320193323Sed    //
1321193323Sed    // At this point, we've pushed FP1 on the top of stack, so it should be
1322193323Sed    // present if it isn't dead.  If it was dead, we already emitted a pop to
1323193323Sed    // remove it from the stack and StackTop = 0.
1324193323Sed
1325193323Sed    // Push FP4 as top of stack next.
1326193323Sed    pushReg(getFPReg(MI->getOperand(0)));
1327193323Sed
1328193323Sed    // If StackTop was 0 before we pushed our operand, then ST(0) must have been
1329193323Sed    // dead.  In this case, the ST(1) value is the only thing that is live, so
1330193323Sed    // it should be on the TOS (after the pop that was emitted) and is.  Just
1331193323Sed    // continue in this case.
1332193323Sed    if (StackTop == 1)
1333193323Sed      break;
1334193323Sed
1335193323Sed    // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
1336193323Sed    // elements so that our accounting is correct.
1337193323Sed    unsigned RegOnTop = getStackEntry(0);
1338193323Sed    unsigned RegNo = getStackEntry(1);
1339193323Sed
1340193323Sed    // Swap the slots the regs are in.
1341193323Sed    std::swap(RegMap[RegNo], RegMap[RegOnTop]);
1342193323Sed
1343193323Sed    // Swap stack slot contents.
1344193323Sed    assert(RegMap[RegOnTop] < StackTop);
1345193323Sed    std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
1346193323Sed    break;
1347193323Sed  }
1348193323Sed  case X86::FpSET_ST0_32:
1349193323Sed  case X86::FpSET_ST0_64:
1350195340Sed  case X86::FpSET_ST0_80: {
1351210299Sed    // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
1352210299Sed    // arguments that use an st constraint. We expect a sequence of
1353210299Sed    // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
1354195340Sed    unsigned Op0 = getFPReg(MI->getOperand(0));
1355195340Sed
1356195340Sed    if (!MI->killsRegister(X86::FP0 + Op0)) {
1357210299Sed      // Duplicate Op0 into a temporary on the stack top.
1358212904Sdim      duplicateToTop(Op0, getScratchReg(), I);
1359195340Sed    } else {
1360210299Sed      // Op0 is killed, so just swap it into position.
1361195340Sed      moveToTop(Op0, I);
1362194612Sed    }
1363193323Sed    --StackTop;   // "Forget" we have something on the top of stack!
1364193323Sed    break;
1365195340Sed  }
1366193323Sed  case X86::FpSET_ST1_32:
1367193323Sed  case X86::FpSET_ST1_64:
1368210299Sed  case X86::FpSET_ST1_80: {
1369210299Sed    // Set up st(1) for inline asm. We are assuming that st(0) has already been
1370210299Sed    // set up by FpSET_ST0, and our StackTop is off by one because of it.
1371210299Sed    unsigned Op0 = getFPReg(MI->getOperand(0));
1372210299Sed    // Restore the actual StackTop from before Fp_SET_ST0.
1373210299Sed    // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we
1374210299Sed    // are not enforcing the constraint.
1375210299Sed    ++StackTop;
1376210299Sed    unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
1377210299Sed    if (!MI->killsRegister(X86::FP0 + Op0)) {
1378212904Sdim      duplicateToTop(Op0, getScratchReg(), I);
1379210299Sed      moveToTop(RegOnTop, I);
1380210299Sed    } else if (getSTReg(Op0) != X86::ST1) {
1381210299Sed      // We have the wrong value at st(1). Shuffle! Untested!
1382210299Sed      moveToTop(getStackEntry(1), I);
1383210299Sed      moveToTop(Op0, I);
1384210299Sed      moveToTop(RegOnTop, I);
1385193323Sed    }
1386210299Sed    assert(StackTop >= 2 && "Too few live registers");
1387210299Sed    StackTop -= 2; // "Forget" both st(0) and st(1).
1388193323Sed    break;
1389210299Sed  }
1390193323Sed  case X86::MOV_Fp3232:
1391193323Sed  case X86::MOV_Fp3264:
1392193323Sed  case X86::MOV_Fp6432:
1393193323Sed  case X86::MOV_Fp6464:
1394193323Sed  case X86::MOV_Fp3280:
1395193323Sed  case X86::MOV_Fp6480:
1396193323Sed  case X86::MOV_Fp8032:
1397193323Sed  case X86::MOV_Fp8064:
1398193323Sed  case X86::MOV_Fp8080: {
1399193323Sed    const MachineOperand &MO1 = MI->getOperand(1);
1400193323Sed    unsigned SrcReg = getFPReg(MO1);
1401193323Sed
1402193323Sed    const MachineOperand &MO0 = MI->getOperand(0);
1403193323Sed    unsigned DestReg = getFPReg(MO0);
1404193323Sed    if (MI->killsRegister(X86::FP0+SrcReg)) {
1405193323Sed      // If the input operand is killed, we can just change the owner of the
1406193323Sed      // incoming stack slot into the result.
1407193323Sed      unsigned Slot = getSlot(SrcReg);
1408193323Sed      assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
1409193323Sed      Stack[Slot] = DestReg;
1410193323Sed      RegMap[DestReg] = Slot;
1411193323Sed
1412193323Sed    } else {
1413193323Sed      // For FMOV we just duplicate the specified value to a new stack slot.
1414193323Sed      // This could be made better, but would require substantial changes.
1415193323Sed      duplicateToTop(SrcReg, DestReg, I);
1416193323Sed    }
1417193323Sed    }
1418193323Sed    break;
1419203954Srdivacky  case TargetOpcode::INLINEASM: {
1420193323Sed    // The inline asm MachineInstr currently only *uses* FP registers for the
1421193323Sed    // 'f' constraint.  These should be turned into the current ST(x) register
1422193323Sed    // in the machine instr.  Also, any kills should be explicitly popped after
1423193323Sed    // the inline asm.
1424207618Srdivacky    unsigned Kills = 0;
1425193323Sed    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1426193323Sed      MachineOperand &Op = MI->getOperand(i);
1427193323Sed      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1428193323Sed        continue;
1429193323Sed      assert(Op.isUse() && "Only handle inline asm uses right now");
1430193323Sed
1431193323Sed      unsigned FPReg = getFPReg(Op);
1432193323Sed      Op.setReg(getSTReg(FPReg));
1433193323Sed
1434193323Sed      // If we kill this operand, make sure to pop it from the stack after the
1435193323Sed      // asm.  We just remember it for now, and pop them all off at the end in
1436193323Sed      // a batch.
1437193323Sed      if (Op.isKill())
1438207618Srdivacky        Kills |= 1U << FPReg;
1439193323Sed    }
1440193323Sed
1441193323Sed    // If this asm kills any FP registers (is the last use of them) we must
1442193323Sed    // explicitly emit pop instructions for them.  Do this now after the asm has
1443193323Sed    // executed so that the ST(x) numbers are not off (which would happen if we
1444193323Sed    // did this inline with operand rewriting).
1445193323Sed    //
1446193323Sed    // Note: this might be a non-optimal pop sequence.  We might be able to do
1447193323Sed    // better by trying to pop in stack order or something.
1448193323Sed    MachineBasicBlock::iterator InsertPt = MI;
1449207618Srdivacky    while (Kills) {
1450207618Srdivacky      unsigned FPReg = CountTrailingZeros_32(Kills);
1451207618Srdivacky      freeStackSlotAfter(InsertPt, FPReg);
1452207618Srdivacky      Kills &= ~(1U << FPReg);
1453207618Srdivacky    }
1454193323Sed    // Don't delete the inline asm!
1455193323Sed    return;
1456193323Sed  }
1457193323Sed
1458193323Sed  case X86::RET:
1459193323Sed  case X86::RETI:
1460193323Sed    // If RET has an FP register use operand, pass the first one in ST(0) and
1461193323Sed    // the second one in ST(1).
1462212904Sdim
1463193323Sed    // Find the register operands.
1464193323Sed    unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1465212904Sdim    unsigned LiveMask = 0;
1466212904Sdim
1467193323Sed    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1468193323Sed      MachineOperand &Op = MI->getOperand(i);
1469193323Sed      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1470193323Sed        continue;
1471193323Sed      // FP Register uses must be kills unless there are two uses of the same
1472193323Sed      // register, in which case only one will be a kill.
1473193323Sed      assert(Op.isUse() &&
1474193323Sed             (Op.isKill() ||                        // Marked kill.
1475193323Sed              getFPReg(Op) == FirstFPRegOp ||       // Second instance.
1476193323Sed              MI->killsRegister(Op.getReg())) &&    // Later use is marked kill.
1477193323Sed             "Ret only defs operands, and values aren't live beyond it");
1478193323Sed
1479193323Sed      if (FirstFPRegOp == ~0U)
1480193323Sed        FirstFPRegOp = getFPReg(Op);
1481193323Sed      else {
1482193323Sed        assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1483193323Sed        SecondFPRegOp = getFPReg(Op);
1484193323Sed      }
1485212904Sdim      LiveMask |= (1 << getFPReg(Op));
1486193323Sed
1487193323Sed      // Remove the operand so that later passes don't see it.
1488193323Sed      MI->RemoveOperand(i);
1489193323Sed      --i, --e;
1490193323Sed    }
1491212904Sdim
1492212904Sdim    // We may have been carrying spurious live-ins, so make sure only the returned
1493212904Sdim    // registers are left live.
1494212904Sdim    adjustLiveRegs(LiveMask, MI);
1495212904Sdim    if (!LiveMask) return;  // Quick check to see if any are possible.
1496212904Sdim
1497193323Sed    // There are only four possibilities here:
1498193323Sed    // 1) we are returning a single FP value.  In this case, it has to be in
1499193323Sed    //    ST(0) already, so just declare success by removing the value from the
1500193323Sed    //    FP Stack.
1501193323Sed    if (SecondFPRegOp == ~0U) {
1502193323Sed      // Assert that the top of stack contains the right FP register.
1503193323Sed      assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1504193323Sed             "Top of stack not the right register for RET!");
1505193323Sed
1506193323Sed      // Ok, everything is good, mark the value as not being on the stack
1507193323Sed      // anymore so that our assertion about the stack being empty at end of
1508193323Sed      // block doesn't fire.
1509193323Sed      StackTop = 0;
1510193323Sed      return;
1511193323Sed    }
1512193323Sed
1513193323Sed    // Otherwise, we are returning two values:
1514193323Sed    // 2) If returning the same value for both, we only have one thing in the FP
1515193323Sed    //    stack.  Consider:  RET FP1, FP1
1516193323Sed    if (StackTop == 1) {
1517193323Sed      assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1518193323Sed             "Stack misconfiguration for RET!");
1519193323Sed
1520193323Sed      // Duplicate the TOS so that we return it twice.  Just pick some other FPx
1521193323Sed      // register to hold it.
1522212904Sdim      unsigned NewReg = getScratchReg();
1523193323Sed      duplicateToTop(FirstFPRegOp, NewReg, MI);
1524193323Sed      FirstFPRegOp = NewReg;
1525193323Sed    }
1526193323Sed
1527193323Sed    /// Okay we know we have two different FPx operands now:
1528193323Sed    assert(StackTop == 2 && "Must have two values live!");
1529193323Sed
1530193323Sed    /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1531193323Sed    ///    in ST(1).  In this case, emit an fxch.
1532193323Sed    if (getStackEntry(0) == SecondFPRegOp) {
1533193323Sed      assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1534193323Sed      moveToTop(FirstFPRegOp, MI);
1535193323Sed    }
1536193323Sed
1537193323Sed    /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1538193323Sed    /// ST(1).  Just remove both from our understanding of the stack and return.
1539193323Sed    assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1540193323Sed    assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1541193323Sed    StackTop = 0;
1542193323Sed    return;
1543193323Sed  }
1544193323Sed
1545193323Sed  I = MBB->erase(I);  // Remove the pseudo instruction
1546212904Sdim
1547212904Sdim  // We want to leave I pointing to the previous instruction, but what if we
1548212904Sdim  // just erased the first instruction?
1549212904Sdim  if (I == MBB->begin()) {
1550212904Sdim    DEBUG(dbgs() << "Inserting dummy KILL\n");
1551212904Sdim    I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
1552212904Sdim  } else
1553212904Sdim    --I;
1554193323Sed}
1555210299Sed
1556210299Sed// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
1557210299Sedbool FPS::translateCopy(MachineInstr *MI) {
1558210299Sed  unsigned DstReg = MI->getOperand(0).getReg();
1559210299Sed  unsigned SrcReg = MI->getOperand(1).getReg();
1560210299Sed
1561210299Sed  if (DstReg == X86::ST0) {
1562210299Sed    MI->setDesc(TII->get(X86::FpSET_ST0_80));
1563210299Sed    MI->RemoveOperand(0);
1564210299Sed    return true;
1565210299Sed  }
1566210299Sed  if (DstReg == X86::ST1) {
1567210299Sed    MI->setDesc(TII->get(X86::FpSET_ST1_80));
1568210299Sed    MI->RemoveOperand(0);
1569210299Sed    return true;
1570210299Sed  }
1571210299Sed  if (SrcReg == X86::ST0) {
1572210299Sed    MI->setDesc(TII->get(X86::FpGET_ST0_80));
1573210299Sed    return true;
1574210299Sed  }
1575210299Sed  if (SrcReg == X86::ST1) {
1576210299Sed    MI->setDesc(TII->get(X86::FpGET_ST1_80));
1577210299Sed    return true;
1578210299Sed  }
1579210299Sed  if (X86::RFP80RegClass.contains(DstReg, SrcReg)) {
1580210299Sed    MI->setDesc(TII->get(X86::MOV_Fp8080));
1581210299Sed    return true;
1582210299Sed  }
1583210299Sed  return false;
1584210299Sed}
1585