// X86FloatingPoint.cpp revision 210299
//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
// virtual registers into register stack instructions.  This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
// This pass is hampered by the lack of decent CFG manipulation routines for
// machine code.  In particular, this wants to be able to split critical edges
// as necessary, traverse the machine basic block CFG in depth-first order, and
// allow there to be multiple machine basic blocks for each LLVM basicblock
// (needed for critical edge splitting).
//
// In particular, this pass currently barfs on critical edges.  Because of this,
// it requires the instruction selector to insert FP_REG_KILL instructions on
// the exits of any basic block that has critical edges going from it, or which
// branch to a critical basic block.
//
// FIXME: this is not implemented yet.  The stackifier pass only works on local
// basic blocks.
28193323Sed// 29193323Sed//===----------------------------------------------------------------------===// 30193323Sed 31193323Sed#define DEBUG_TYPE "x86-codegen" 32193323Sed#include "X86.h" 33193323Sed#include "X86InstrInfo.h" 34198090Srdivacky#include "llvm/ADT/DepthFirstIterator.h" 35198090Srdivacky#include "llvm/ADT/SmallPtrSet.h" 36198090Srdivacky#include "llvm/ADT/SmallVector.h" 37198090Srdivacky#include "llvm/ADT/Statistic.h" 38198090Srdivacky#include "llvm/ADT/STLExtras.h" 39193323Sed#include "llvm/CodeGen/MachineFunctionPass.h" 40193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 41193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 42193323Sed#include "llvm/CodeGen/Passes.h" 43198090Srdivacky#include "llvm/Support/Debug.h" 44198090Srdivacky#include "llvm/Support/ErrorHandling.h" 45198090Srdivacky#include "llvm/Support/raw_ostream.h" 46193323Sed#include "llvm/Target/TargetInstrInfo.h" 47193323Sed#include "llvm/Target/TargetMachine.h" 48193323Sed#include <algorithm> 49193323Sedusing namespace llvm; 50193323Sed 51193323SedSTATISTIC(NumFXCH, "Number of fxch instructions inserted"); 52193323SedSTATISTIC(NumFP , "Number of floating point instructions"); 53193323Sed 54193323Sednamespace { 55198892Srdivacky struct FPS : public MachineFunctionPass { 56193323Sed static char ID; 57193323Sed FPS() : MachineFunctionPass(&ID) {} 58193323Sed 59193323Sed virtual void getAnalysisUsage(AnalysisUsage &AU) const { 60198090Srdivacky AU.setPreservesCFG(); 61193323Sed AU.addPreservedID(MachineLoopInfoID); 62193323Sed AU.addPreservedID(MachineDominatorsID); 63193323Sed MachineFunctionPass::getAnalysisUsage(AU); 64193323Sed } 65193323Sed 66193323Sed virtual bool runOnMachineFunction(MachineFunction &MF); 67193323Sed 68193323Sed virtual const char *getPassName() const { return "X86 FP Stackifier"; } 69193323Sed 70193323Sed private: 71193323Sed const TargetInstrInfo *TII; // Machine instruction info. 
72193323Sed MachineBasicBlock *MBB; // Current basic block 73193323Sed unsigned Stack[8]; // FP<n> Registers in each stack slot... 74193323Sed unsigned RegMap[8]; // Track which stack slot contains each register 75193323Sed unsigned StackTop; // The current top of the FP stack. 76193323Sed 77193323Sed void dumpStack() const { 78202375Srdivacky dbgs() << "Stack contents:"; 79193323Sed for (unsigned i = 0; i != StackTop; ++i) { 80202375Srdivacky dbgs() << " FP" << Stack[i]; 81193323Sed assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); 82193323Sed } 83202375Srdivacky dbgs() << "\n"; 84193323Sed } 85193323Sed private: 86193323Sed /// isStackEmpty - Return true if the FP stack is empty. 87193323Sed bool isStackEmpty() const { 88193323Sed return StackTop == 0; 89193323Sed } 90193323Sed 91193323Sed // getSlot - Return the stack slot number a particular register number is 92193323Sed // in. 93193323Sed unsigned getSlot(unsigned RegNo) const { 94193323Sed assert(RegNo < 8 && "Regno out of range!"); 95193323Sed return RegMap[RegNo]; 96193323Sed } 97193323Sed 98193323Sed // getStackEntry - Return the X86::FP<n> register in register ST(i). 99193323Sed unsigned getStackEntry(unsigned STi) const { 100193323Sed assert(STi < StackTop && "Access past stack top!"); 101193323Sed return Stack[StackTop-1-STi]; 102193323Sed } 103193323Sed 104193323Sed // getSTReg - Return the X86::ST(i) register which contains the specified 105193323Sed // FP<RegNo> register. 106193323Sed unsigned getSTReg(unsigned RegNo) const { 107193323Sed return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; 108193323Sed } 109193323Sed 110193323Sed // pushReg - Push the specified FP<n> register onto the stack. 
111193323Sed void pushReg(unsigned Reg) { 112193323Sed assert(Reg < 8 && "Register number out of range!"); 113193323Sed assert(StackTop < 8 && "Stack overflow!"); 114193323Sed Stack[StackTop] = Reg; 115193323Sed RegMap[Reg] = StackTop++; 116193323Sed } 117193323Sed 118193323Sed bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } 119193323Sed void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { 120193323Sed MachineInstr *MI = I; 121193323Sed DebugLoc dl = MI->getDebugLoc(); 122193323Sed if (isAtTop(RegNo)) return; 123193323Sed 124193323Sed unsigned STReg = getSTReg(RegNo); 125193323Sed unsigned RegOnTop = getStackEntry(0); 126193323Sed 127193323Sed // Swap the slots the regs are in. 128193323Sed std::swap(RegMap[RegNo], RegMap[RegOnTop]); 129193323Sed 130193323Sed // Swap stack slot contents. 131193323Sed assert(RegMap[RegOnTop] < StackTop); 132193323Sed std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 133193323Sed 134193323Sed // Emit an fxch to update the runtime processors version of the state. 135193323Sed BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); 136210299Sed ++NumFXCH; 137193323Sed } 138193323Sed 139193323Sed void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { 140193323Sed DebugLoc dl = I->getDebugLoc(); 141193323Sed unsigned STReg = getSTReg(RegNo); 142193323Sed pushReg(AsReg); // New register on top of stack 143193323Sed 144193323Sed BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); 145193323Sed } 146193323Sed 147193323Sed // popStackAfter - Pop the current value off of the top of the FP stack 148193323Sed // after the specified instruction. 149193323Sed void popStackAfter(MachineBasicBlock::iterator &I); 150193323Sed 151193323Sed // freeStackSlotAfter - Free the specified register from the register stack, 152193323Sed // so that it is no longer in a register. 
If the register is currently at 153193323Sed // the top of the stack, we just pop the current instruction, otherwise we 154193323Sed // store the current top-of-stack into the specified slot, then pop the top 155193323Sed // of stack. 156193323Sed void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); 157193323Sed 158193323Sed bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 159193323Sed 160193323Sed void handleZeroArgFP(MachineBasicBlock::iterator &I); 161193323Sed void handleOneArgFP(MachineBasicBlock::iterator &I); 162193323Sed void handleOneArgFPRW(MachineBasicBlock::iterator &I); 163193323Sed void handleTwoArgFP(MachineBasicBlock::iterator &I); 164193323Sed void handleCompareFP(MachineBasicBlock::iterator &I); 165193323Sed void handleCondMovFP(MachineBasicBlock::iterator &I); 166193323Sed void handleSpecialFP(MachineBasicBlock::iterator &I); 167210299Sed 168210299Sed bool translateCopy(MachineInstr*); 169193323Sed }; 170193323Sed char FPS::ID = 0; 171193323Sed} 172193323Sed 173193323SedFunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } 174193323Sed 175193323Sed/// getFPReg - Return the X86::FPx register number for the specified operand. 176193323Sed/// For example, this returns 3 for X86::FP3. 177193323Sedstatic unsigned getFPReg(const MachineOperand &MO) { 178193323Sed assert(MO.isReg() && "Expected an FP register!"); 179193323Sed unsigned Reg = MO.getReg(); 180193323Sed assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); 181193323Sed return Reg - X86::FP0; 182193323Sed} 183193323Sed 184193323Sed 185193323Sed/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP 186193323Sed/// register references into FP stack references. 187193323Sed/// 188193323Sedbool FPS::runOnMachineFunction(MachineFunction &MF) { 189193323Sed // We only need to run this pass if there are any FP registers used in this 190193323Sed // function. 
If it is all integer, there is nothing for us to do! 191193323Sed bool FPIsUsed = false; 192193323Sed 193193323Sed assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!"); 194193323Sed for (unsigned i = 0; i <= 6; ++i) 195193323Sed if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { 196193323Sed FPIsUsed = true; 197193323Sed break; 198193323Sed } 199193323Sed 200193323Sed // Early exit. 201193323Sed if (!FPIsUsed) return false; 202193323Sed 203193323Sed TII = MF.getTarget().getInstrInfo(); 204193323Sed StackTop = 0; 205193323Sed 206193323Sed // Process the function in depth first order so that we process at least one 207193323Sed // of the predecessors for every reachable block in the function. 208193323Sed SmallPtrSet<MachineBasicBlock*, 8> Processed; 209193323Sed MachineBasicBlock *Entry = MF.begin(); 210193323Sed 211193323Sed bool Changed = false; 212193323Sed for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> > 213193323Sed I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed); 214193323Sed I != E; ++I) 215193323Sed Changed |= processBasicBlock(MF, **I); 216193323Sed 217198090Srdivacky // Process any unreachable blocks in arbitrary order now. 218198090Srdivacky if (MF.size() == Processed.size()) 219198090Srdivacky return Changed; 220198090Srdivacky 221198090Srdivacky for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) 222198090Srdivacky if (Processed.insert(BB)) 223198090Srdivacky Changed |= processBasicBlock(MF, *BB); 224198090Srdivacky 225193323Sed return Changed; 226193323Sed} 227193323Sed 228193323Sed/// processBasicBlock - Loop over all of the instructions in the basic block, 229193323Sed/// transforming FP instructions into their stack form. 
230193323Sed/// 231193323Sedbool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { 232193323Sed bool Changed = false; 233193323Sed MBB = &BB; 234193323Sed 235193323Sed for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 236193323Sed MachineInstr *MI = I; 237210299Sed uint64_t Flags = MI->getDesc().TSFlags; 238193323Sed 239193323Sed unsigned FPInstClass = Flags & X86II::FPTypeMask; 240203954Srdivacky if (MI->isInlineAsm()) 241193323Sed FPInstClass = X86II::SpecialFP; 242210299Sed 243210299Sed if (MI->isCopy() && translateCopy(MI)) 244210299Sed FPInstClass = X86II::SpecialFP; 245210299Sed 246193323Sed if (FPInstClass == X86II::NotFP) 247193323Sed continue; // Efficiently ignore non-fp insts! 248193323Sed 249193323Sed MachineInstr *PrevMI = 0; 250193323Sed if (I != BB.begin()) 251193323Sed PrevMI = prior(I); 252193323Sed 253193323Sed ++NumFP; // Keep track of # of pseudo instrs 254202375Srdivacky DEBUG(dbgs() << "\nFPInst:\t" << *MI); 255193323Sed 256193323Sed // Get dead variables list now because the MI pointer may be deleted as part 257193323Sed // of processing! 
258193323Sed SmallVector<unsigned, 8> DeadRegs; 259193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 260193323Sed const MachineOperand &MO = MI->getOperand(i); 261193323Sed if (MO.isReg() && MO.isDead()) 262193323Sed DeadRegs.push_back(MO.getReg()); 263193323Sed } 264193323Sed 265193323Sed switch (FPInstClass) { 266193323Sed case X86II::ZeroArgFP: handleZeroArgFP(I); break; 267193323Sed case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0) 268193323Sed case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0)) 269193323Sed case X86II::TwoArgFP: handleTwoArgFP(I); break; 270193323Sed case X86II::CompareFP: handleCompareFP(I); break; 271193323Sed case X86II::CondMovFP: handleCondMovFP(I); break; 272193323Sed case X86II::SpecialFP: handleSpecialFP(I); break; 273198090Srdivacky default: llvm_unreachable("Unknown FP Type!"); 274193323Sed } 275193323Sed 276193323Sed // Check to see if any of the values defined by this instruction are dead 277193323Sed // after definition. If so, pop them. 278193323Sed for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { 279193323Sed unsigned Reg = DeadRegs[i]; 280193323Sed if (Reg >= X86::FP0 && Reg <= X86::FP6) { 281202375Srdivacky DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); 282193323Sed freeStackSlotAfter(I, Reg-X86::FP0); 283193323Sed } 284193323Sed } 285193323Sed 286193323Sed // Print out all of the instructions expanded to if -debug 287193323Sed DEBUG( 288193323Sed MachineBasicBlock::iterator PrevI(PrevMI); 289193323Sed if (I == PrevI) { 290202375Srdivacky dbgs() << "Just deleted pseudo instruction\n"; 291193323Sed } else { 292193323Sed MachineBasicBlock::iterator Start = I; 293193323Sed // Rewind to first instruction newly inserted. 
294193323Sed while (Start != BB.begin() && prior(Start) != PrevI) --Start; 295202375Srdivacky dbgs() << "Inserted instructions:\n\t"; 296202375Srdivacky Start->print(dbgs(), &MF.getTarget()); 297200581Srdivacky while (++Start != llvm::next(I)) {} 298193323Sed } 299193323Sed dumpStack(); 300193323Sed ); 301193323Sed 302193323Sed Changed = true; 303193323Sed } 304193323Sed 305193323Sed assert(isStackEmpty() && "Stack not empty at end of basic block?"); 306193323Sed return Changed; 307193323Sed} 308193323Sed 309193323Sed//===----------------------------------------------------------------------===// 310193323Sed// Efficient Lookup Table Support 311193323Sed//===----------------------------------------------------------------------===// 312193323Sed 313193323Sednamespace { 314193323Sed struct TableEntry { 315193323Sed unsigned from; 316193323Sed unsigned to; 317193323Sed bool operator<(const TableEntry &TE) const { return from < TE.from; } 318193323Sed friend bool operator<(const TableEntry &TE, unsigned V) { 319193323Sed return TE.from < V; 320193323Sed } 321193323Sed friend bool operator<(unsigned V, const TableEntry &TE) { 322193323Sed return V < TE.from; 323193323Sed } 324193323Sed }; 325193323Sed} 326193323Sed 327193323Sed#ifndef NDEBUG 328193323Sedstatic bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { 329193323Sed for (unsigned i = 0; i != NumEntries-1; ++i) 330193323Sed if (!(Table[i] < Table[i+1])) return false; 331193323Sed return true; 332193323Sed} 333193323Sed#endif 334193323Sed 335193323Sedstatic int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { 336193323Sed const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); 337193323Sed if (I != Table+N && I->from == Opcode) 338193323Sed return I->to; 339193323Sed return -1; 340193323Sed} 341193323Sed 342193323Sed#ifdef NDEBUG 343193323Sed#define ASSERT_SORTED(TABLE) 344193323Sed#else 345193323Sed#define ASSERT_SORTED(TABLE) \ 346193323Sed { static bool TABLE##Checked = 
false; \ 347193323Sed if (!TABLE##Checked) { \ 348193323Sed assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ 349193323Sed "All lookup tables must be sorted for efficient access!"); \ 350193323Sed TABLE##Checked = true; \ 351193323Sed } \ 352193323Sed } 353193323Sed#endif 354193323Sed 355193323Sed//===----------------------------------------------------------------------===// 356193323Sed// Register File -> Register Stack Mapping Methods 357193323Sed//===----------------------------------------------------------------------===// 358193323Sed 359193323Sed// OpcodeTable - Sorted map of register instructions to their stack version. 360193323Sed// The first element is an register file pseudo instruction, the second is the 361193323Sed// concrete X86 instruction which uses the register stack. 362193323Sed// 363193323Sedstatic const TableEntry OpcodeTable[] = { 364193323Sed { X86::ABS_Fp32 , X86::ABS_F }, 365193323Sed { X86::ABS_Fp64 , X86::ABS_F }, 366193323Sed { X86::ABS_Fp80 , X86::ABS_F }, 367193323Sed { X86::ADD_Fp32m , X86::ADD_F32m }, 368193323Sed { X86::ADD_Fp64m , X86::ADD_F64m }, 369193323Sed { X86::ADD_Fp64m32 , X86::ADD_F32m }, 370193323Sed { X86::ADD_Fp80m32 , X86::ADD_F32m }, 371193323Sed { X86::ADD_Fp80m64 , X86::ADD_F64m }, 372193323Sed { X86::ADD_FpI16m32 , X86::ADD_FI16m }, 373193323Sed { X86::ADD_FpI16m64 , X86::ADD_FI16m }, 374193323Sed { X86::ADD_FpI16m80 , X86::ADD_FI16m }, 375193323Sed { X86::ADD_FpI32m32 , X86::ADD_FI32m }, 376193323Sed { X86::ADD_FpI32m64 , X86::ADD_FI32m }, 377193323Sed { X86::ADD_FpI32m80 , X86::ADD_FI32m }, 378193323Sed { X86::CHS_Fp32 , X86::CHS_F }, 379193323Sed { X86::CHS_Fp64 , X86::CHS_F }, 380193323Sed { X86::CHS_Fp80 , X86::CHS_F }, 381193323Sed { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, 382193323Sed { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, 383193323Sed { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, 384193323Sed { X86::CMOVB_Fp32 , X86::CMOVB_F }, 385193323Sed { X86::CMOVB_Fp64 , X86::CMOVB_F }, 386193323Sed { X86::CMOVB_Fp80 , 
X86::CMOVB_F }, 387193323Sed { X86::CMOVE_Fp32 , X86::CMOVE_F }, 388193323Sed { X86::CMOVE_Fp64 , X86::CMOVE_F }, 389193323Sed { X86::CMOVE_Fp80 , X86::CMOVE_F }, 390193323Sed { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, 391193323Sed { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, 392193323Sed { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, 393193323Sed { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, 394193323Sed { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, 395193323Sed { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, 396193323Sed { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, 397193323Sed { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, 398193323Sed { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, 399193323Sed { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, 400193323Sed { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, 401193323Sed { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, 402193323Sed { X86::CMOVP_Fp32 , X86::CMOVP_F }, 403193323Sed { X86::CMOVP_Fp64 , X86::CMOVP_F }, 404193323Sed { X86::CMOVP_Fp80 , X86::CMOVP_F }, 405193323Sed { X86::COS_Fp32 , X86::COS_F }, 406193323Sed { X86::COS_Fp64 , X86::COS_F }, 407193323Sed { X86::COS_Fp80 , X86::COS_F }, 408193323Sed { X86::DIVR_Fp32m , X86::DIVR_F32m }, 409193323Sed { X86::DIVR_Fp64m , X86::DIVR_F64m }, 410193323Sed { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, 411193323Sed { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, 412193323Sed { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, 413193323Sed { X86::DIVR_FpI16m32, X86::DIVR_FI16m}, 414193323Sed { X86::DIVR_FpI16m64, X86::DIVR_FI16m}, 415193323Sed { X86::DIVR_FpI16m80, X86::DIVR_FI16m}, 416193323Sed { X86::DIVR_FpI32m32, X86::DIVR_FI32m}, 417193323Sed { X86::DIVR_FpI32m64, X86::DIVR_FI32m}, 418193323Sed { X86::DIVR_FpI32m80, X86::DIVR_FI32m}, 419193323Sed { X86::DIV_Fp32m , X86::DIV_F32m }, 420193323Sed { X86::DIV_Fp64m , X86::DIV_F64m }, 421193323Sed { X86::DIV_Fp64m32 , X86::DIV_F32m }, 422193323Sed { X86::DIV_Fp80m32 , X86::DIV_F32m }, 423193323Sed { X86::DIV_Fp80m64 , X86::DIV_F64m }, 424193323Sed { X86::DIV_FpI16m32 , X86::DIV_FI16m }, 425193323Sed { X86::DIV_FpI16m64 , X86::DIV_FI16m }, 
426193323Sed { X86::DIV_FpI16m80 , X86::DIV_FI16m }, 427193323Sed { X86::DIV_FpI32m32 , X86::DIV_FI32m }, 428193323Sed { X86::DIV_FpI32m64 , X86::DIV_FI32m }, 429193323Sed { X86::DIV_FpI32m80 , X86::DIV_FI32m }, 430193323Sed { X86::ILD_Fp16m32 , X86::ILD_F16m }, 431193323Sed { X86::ILD_Fp16m64 , X86::ILD_F16m }, 432193323Sed { X86::ILD_Fp16m80 , X86::ILD_F16m }, 433193323Sed { X86::ILD_Fp32m32 , X86::ILD_F32m }, 434193323Sed { X86::ILD_Fp32m64 , X86::ILD_F32m }, 435193323Sed { X86::ILD_Fp32m80 , X86::ILD_F32m }, 436193323Sed { X86::ILD_Fp64m32 , X86::ILD_F64m }, 437193323Sed { X86::ILD_Fp64m64 , X86::ILD_F64m }, 438193323Sed { X86::ILD_Fp64m80 , X86::ILD_F64m }, 439193323Sed { X86::ISTT_Fp16m32 , X86::ISTT_FP16m}, 440193323Sed { X86::ISTT_Fp16m64 , X86::ISTT_FP16m}, 441193323Sed { X86::ISTT_Fp16m80 , X86::ISTT_FP16m}, 442193323Sed { X86::ISTT_Fp32m32 , X86::ISTT_FP32m}, 443193323Sed { X86::ISTT_Fp32m64 , X86::ISTT_FP32m}, 444193323Sed { X86::ISTT_Fp32m80 , X86::ISTT_FP32m}, 445193323Sed { X86::ISTT_Fp64m32 , X86::ISTT_FP64m}, 446193323Sed { X86::ISTT_Fp64m64 , X86::ISTT_FP64m}, 447193323Sed { X86::ISTT_Fp64m80 , X86::ISTT_FP64m}, 448193323Sed { X86::IST_Fp16m32 , X86::IST_F16m }, 449193323Sed { X86::IST_Fp16m64 , X86::IST_F16m }, 450193323Sed { X86::IST_Fp16m80 , X86::IST_F16m }, 451193323Sed { X86::IST_Fp32m32 , X86::IST_F32m }, 452193323Sed { X86::IST_Fp32m64 , X86::IST_F32m }, 453193323Sed { X86::IST_Fp32m80 , X86::IST_F32m }, 454193323Sed { X86::IST_Fp64m32 , X86::IST_FP64m }, 455193323Sed { X86::IST_Fp64m64 , X86::IST_FP64m }, 456193323Sed { X86::IST_Fp64m80 , X86::IST_FP64m }, 457193323Sed { X86::LD_Fp032 , X86::LD_F0 }, 458193323Sed { X86::LD_Fp064 , X86::LD_F0 }, 459193323Sed { X86::LD_Fp080 , X86::LD_F0 }, 460193323Sed { X86::LD_Fp132 , X86::LD_F1 }, 461193323Sed { X86::LD_Fp164 , X86::LD_F1 }, 462193323Sed { X86::LD_Fp180 , X86::LD_F1 }, 463193323Sed { X86::LD_Fp32m , X86::LD_F32m }, 464193323Sed { X86::LD_Fp32m64 , X86::LD_F32m }, 465193323Sed { 
X86::LD_Fp32m80 , X86::LD_F32m }, 466193323Sed { X86::LD_Fp64m , X86::LD_F64m }, 467193323Sed { X86::LD_Fp64m80 , X86::LD_F64m }, 468193323Sed { X86::LD_Fp80m , X86::LD_F80m }, 469193323Sed { X86::MUL_Fp32m , X86::MUL_F32m }, 470193323Sed { X86::MUL_Fp64m , X86::MUL_F64m }, 471193323Sed { X86::MUL_Fp64m32 , X86::MUL_F32m }, 472193323Sed { X86::MUL_Fp80m32 , X86::MUL_F32m }, 473193323Sed { X86::MUL_Fp80m64 , X86::MUL_F64m }, 474193323Sed { X86::MUL_FpI16m32 , X86::MUL_FI16m }, 475193323Sed { X86::MUL_FpI16m64 , X86::MUL_FI16m }, 476193323Sed { X86::MUL_FpI16m80 , X86::MUL_FI16m }, 477193323Sed { X86::MUL_FpI32m32 , X86::MUL_FI32m }, 478193323Sed { X86::MUL_FpI32m64 , X86::MUL_FI32m }, 479193323Sed { X86::MUL_FpI32m80 , X86::MUL_FI32m }, 480193323Sed { X86::SIN_Fp32 , X86::SIN_F }, 481193323Sed { X86::SIN_Fp64 , X86::SIN_F }, 482193323Sed { X86::SIN_Fp80 , X86::SIN_F }, 483193323Sed { X86::SQRT_Fp32 , X86::SQRT_F }, 484193323Sed { X86::SQRT_Fp64 , X86::SQRT_F }, 485193323Sed { X86::SQRT_Fp80 , X86::SQRT_F }, 486193323Sed { X86::ST_Fp32m , X86::ST_F32m }, 487193323Sed { X86::ST_Fp64m , X86::ST_F64m }, 488193323Sed { X86::ST_Fp64m32 , X86::ST_F32m }, 489193323Sed { X86::ST_Fp80m32 , X86::ST_F32m }, 490193323Sed { X86::ST_Fp80m64 , X86::ST_F64m }, 491193323Sed { X86::ST_FpP80m , X86::ST_FP80m }, 492193323Sed { X86::SUBR_Fp32m , X86::SUBR_F32m }, 493193323Sed { X86::SUBR_Fp64m , X86::SUBR_F64m }, 494193323Sed { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, 495193323Sed { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, 496193323Sed { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, 497193323Sed { X86::SUBR_FpI16m32, X86::SUBR_FI16m}, 498193323Sed { X86::SUBR_FpI16m64, X86::SUBR_FI16m}, 499193323Sed { X86::SUBR_FpI16m80, X86::SUBR_FI16m}, 500193323Sed { X86::SUBR_FpI32m32, X86::SUBR_FI32m}, 501193323Sed { X86::SUBR_FpI32m64, X86::SUBR_FI32m}, 502193323Sed { X86::SUBR_FpI32m80, X86::SUBR_FI32m}, 503193323Sed { X86::SUB_Fp32m , X86::SUB_F32m }, 504193323Sed { X86::SUB_Fp64m , X86::SUB_F64m }, 
505193323Sed { X86::SUB_Fp64m32 , X86::SUB_F32m }, 506193323Sed { X86::SUB_Fp80m32 , X86::SUB_F32m }, 507193323Sed { X86::SUB_Fp80m64 , X86::SUB_F64m }, 508193323Sed { X86::SUB_FpI16m32 , X86::SUB_FI16m }, 509193323Sed { X86::SUB_FpI16m64 , X86::SUB_FI16m }, 510193323Sed { X86::SUB_FpI16m80 , X86::SUB_FI16m }, 511193323Sed { X86::SUB_FpI32m32 , X86::SUB_FI32m }, 512193323Sed { X86::SUB_FpI32m64 , X86::SUB_FI32m }, 513193323Sed { X86::SUB_FpI32m80 , X86::SUB_FI32m }, 514193323Sed { X86::TST_Fp32 , X86::TST_F }, 515193323Sed { X86::TST_Fp64 , X86::TST_F }, 516193323Sed { X86::TST_Fp80 , X86::TST_F }, 517193323Sed { X86::UCOM_FpIr32 , X86::UCOM_FIr }, 518193323Sed { X86::UCOM_FpIr64 , X86::UCOM_FIr }, 519193323Sed { X86::UCOM_FpIr80 , X86::UCOM_FIr }, 520193323Sed { X86::UCOM_Fpr32 , X86::UCOM_Fr }, 521193323Sed { X86::UCOM_Fpr64 , X86::UCOM_Fr }, 522193323Sed { X86::UCOM_Fpr80 , X86::UCOM_Fr }, 523193323Sed}; 524193323Sed 525193323Sedstatic unsigned getConcreteOpcode(unsigned Opcode) { 526193323Sed ASSERT_SORTED(OpcodeTable); 527193323Sed int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); 528193323Sed assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); 529193323Sed return Opc; 530193323Sed} 531193323Sed 532193323Sed//===----------------------------------------------------------------------===// 533193323Sed// Helper Methods 534193323Sed//===----------------------------------------------------------------------===// 535193323Sed 536193323Sed// PopTable - Sorted map of instructions to their popping version. The first 537193323Sed// element is an instruction, the second is the version which pops. 
538193323Sed// 539193323Sedstatic const TableEntry PopTable[] = { 540193323Sed { X86::ADD_FrST0 , X86::ADD_FPrST0 }, 541193323Sed 542193323Sed { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, 543193323Sed { X86::DIV_FrST0 , X86::DIV_FPrST0 }, 544193323Sed 545193323Sed { X86::IST_F16m , X86::IST_FP16m }, 546193323Sed { X86::IST_F32m , X86::IST_FP32m }, 547193323Sed 548193323Sed { X86::MUL_FrST0 , X86::MUL_FPrST0 }, 549193323Sed 550193323Sed { X86::ST_F32m , X86::ST_FP32m }, 551193323Sed { X86::ST_F64m , X86::ST_FP64m }, 552193323Sed { X86::ST_Frr , X86::ST_FPrr }, 553193323Sed 554193323Sed { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, 555193323Sed { X86::SUB_FrST0 , X86::SUB_FPrST0 }, 556193323Sed 557193323Sed { X86::UCOM_FIr , X86::UCOM_FIPr }, 558193323Sed 559193323Sed { X86::UCOM_FPr , X86::UCOM_FPPr }, 560193323Sed { X86::UCOM_Fr , X86::UCOM_FPr }, 561193323Sed}; 562193323Sed 563193323Sed/// popStackAfter - Pop the current value off of the top of the FP stack after 564193323Sed/// the specified instruction. This attempts to be sneaky and combine the pop 565193323Sed/// into the instruction itself if possible. The iterator is left pointing to 566193323Sed/// the last instruction, be it a new pop instruction inserted, or the old 567193323Sed/// instruction if it was modified in place. 568193323Sed/// 569193323Sedvoid FPS::popStackAfter(MachineBasicBlock::iterator &I) { 570193323Sed MachineInstr* MI = I; 571193323Sed DebugLoc dl = MI->getDebugLoc(); 572193323Sed ASSERT_SORTED(PopTable); 573193323Sed assert(StackTop > 0 && "Cannot pop empty stack!"); 574193323Sed RegMap[Stack[--StackTop]] = ~0; // Update state 575193323Sed 576193323Sed // Check to see if there is a popping version of this instruction... 
577193323Sed int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); 578193323Sed if (Opcode != -1) { 579193323Sed I->setDesc(TII->get(Opcode)); 580193323Sed if (Opcode == X86::UCOM_FPPr) 581193323Sed I->RemoveOperand(0); 582193323Sed } else { // Insert an explicit pop 583193323Sed I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); 584193323Sed } 585193323Sed} 586193323Sed 587193323Sed/// freeStackSlotAfter - Free the specified register from the register stack, so 588193323Sed/// that it is no longer in a register. If the register is currently at the top 589193323Sed/// of the stack, we just pop the current instruction, otherwise we store the 590193323Sed/// current top-of-stack into the specified slot, then pop the top of stack. 591193323Sedvoid FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 592193323Sed if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 593193323Sed popStackAfter(I); 594193323Sed return; 595193323Sed } 596193323Sed 597193323Sed // Otherwise, store the top of stack into the dead slot, killing the operand 598193323Sed // without having to add in an explicit xchg then pop. 
599193323Sed // 600193323Sed unsigned STReg = getSTReg(FPRegNo); 601193323Sed unsigned OldSlot = getSlot(FPRegNo); 602193323Sed unsigned TopReg = Stack[StackTop-1]; 603193323Sed Stack[OldSlot] = TopReg; 604193323Sed RegMap[TopReg] = OldSlot; 605193323Sed RegMap[FPRegNo] = ~0; 606193323Sed Stack[--StackTop] = ~0; 607193323Sed MachineInstr *MI = I; 608193323Sed DebugLoc dl = MI->getDebugLoc(); 609193323Sed I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg); 610193323Sed} 611193323Sed 612193323Sed 613193323Sed//===----------------------------------------------------------------------===// 614193323Sed// Instruction transformation implementation 615193323Sed//===----------------------------------------------------------------------===// 616193323Sed 617193323Sed/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> 618193323Sed/// 619193323Sedvoid FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { 620193323Sed MachineInstr *MI = I; 621193323Sed unsigned DestReg = getFPReg(MI->getOperand(0)); 622193323Sed 623193323Sed // Change from the pseudo instruction to the concrete instruction. 624193323Sed MI->RemoveOperand(0); // Remove the explicit ST(0) operand 625193323Sed MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 626193323Sed 627193323Sed // Result gets pushed on the stack. 628193323Sed pushReg(DestReg); 629193323Sed} 630193323Sed 631193323Sed/// handleOneArgFP - fst <mem>, ST(0) 632193323Sed/// 633193323Sedvoid FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { 634193323Sed MachineInstr *MI = I; 635193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 636210299Sed assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && 637193323Sed "Can only handle fst* & ftst instructions!"); 638193323Sed 639193323Sed // Is this the last use of the source register? 
640193323Sed unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); 641193323Sed bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 642193323Sed 643193323Sed // FISTP64m is strange because there isn't a non-popping versions. 644193323Sed // If we have one _and_ we don't want to pop the operand, duplicate the value 645193323Sed // on the stack instead of moving it. This ensure that popping the value is 646193323Sed // always ok. 647193323Sed // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m. 648193323Sed // 649193323Sed if (!KillsSrc && 650193323Sed (MI->getOpcode() == X86::IST_Fp64m32 || 651193323Sed MI->getOpcode() == X86::ISTT_Fp16m32 || 652193323Sed MI->getOpcode() == X86::ISTT_Fp32m32 || 653193323Sed MI->getOpcode() == X86::ISTT_Fp64m32 || 654193323Sed MI->getOpcode() == X86::IST_Fp64m64 || 655193323Sed MI->getOpcode() == X86::ISTT_Fp16m64 || 656193323Sed MI->getOpcode() == X86::ISTT_Fp32m64 || 657193323Sed MI->getOpcode() == X86::ISTT_Fp64m64 || 658193323Sed MI->getOpcode() == X86::IST_Fp64m80 || 659193323Sed MI->getOpcode() == X86::ISTT_Fp16m80 || 660193323Sed MI->getOpcode() == X86::ISTT_Fp32m80 || 661193323Sed MI->getOpcode() == X86::ISTT_Fp64m80 || 662193323Sed MI->getOpcode() == X86::ST_FpP80m)) { 663193323Sed duplicateToTop(Reg, 7 /*temp register*/, I); 664193323Sed } else { 665193323Sed moveToTop(Reg, I); // Move to the top of the stack... 666193323Sed } 667193323Sed 668193323Sed // Convert from the pseudo instruction to the concrete instruction. 
669193323Sed MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand 670193323Sed MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 671193323Sed 672193323Sed if (MI->getOpcode() == X86::IST_FP64m || 673193323Sed MI->getOpcode() == X86::ISTT_FP16m || 674193323Sed MI->getOpcode() == X86::ISTT_FP32m || 675193323Sed MI->getOpcode() == X86::ISTT_FP64m || 676193323Sed MI->getOpcode() == X86::ST_FP80m) { 677193323Sed assert(StackTop > 0 && "Stack empty??"); 678193323Sed --StackTop; 679193323Sed } else if (KillsSrc) { // Last use of operand? 680193323Sed popStackAfter(I); 681193323Sed } 682193323Sed} 683193323Sed 684193323Sed 685193323Sed/// handleOneArgFPRW: Handle instructions that read from the top of stack and 686193323Sed/// replace the value with a newly computed value. These instructions may have 687193323Sed/// non-fp operands after their FP operands. 688193323Sed/// 689193323Sed/// Examples: 690193323Sed/// R1 = fchs R2 691193323Sed/// R1 = fadd R2, [mem] 692193323Sed/// 693193323Sedvoid FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { 694193323Sed MachineInstr *MI = I; 695193323Sed#ifndef NDEBUG 696193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 697193323Sed assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); 698193323Sed#endif 699193323Sed 700193323Sed // Is this the last use of the source register? 701193323Sed unsigned Reg = getFPReg(MI->getOperand(1)); 702193323Sed bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 703193323Sed 704193323Sed if (KillsSrc) { 705193323Sed // If this is the last use of the source register, just make sure it's on 706193323Sed // the top of the stack. 707193323Sed moveToTop(Reg, I); 708193323Sed assert(StackTop > 0 && "Stack cannot be empty!"); 709193323Sed --StackTop; 710193323Sed pushReg(getFPReg(MI->getOperand(0))); 711193323Sed } else { 712193323Sed // If this is not the last use of the source register, _copy_ it to the top 713193323Sed // of the stack. 
714193323Sed duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); 715193323Sed } 716193323Sed 717193323Sed // Change from the pseudo instruction to the concrete instruction. 718193323Sed MI->RemoveOperand(1); // Drop the source operand. 719193323Sed MI->RemoveOperand(0); // Drop the destination operand. 720193323Sed MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 721193323Sed} 722193323Sed 723193323Sed 724193323Sed//===----------------------------------------------------------------------===// 725193323Sed// Define tables of various ways to map pseudo instructions 726193323Sed// 727193323Sed 728193323Sed// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) 729193323Sedstatic const TableEntry ForwardST0Table[] = { 730193323Sed { X86::ADD_Fp32 , X86::ADD_FST0r }, 731193323Sed { X86::ADD_Fp64 , X86::ADD_FST0r }, 732193323Sed { X86::ADD_Fp80 , X86::ADD_FST0r }, 733193323Sed { X86::DIV_Fp32 , X86::DIV_FST0r }, 734193323Sed { X86::DIV_Fp64 , X86::DIV_FST0r }, 735193323Sed { X86::DIV_Fp80 , X86::DIV_FST0r }, 736193323Sed { X86::MUL_Fp32 , X86::MUL_FST0r }, 737193323Sed { X86::MUL_Fp64 , X86::MUL_FST0r }, 738193323Sed { X86::MUL_Fp80 , X86::MUL_FST0r }, 739193323Sed { X86::SUB_Fp32 , X86::SUB_FST0r }, 740193323Sed { X86::SUB_Fp64 , X86::SUB_FST0r }, 741193323Sed { X86::SUB_Fp80 , X86::SUB_FST0r }, 742193323Sed}; 743193323Sed 744193323Sed// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) 745193323Sedstatic const TableEntry ReverseST0Table[] = { 746193323Sed { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative 747193323Sed { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative 748193323Sed { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative 749193323Sed { X86::DIV_Fp32 , X86::DIVR_FST0r }, 750193323Sed { X86::DIV_Fp64 , X86::DIVR_FST0r }, 751193323Sed { X86::DIV_Fp80 , X86::DIVR_FST0r }, 752193323Sed { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative 753193323Sed { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative 754193323Sed { X86::MUL_Fp80 , 
X86::MUL_FST0r }, // commutative 755193323Sed { X86::SUB_Fp32 , X86::SUBR_FST0r }, 756193323Sed { X86::SUB_Fp64 , X86::SUBR_FST0r }, 757193323Sed { X86::SUB_Fp80 , X86::SUBR_FST0r }, 758193323Sed}; 759193323Sed 760193323Sed// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) 761193323Sedstatic const TableEntry ForwardSTiTable[] = { 762193323Sed { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative 763193323Sed { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative 764193323Sed { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative 765193323Sed { X86::DIV_Fp32 , X86::DIVR_FrST0 }, 766193323Sed { X86::DIV_Fp64 , X86::DIVR_FrST0 }, 767193323Sed { X86::DIV_Fp80 , X86::DIVR_FrST0 }, 768193323Sed { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative 769193323Sed { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative 770193323Sed { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative 771193323Sed { X86::SUB_Fp32 , X86::SUBR_FrST0 }, 772193323Sed { X86::SUB_Fp64 , X86::SUBR_FrST0 }, 773193323Sed { X86::SUB_Fp80 , X86::SUBR_FrST0 }, 774193323Sed}; 775193323Sed 776193323Sed// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) 777193323Sedstatic const TableEntry ReverseSTiTable[] = { 778193323Sed { X86::ADD_Fp32 , X86::ADD_FrST0 }, 779193323Sed { X86::ADD_Fp64 , X86::ADD_FrST0 }, 780193323Sed { X86::ADD_Fp80 , X86::ADD_FrST0 }, 781193323Sed { X86::DIV_Fp32 , X86::DIV_FrST0 }, 782193323Sed { X86::DIV_Fp64 , X86::DIV_FrST0 }, 783193323Sed { X86::DIV_Fp80 , X86::DIV_FrST0 }, 784193323Sed { X86::MUL_Fp32 , X86::MUL_FrST0 }, 785193323Sed { X86::MUL_Fp64 , X86::MUL_FrST0 }, 786193323Sed { X86::MUL_Fp80 , X86::MUL_FrST0 }, 787193323Sed { X86::SUB_Fp32 , X86::SUB_FrST0 }, 788193323Sed { X86::SUB_Fp64 , X86::SUB_FrST0 }, 789193323Sed { X86::SUB_Fp80 , X86::SUB_FrST0 }, 790193323Sed}; 791193323Sed 792193323Sed 793193323Sed/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual 794193323Sed/// instructions which need to be simplified and possibly transformed. 
795193323Sed/// 796193323Sed/// Result: ST(0) = fsub ST(0), ST(i) 797193323Sed/// ST(i) = fsub ST(0), ST(i) 798193323Sed/// ST(0) = fsubr ST(0), ST(i) 799193323Sed/// ST(i) = fsubr ST(0), ST(i) 800193323Sed/// 801193323Sedvoid FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 802193323Sed ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 803193323Sed ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 804193323Sed MachineInstr *MI = I; 805193323Sed 806193323Sed unsigned NumOperands = MI->getDesc().getNumOperands(); 807193323Sed assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 808193323Sed unsigned Dest = getFPReg(MI->getOperand(0)); 809193323Sed unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 810193323Sed unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 811193323Sed bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 812193323Sed bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 813193323Sed DebugLoc dl = MI->getDebugLoc(); 814193323Sed 815193323Sed unsigned TOS = getStackEntry(0); 816193323Sed 817193323Sed // One of our operands must be on the top of the stack. If neither is yet, we 818193323Sed // need to move one. 819193323Sed if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 820193323Sed // We can choose to move either operand to the top of the stack. If one of 821193323Sed // the operands is killed by this instruction, we want that one so that we 822193323Sed // can update right on top of the old version. 823193323Sed if (KillsOp0) { 824193323Sed moveToTop(Op0, I); // Move dead operand to TOS. 825193323Sed TOS = Op0; 826193323Sed } else if (KillsOp1) { 827193323Sed moveToTop(Op1, I); 828193323Sed TOS = Op1; 829193323Sed } else { 830193323Sed // All of the operands are live after this instruction executes, so we 831193323Sed // cannot update on top of any operand. Because of this, we must 832193323Sed // duplicate one of the stack elements to the top. It doesn't matter 833193323Sed // which one we pick. 
834193323Sed // 835193323Sed duplicateToTop(Op0, Dest, I); 836193323Sed Op0 = TOS = Dest; 837193323Sed KillsOp0 = true; 838193323Sed } 839193323Sed } else if (!KillsOp0 && !KillsOp1) { 840193323Sed // If we DO have one of our operands at the top of the stack, but we don't 841193323Sed // have a dead operand, we must duplicate one of the operands to a new slot 842193323Sed // on the stack. 843193323Sed duplicateToTop(Op0, Dest, I); 844193323Sed Op0 = TOS = Dest; 845193323Sed KillsOp0 = true; 846193323Sed } 847193323Sed 848193323Sed // Now we know that one of our operands is on the top of the stack, and at 849193323Sed // least one of our operands is killed by this instruction. 850193323Sed assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 851193323Sed "Stack conditions not set up right!"); 852193323Sed 853193323Sed // We decide which form to use based on what is on the top of the stack, and 854193323Sed // which operand is killed by this instruction. 855193323Sed const TableEntry *InstTable; 856193323Sed bool isForward = TOS == Op0; 857193323Sed bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 858193323Sed if (updateST0) { 859193323Sed if (isForward) 860193323Sed InstTable = ForwardST0Table; 861193323Sed else 862193323Sed InstTable = ReverseST0Table; 863193323Sed } else { 864193323Sed if (isForward) 865193323Sed InstTable = ForwardSTiTable; 866193323Sed else 867193323Sed InstTable = ReverseSTiTable; 868193323Sed } 869193323Sed 870193323Sed int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), 871193323Sed MI->getOpcode()); 872193323Sed assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 873193323Sed 874193323Sed // NotTOS - The register which is not on the top of stack... 875193323Sed unsigned NotTOS = (TOS == Op0) ? 
Op1 : Op0; 876193323Sed 877193323Sed // Replace the old instruction with a new instruction 878193323Sed MBB->remove(I++); 879193323Sed I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); 880193323Sed 881193323Sed // If both operands are killed, pop one off of the stack in addition to 882193323Sed // overwriting the other one. 883193323Sed if (KillsOp0 && KillsOp1 && Op0 != Op1) { 884193323Sed assert(!updateST0 && "Should have updated other operand!"); 885193323Sed popStackAfter(I); // Pop the top of stack 886193323Sed } 887193323Sed 888193323Sed // Update stack information so that we know the destination register is now on 889193323Sed // the stack. 890193323Sed unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 891193323Sed assert(UpdatedSlot < StackTop && Dest < 7); 892193323Sed Stack[UpdatedSlot] = Dest; 893193323Sed RegMap[Dest] = UpdatedSlot; 894193323Sed MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction 895193323Sed} 896193323Sed 897193323Sed/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 898193323Sed/// register arguments and no explicit destinations. 899193323Sed/// 900193323Sedvoid FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 901193323Sed ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 902193323Sed ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 903193323Sed MachineInstr *MI = I; 904193323Sed 905193323Sed unsigned NumOperands = MI->getDesc().getNumOperands(); 906193323Sed assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 907193323Sed unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 908193323Sed unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 909193323Sed bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 910193323Sed bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 911193323Sed 912193323Sed // Make sure the first operand is on the top of stack, the other one can be 913193323Sed // anywhere. 
914193323Sed moveToTop(Op0, I); 915193323Sed 916193323Sed // Change from the pseudo instruction to the concrete instruction. 917193323Sed MI->getOperand(0).setReg(getSTReg(Op1)); 918193323Sed MI->RemoveOperand(1); 919193323Sed MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 920193323Sed 921193323Sed // If any of the operands are killed by this instruction, free them. 922193323Sed if (KillsOp0) freeStackSlotAfter(I, Op0); 923193323Sed if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 924193323Sed} 925193323Sed 926193323Sed/// handleCondMovFP - Handle two address conditional move instructions. These 927193323Sed/// instructions move a st(i) register to st(0) iff a condition is true. These 928193323Sed/// instructions require that the first operand is at the top of the stack, but 929193323Sed/// otherwise don't modify the stack at all. 930193323Sedvoid FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { 931193323Sed MachineInstr *MI = I; 932193323Sed 933193323Sed unsigned Op0 = getFPReg(MI->getOperand(0)); 934193323Sed unsigned Op1 = getFPReg(MI->getOperand(2)); 935193323Sed bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 936193323Sed 937193323Sed // The first operand *must* be on the top of the stack. 938193323Sed moveToTop(Op0, I); 939193323Sed 940193323Sed // Change the second operand to the stack register that the operand is in. 941193323Sed // Change from the pseudo instruction to the concrete instruction. 942193323Sed MI->RemoveOperand(0); 943193323Sed MI->RemoveOperand(1); 944193323Sed MI->getOperand(0).setReg(getSTReg(Op1)); 945193323Sed MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 946193323Sed 947193323Sed // If we kill the second operand, make sure to pop it from the stack. 948193323Sed if (Op0 != Op1 && KillsOp1) { 949193323Sed // Get this value off of the register stack. 
950193323Sed freeStackSlotAfter(I, Op1); 951193323Sed } 952193323Sed} 953193323Sed 954193323Sed 955193323Sed/// handleSpecialFP - Handle special instructions which behave unlike other 956193323Sed/// floating point instructions. This is primarily intended for use by pseudo 957193323Sed/// instructions. 958193323Sed/// 959193323Sedvoid FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { 960193323Sed MachineInstr *MI = I; 961193323Sed DebugLoc dl = MI->getDebugLoc(); 962193323Sed switch (MI->getOpcode()) { 963198090Srdivacky default: llvm_unreachable("Unknown SpecialFP instruction!"); 964193323Sed case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! 965193323Sed case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! 966193323Sed case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! 967193323Sed assert(StackTop == 0 && "Stack should be empty after a call!"); 968193323Sed pushReg(getFPReg(MI->getOperand(0))); 969193323Sed break; 970193323Sed case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! 971193323Sed case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! 972193323Sed case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! 973193323Sed // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. 974193323Sed // The pattern we expect is: 975193323Sed // CALL 976193323Sed // FP1 = FpGET_ST0 977193323Sed // FP4 = FpGET_ST1 978193323Sed // 979193323Sed // At this point, we've pushed FP1 on the top of stack, so it should be 980193323Sed // present if it isn't dead. If it was dead, we already emitted a pop to 981193323Sed // remove it from the stack and StackTop = 0. 982193323Sed 983193323Sed // Push FP4 as top of stack next. 984193323Sed pushReg(getFPReg(MI->getOperand(0))); 985193323Sed 986193323Sed // If StackTop was 0 before we pushed our operand, then ST(0) must have been 987193323Sed // dead. 
In this case, the ST(1) value is the only thing that is live, so 988193323Sed // it should be on the TOS (after the pop that was emitted) and is. Just 989193323Sed // continue in this case. 990193323Sed if (StackTop == 1) 991193323Sed break; 992193323Sed 993193323Sed // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top 994193323Sed // elements so that our accounting is correct. 995193323Sed unsigned RegOnTop = getStackEntry(0); 996193323Sed unsigned RegNo = getStackEntry(1); 997193323Sed 998193323Sed // Swap the slots the regs are in. 999193323Sed std::swap(RegMap[RegNo], RegMap[RegOnTop]); 1000193323Sed 1001193323Sed // Swap stack slot contents. 1002193323Sed assert(RegMap[RegOnTop] < StackTop); 1003193323Sed std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 1004193323Sed break; 1005193323Sed } 1006193323Sed case X86::FpSET_ST0_32: 1007193323Sed case X86::FpSET_ST0_64: 1008195340Sed case X86::FpSET_ST0_80: { 1009210299Sed // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm 1010210299Sed // arguments that use an st constraint. We expect a sequence of 1011210299Sed // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM 1012195340Sed unsigned Op0 = getFPReg(MI->getOperand(0)); 1013195340Sed 1014195340Sed if (!MI->killsRegister(X86::FP0 + Op0)) { 1015210299Sed // Duplicate Op0 into a temporary on the stack top. 1016210299Sed // This actually assumes that FP7 is dead. 1017210299Sed duplicateToTop(Op0, 7, I); 1018195340Sed } else { 1019210299Sed // Op0 is killed, so just swap it into position. 1020195340Sed moveToTop(Op0, I); 1021194612Sed } 1022193323Sed --StackTop; // "Forget" we have something on the top of stack! 1023193323Sed break; 1024195340Sed } 1025193323Sed case X86::FpSET_ST1_32: 1026193323Sed case X86::FpSET_ST1_64: 1027210299Sed case X86::FpSET_ST1_80: { 1028210299Sed // Set up st(1) for inline asm. 
We are assuming that st(0) has already been 1029210299Sed // set up by FpSET_ST0, and our StackTop is off by one because of it. 1030210299Sed unsigned Op0 = getFPReg(MI->getOperand(0)); 1031210299Sed // Restore the actual StackTop from before Fp_SET_ST0. 1032210299Sed // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we 1033210299Sed // are not enforcing the constraint. 1034210299Sed ++StackTop; 1035210299Sed unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). 1036210299Sed if (!MI->killsRegister(X86::FP0 + Op0)) { 1037210299Sed // Assume FP6 is not live, use it as a scratch register. 1038210299Sed duplicateToTop(Op0, 6, I); 1039210299Sed moveToTop(RegOnTop, I); 1040210299Sed } else if (getSTReg(Op0) != X86::ST1) { 1041210299Sed // We have the wrong value at st(1). Shuffle! Untested! 1042210299Sed moveToTop(getStackEntry(1), I); 1043210299Sed moveToTop(Op0, I); 1044210299Sed moveToTop(RegOnTop, I); 1045193323Sed } 1046210299Sed assert(StackTop >= 2 && "Too few live registers"); 1047210299Sed StackTop -= 2; // "Forget" both st(0) and st(1). 1048193323Sed break; 1049210299Sed } 1050193323Sed case X86::MOV_Fp3232: 1051193323Sed case X86::MOV_Fp3264: 1052193323Sed case X86::MOV_Fp6432: 1053193323Sed case X86::MOV_Fp6464: 1054193323Sed case X86::MOV_Fp3280: 1055193323Sed case X86::MOV_Fp6480: 1056193323Sed case X86::MOV_Fp8032: 1057193323Sed case X86::MOV_Fp8064: 1058193323Sed case X86::MOV_Fp8080: { 1059193323Sed const MachineOperand &MO1 = MI->getOperand(1); 1060193323Sed unsigned SrcReg = getFPReg(MO1); 1061193323Sed 1062193323Sed const MachineOperand &MO0 = MI->getOperand(0); 1063193323Sed unsigned DestReg = getFPReg(MO0); 1064193323Sed if (MI->killsRegister(X86::FP0+SrcReg)) { 1065193323Sed // If the input operand is killed, we can just change the owner of the 1066193323Sed // incoming stack slot into the result. 
1067193323Sed unsigned Slot = getSlot(SrcReg); 1068193323Sed assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); 1069193323Sed Stack[Slot] = DestReg; 1070193323Sed RegMap[DestReg] = Slot; 1071193323Sed 1072193323Sed } else { 1073193323Sed // For FMOV we just duplicate the specified value to a new stack slot. 1074193323Sed // This could be made better, but would require substantial changes. 1075193323Sed duplicateToTop(SrcReg, DestReg, I); 1076193323Sed } 1077193323Sed } 1078193323Sed break; 1079203954Srdivacky case TargetOpcode::INLINEASM: { 1080193323Sed // The inline asm MachineInstr currently only *uses* FP registers for the 1081193323Sed // 'f' constraint. These should be turned into the current ST(x) register 1082193323Sed // in the machine instr. Also, any kills should be explicitly popped after 1083193323Sed // the inline asm. 1084207618Srdivacky unsigned Kills = 0; 1085193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1086193323Sed MachineOperand &Op = MI->getOperand(i); 1087193323Sed if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1088193323Sed continue; 1089193323Sed assert(Op.isUse() && "Only handle inline asm uses right now"); 1090193323Sed 1091193323Sed unsigned FPReg = getFPReg(Op); 1092193323Sed Op.setReg(getSTReg(FPReg)); 1093193323Sed 1094193323Sed // If we kill this operand, make sure to pop it from the stack after the 1095193323Sed // asm. We just remember it for now, and pop them all off at the end in 1096193323Sed // a batch. 1097193323Sed if (Op.isKill()) 1098207618Srdivacky Kills |= 1U << FPReg; 1099193323Sed } 1100193323Sed 1101193323Sed // If this asm kills any FP registers (is the last use of them) we must 1102193323Sed // explicitly emit pop instructions for them. Do this now after the asm has 1103193323Sed // executed so that the ST(x) numbers are not off (which would happen if we 1104193323Sed // did this inline with operand rewriting). 
1105193323Sed // 1106193323Sed // Note: this might be a non-optimal pop sequence. We might be able to do 1107193323Sed // better by trying to pop in stack order or something. 1108193323Sed MachineBasicBlock::iterator InsertPt = MI; 1109207618Srdivacky while (Kills) { 1110207618Srdivacky unsigned FPReg = CountTrailingZeros_32(Kills); 1111207618Srdivacky freeStackSlotAfter(InsertPt, FPReg); 1112207618Srdivacky Kills &= ~(1U << FPReg); 1113207618Srdivacky } 1114193323Sed // Don't delete the inline asm! 1115193323Sed return; 1116193323Sed } 1117193323Sed 1118193323Sed case X86::RET: 1119193323Sed case X86::RETI: 1120193323Sed // If RET has an FP register use operand, pass the first one in ST(0) and 1121193323Sed // the second one in ST(1). 1122193323Sed if (isStackEmpty()) return; // Quick check to see if any are possible. 1123193323Sed 1124193323Sed // Find the register operands. 1125193323Sed unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; 1126193323Sed 1127193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1128193323Sed MachineOperand &Op = MI->getOperand(i); 1129193323Sed if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1130193323Sed continue; 1131193323Sed // FP Register uses must be kills unless there are two uses of the same 1132193323Sed // register, in which case only one will be a kill. 1133193323Sed assert(Op.isUse() && 1134193323Sed (Op.isKill() || // Marked kill. 1135193323Sed getFPReg(Op) == FirstFPRegOp || // Second instance. 1136193323Sed MI->killsRegister(Op.getReg())) && // Later use is marked kill. 1137193323Sed "Ret only defs operands, and values aren't live beyond it"); 1138193323Sed 1139193323Sed if (FirstFPRegOp == ~0U) 1140193323Sed FirstFPRegOp = getFPReg(Op); 1141193323Sed else { 1142193323Sed assert(SecondFPRegOp == ~0U && "More than two fp operands!"); 1143193323Sed SecondFPRegOp = getFPReg(Op); 1144193323Sed } 1145193323Sed 1146193323Sed // Remove the operand so that later passes don't see it. 
1147193323Sed MI->RemoveOperand(i); 1148193323Sed --i, --e; 1149193323Sed } 1150193323Sed 1151193323Sed // There are only four possibilities here: 1152193323Sed // 1) we are returning a single FP value. In this case, it has to be in 1153193323Sed // ST(0) already, so just declare success by removing the value from the 1154193323Sed // FP Stack. 1155193323Sed if (SecondFPRegOp == ~0U) { 1156193323Sed // Assert that the top of stack contains the right FP register. 1157193323Sed assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && 1158193323Sed "Top of stack not the right register for RET!"); 1159193323Sed 1160193323Sed // Ok, everything is good, mark the value as not being on the stack 1161193323Sed // anymore so that our assertion about the stack being empty at end of 1162193323Sed // block doesn't fire. 1163193323Sed StackTop = 0; 1164193323Sed return; 1165193323Sed } 1166193323Sed 1167193323Sed // Otherwise, we are returning two values: 1168193323Sed // 2) If returning the same value for both, we only have one thing in the FP 1169193323Sed // stack. Consider: RET FP1, FP1 1170193323Sed if (StackTop == 1) { 1171193323Sed assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& 1172193323Sed "Stack misconfiguration for RET!"); 1173193323Sed 1174193323Sed // Duplicate the TOS so that we return it twice. Just pick some other FPx 1175193323Sed // register to hold it. 1176193323Sed unsigned NewReg = (FirstFPRegOp+1)%7; 1177193323Sed duplicateToTop(FirstFPRegOp, NewReg, MI); 1178193323Sed FirstFPRegOp = NewReg; 1179193323Sed } 1180193323Sed 1181193323Sed /// Okay we know we have two different FPx operands now: 1182193323Sed assert(StackTop == 2 && "Must have two values live!"); 1183193323Sed 1184193323Sed /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently 1185193323Sed /// in ST(1). In this case, emit an fxch. 
1186193323Sed if (getStackEntry(0) == SecondFPRegOp) { 1187193323Sed assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); 1188193323Sed moveToTop(FirstFPRegOp, MI); 1189193323Sed } 1190193323Sed 1191193323Sed /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in 1192193323Sed /// ST(1). Just remove both from our understanding of the stack and return. 1193193323Sed assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); 1194193323Sed assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); 1195193323Sed StackTop = 0; 1196193323Sed return; 1197193323Sed } 1198193323Sed 1199193323Sed I = MBB->erase(I); // Remove the pseudo instruction 1200193323Sed --I; 1201193323Sed} 1202210299Sed 1203210299Sed// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. 1204210299Sedbool FPS::translateCopy(MachineInstr *MI) { 1205210299Sed unsigned DstReg = MI->getOperand(0).getReg(); 1206210299Sed unsigned SrcReg = MI->getOperand(1).getReg(); 1207210299Sed 1208210299Sed if (DstReg == X86::ST0) { 1209210299Sed MI->setDesc(TII->get(X86::FpSET_ST0_80)); 1210210299Sed MI->RemoveOperand(0); 1211210299Sed return true; 1212210299Sed } 1213210299Sed if (DstReg == X86::ST1) { 1214210299Sed MI->setDesc(TII->get(X86::FpSET_ST1_80)); 1215210299Sed MI->RemoveOperand(0); 1216210299Sed return true; 1217210299Sed } 1218210299Sed if (SrcReg == X86::ST0) { 1219210299Sed MI->setDesc(TII->get(X86::FpGET_ST0_80)); 1220210299Sed return true; 1221210299Sed } 1222210299Sed if (SrcReg == X86::ST1) { 1223210299Sed MI->setDesc(TII->get(X86::FpGET_ST1_80)); 1224210299Sed return true; 1225210299Sed } 1226210299Sed if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { 1227210299Sed MI->setDesc(TII->get(X86::MOV_Fp8080)); 1228210299Sed return true; 1229210299Sed } 1230210299Sed return false; 1231210299Sed} 1232