X86FloatingPoint.cpp revision 221345
155997Simp//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===// 255997Simp// 355997Simp// The LLVM Compiler Infrastructure 455997Simp// 555997Simp// This file is distributed under the University of Illinois Open Source 655997Simp// License. See LICENSE.TXT for details. 755997Simp// 855997Simp//===----------------------------------------------------------------------===// 955997Simp// 1055997Simp// This file defines the pass which converts floating point instructions from 1155997Simp// pseudo registers into register stack instructions. This pass uses live 1255997Simp// variable information to indicate where the FPn registers are used and their 1355997Simp// lifetimes. 1455997Simp// 1555997Simp// The x87 hardware tracks liveness of the stack registers, so it is necessary 1655997Simp// to implement exact liveness tracking between basic blocks. The CFG edges are 1755997Simp// partitioned into bundles where the same FP registers must be live in 1855997Simp// identical stack positions. Instructions are inserted at the end of each basic 1955997Simp// block to rearrange the live registers to match the outgoing bundle. 2055997Simp// 2155997Simp// This approach avoids splitting critical edges at the potential cost of more 2255997Simp// live register shuffling instructions when critical edges are present. 2355997Simp// 2455997Simp//===----------------------------------------------------------------------===// 2555997Simp 2655997Simp#define DEBUG_TYPE "x86-codegen" 27119418Sobrien#include "X86.h" 28119418Sobrien#include "X86InstrInfo.h" 29119418Sobrien#include "llvm/ADT/DepthFirstIterator.h" 3055997Simp#include "llvm/ADT/DenseMap.h" 31241591Sjhb#include "llvm/ADT/SmallPtrSet.h" 3255997Simp#include "llvm/ADT/SmallVector.h" 33241591Sjhb#include "llvm/ADT/Statistic.h" 3455997Simp#include "llvm/ADT/STLExtras.h" 35241591Sjhb#include "llvm/CodeGen/EdgeBundles.h" 3655997Simp#include "llvm/CodeGen/MachineFunctionPass.h" 3755997Simp#include "llvm/CodeGen/MachineInstrBuilder.h" 3855997Simp#include "llvm/CodeGen/MachineRegisterInfo.h" 3955997Simp#include "llvm/CodeGen/Passes.h" 4055997Simp#include "llvm/Support/Debug.h" 4155997Simp#include "llvm/Support/ErrorHandling.h" 4255997Simp#include "llvm/Support/raw_ostream.h" 4370782Simp#include "llvm/Target/TargetInstrInfo.h" 4455997Simp#include "llvm/Target/TargetMachine.h" 4570782Simp#include <algorithm> 46129764Simpusing namespace llvm; 4770782Simp 4855997SimpSTATISTIC(NumFXCH, "Number of fxch instructions inserted"); 4955997SimpSTATISTIC(NumFP , "Number of floating point instructions"); 5055997Simp 5155997Simpnamespace { 5255997Simp struct FPS : public MachineFunctionPass { 5355997Simp static char ID; 5455997Simp FPS() : MachineFunctionPass(ID) { 5570782Simp initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); 5670782Simp // This is really only to keep valgrind quiet. 5770782Simp // The logic in isLive() is too much for it. 5870782Simp memset(Stack, 0, sizeof(Stack)); 5955997Simp memset(RegMap, 0, sizeof(RegMap)); 60162979Simp } 61147580Simp 62147580Simp virtual void getAnalysisUsage(AnalysisUsage &AU) const { 63147580Simp AU.setPreservesCFG(); 64147580Simp AU.addRequired<EdgeBundles>(); 65147580Simp AU.addPreservedID(MachineLoopInfoID); 6670782Simp AU.addPreservedID(MachineDominatorsID); 6770782Simp MachineFunctionPass::getAnalysisUsage(AU); 6870782Simp } 6955997Simp 7055997Simp virtual bool runOnMachineFunction(MachineFunction &MF); 7155997Simp 7255997Simp virtual const char *getPassName() const { return "X86 FP Stackifier"; } 7355997Simp 7455997Simp private: 7555997Simp const TargetInstrInfo *TII; // Machine instruction info. 7655997Simp 77241591Sjhb // Two CFG edges are related if they leave the same block, or enter the same 7855997Simp // block. The transitive closure of an edge under this relation is a 7955997Simp // LiveBundle. It represents a set of CFG edges where the live FP stack 80296137Sjhibbits // registers must be allocated identically in the x87 stack. 81296137Sjhibbits // 8255997Simp // A LiveBundle is usually all the edges leaving a block, or all the edges 8355997Simp // entering a block, but it can contain more edges if critical edges are 8455997Simp // present. 8555997Simp // 86127135Snjl // The set of live FP registers in a LiveBundle is calculated by bundleCFG, 8755997Simp // but the exact mapping of FP registers to stack slots is fixed later. 8855997Simp struct LiveBundle { 8955997Simp // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. 9055997Simp unsigned Mask; 9155997Simp 92170872Sscottl // Number of pre-assigned live registers in FixStack. This is 0 when the 93241591Sjhb // stack order has not yet been fixed. 94241591Sjhb unsigned FixCount; 9555997Simp 9655997Simp // Assigned stack order for live-in registers. 9755997Simp // FixStack[i] == getStackEntry(i) for all i < FixCount. 9855997Simp unsigned char FixStack[8]; 9955997Simp 10055997Simp LiveBundle() : Mask(0), FixCount(0) {} 10155997Simp 10255997Simp // Have the live registers been assigned a stack order yet? 10355997Simp bool isFixed() const { return !Mask || FixCount; } 10455997Simp }; 10555997Simp 10655997Simp // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges 107241591Sjhb // with no live FP registers. 108241591Sjhb SmallVector<LiveBundle, 8> LiveBundles; 10955997Simp 11055997Simp // The edge bundle analysis provides indices into the LiveBundles vector. 11155997Simp EdgeBundles *Bundles; 112150392Simp 11370782Simp // Return a bitmask of FP registers in block's live-in list. 11470782Simp unsigned calcLiveInMask(MachineBasicBlock *MBB) { 11570782Simp unsigned Mask = 0; 11670782Simp for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), 11770782Simp E = MBB->livein_end(); I != E; ++I) { 118113315Simp unsigned Reg = *I - X86::FP0; 119113315Simp if (Reg < 8) 120241591Sjhb Mask |= 1 << Reg; 121241591Sjhb } 122241591Sjhb return Mask; 123241591Sjhb } 12470782Simp 125241591Sjhb // Partition all the CFG edges into LiveBundles. 12670782Simp void bundleCFG(MachineFunction &MF); 12770782Simp 12870782Simp MachineBasicBlock *MBB; // Current basic block 129150392Simp unsigned Stack[8]; // FP<n> Registers in each stack slot... 13055997Simp unsigned RegMap[8]; // Track which stack slot contains each register 13155997Simp unsigned StackTop; // The current top of the FP stack. 13255997Simp 133150392Simp // Set up our stack model to match the incoming registers to MBB. 13455997Simp void setupBlockStack(); 13555997Simp 13655997Simp // Shuffle live registers to match the expectations of successor blocks. 13755997Simp void finishBlockStack(); 13855997Simp 13955997Simp void dumpStack() const { 14055997Simp dbgs() << "Stack contents:"; 14155997Simp for (unsigned i = 0; i != StackTop; ++i) { 14255997Simp dbgs() << " FP" << Stack[i]; 14355997Simp assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); 14455997Simp } 14555997Simp dbgs() << "\n"; 14655997Simp } 14755997Simp 14855997Simp /// getSlot - Return the stack slot number a particular register number is 149241591Sjhb /// in. 150241591Sjhb unsigned getSlot(unsigned RegNo) const { 15155997Simp assert(RegNo < 8 && "Regno out of range!"); 15255997Simp return RegMap[RegNo]; 15355997Simp } 15455997Simp 15555997Simp /// isLive - Is RegNo currently live in the stack? 15655997Simp bool isLive(unsigned RegNo) const { 15755997Simp unsigned Slot = getSlot(RegNo); 15855997Simp return Slot < StackTop && Stack[Slot] == RegNo; 15955997Simp } 16055997Simp 16155997Simp /// getScratchReg - Return an FP register that is not currently in use. 16255997Simp unsigned getScratchReg() { 16355997Simp for (int i = 7; i >= 0; --i) 16455997Simp if (!isLive(i)) 16555997Simp return i; 16655997Simp llvm_unreachable("Ran out of scratch FP registers"); 16755997Simp } 16855997Simp 16955997Simp /// getStackEntry - Return the X86::FP<n> register in register ST(i). 17055997Simp unsigned getStackEntry(unsigned STi) const { 17155997Simp if (STi >= StackTop) 17255997Simp report_fatal_error("Access past stack top!"); 17355997Simp return Stack[StackTop-1-STi]; 17455997Simp } 17555997Simp 17655997Simp /// getSTReg - Return the X86::ST(i) register which contains the specified 17755997Simp /// FP<RegNo> register. 17855997Simp unsigned getSTReg(unsigned RegNo) const { 17955997Simp return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; 18055997Simp } 18155997Simp 18255997Simp // pushReg - Push the specified FP<n> register onto the stack. 183150392Simp void pushReg(unsigned Reg) { 184150392Simp assert(Reg < 8 && "Register number out of range!"); 18555997Simp if (StackTop >= 8) 18670782Simp report_fatal_error("Stack overflow!"); 18755997Simp Stack[StackTop] = Reg; 18855997Simp RegMap[Reg] = StackTop++; 18955997Simp } 19055997Simp 19155997Simp bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } 19255997Simp void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { 19355997Simp DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 19455997Simp if (isAtTop(RegNo)) return; 19555997Simp 19655997Simp unsigned STReg = getSTReg(RegNo); 19769960Simp unsigned RegOnTop = getStackEntry(0); 19855997Simp 199292079Simp // Swap the slots the regs are in. 200 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 201 202 // Swap stack slot contents. 203 if (RegMap[RegOnTop] >= StackTop) 204 report_fatal_error("Access past stack top!"); 205 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 206 207 // Emit an fxch to update the runtime processors version of the state. 208 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); 209 ++NumFXCH; 210 } 211 212 void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { 213 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 214 unsigned STReg = getSTReg(RegNo); 215 pushReg(AsReg); // New register on top of stack 216 217 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); 218 } 219 220 /// popStackAfter - Pop the current value off of the top of the FP stack 221 /// after the specified instruction. 222 void popStackAfter(MachineBasicBlock::iterator &I); 223 224 /// freeStackSlotAfter - Free the specified register from the register 225 /// stack, so that it is no longer in a register. If the register is 226 /// currently at the top of the stack, we just pop the current instruction, 227 /// otherwise we store the current top-of-stack into the specified slot, 228 /// then pop the top of stack. 229 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); 230 231 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted 232 /// instruction. 233 MachineBasicBlock::iterator 234 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo); 235 236 /// Adjust the live registers to be the set in Mask. 237 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); 238 239 /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is 240 /// st(0), FP reg FixStack[1] is st(1) etc. 241 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, 242 MachineBasicBlock::iterator I); 243 244 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 245 246 void handleZeroArgFP(MachineBasicBlock::iterator &I); 247 void handleOneArgFP(MachineBasicBlock::iterator &I); 248 void handleOneArgFPRW(MachineBasicBlock::iterator &I); 249 void handleTwoArgFP(MachineBasicBlock::iterator &I); 250 void handleCompareFP(MachineBasicBlock::iterator &I); 251 void handleCondMovFP(MachineBasicBlock::iterator &I); 252 void handleSpecialFP(MachineBasicBlock::iterator &I); 253 254 bool translateCopy(MachineInstr*); 255 }; 256 char FPS::ID = 0; 257} 258 259FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } 260 261/// getFPReg - Return the X86::FPx register number for the specified operand. 262/// For example, this returns 3 for X86::FP3. 263static unsigned getFPReg(const MachineOperand &MO) { 264 assert(MO.isReg() && "Expected an FP register!"); 265 unsigned Reg = MO.getReg(); 266 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); 267 return Reg - X86::FP0; 268} 269 270/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP 271/// register references into FP stack references. 272/// 273bool FPS::runOnMachineFunction(MachineFunction &MF) { 274 // We only need to run this pass if there are any FP registers used in this 275 // function. If it is all integer, there is nothing for us to do! 276 bool FPIsUsed = false; 277 278 assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!"); 279 for (unsigned i = 0; i <= 6; ++i) 280 if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { 281 FPIsUsed = true; 282 break; 283 } 284 285 // Early exit. 286 if (!FPIsUsed) return false; 287 288 Bundles = &getAnalysis<EdgeBundles>(); 289 TII = MF.getTarget().getInstrInfo(); 290 291 // Prepare cross-MBB liveness. 292 bundleCFG(MF); 293 294 StackTop = 0; 295 296 // Process the function in depth first order so that we process at least one 297 // of the predecessors for every reachable block in the function. 298 SmallPtrSet<MachineBasicBlock*, 8> Processed; 299 MachineBasicBlock *Entry = MF.begin(); 300 301 bool Changed = false; 302 for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> > 303 I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed); 304 I != E; ++I) 305 Changed |= processBasicBlock(MF, **I); 306 307 // Process any unreachable blocks in arbitrary order now. 308 if (MF.size() != Processed.size()) 309 for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) 310 if (Processed.insert(BB)) 311 Changed |= processBasicBlock(MF, *BB); 312 313 LiveBundles.clear(); 314 315 return Changed; 316} 317 318/// bundleCFG - Scan all the basic blocks to determine consistent live-in and 319/// live-out sets for the FP registers. Consistent means that the set of 320/// registers live-out from a block is identical to the live-in set of all 321/// successors. This is not enforced by the normal live-in lists since 322/// registers may be implicitly defined, or not used by all successors. 323void FPS::bundleCFG(MachineFunction &MF) { 324 assert(LiveBundles.empty() && "Stale data in LiveBundles"); 325 LiveBundles.resize(Bundles->getNumBundles()); 326 327 // Gather the actual live-in masks for all MBBs. 328 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 329 MachineBasicBlock *MBB = I; 330 const unsigned Mask = calcLiveInMask(MBB); 331 if (!Mask) 332 continue; 333 // Update MBB ingoing bundle mask. 334 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask; 335 } 336} 337 338/// processBasicBlock - Loop over all of the instructions in the basic block, 339/// transforming FP instructions into their stack form. 340/// 341bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { 342 bool Changed = false; 343 MBB = &BB; 344 345 setupBlockStack(); 346 347 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 348 MachineInstr *MI = I; 349 uint64_t Flags = MI->getDesc().TSFlags; 350 351 unsigned FPInstClass = Flags & X86II::FPTypeMask; 352 if (MI->isInlineAsm()) 353 FPInstClass = X86II::SpecialFP; 354 355 if (MI->isCopy() && translateCopy(MI)) 356 FPInstClass = X86II::SpecialFP; 357 358 if (FPInstClass == X86II::NotFP) 359 continue; // Efficiently ignore non-fp insts! 360 361 MachineInstr *PrevMI = 0; 362 if (I != BB.begin()) 363 PrevMI = prior(I); 364 365 ++NumFP; // Keep track of # of pseudo instrs 366 DEBUG(dbgs() << "\nFPInst:\t" << *MI); 367 368 // Get dead variables list now because the MI pointer may be deleted as part 369 // of processing! 370 SmallVector<unsigned, 8> DeadRegs; 371 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 372 const MachineOperand &MO = MI->getOperand(i); 373 if (MO.isReg() && MO.isDead()) 374 DeadRegs.push_back(MO.getReg()); 375 } 376 377 switch (FPInstClass) { 378 case X86II::ZeroArgFP: handleZeroArgFP(I); break; 379 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0) 380 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0)) 381 case X86II::TwoArgFP: handleTwoArgFP(I); break; 382 case X86II::CompareFP: handleCompareFP(I); break; 383 case X86II::CondMovFP: handleCondMovFP(I); break; 384 case X86II::SpecialFP: handleSpecialFP(I); break; 385 default: llvm_unreachable("Unknown FP Type!"); 386 } 387 388 // Check to see if any of the values defined by this instruction are dead 389 // after definition. If so, pop them. 390 for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { 391 unsigned Reg = DeadRegs[i]; 392 if (Reg >= X86::FP0 && Reg <= X86::FP6) { 393 DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); 394 freeStackSlotAfter(I, Reg-X86::FP0); 395 } 396 } 397 398 // Print out all of the instructions expanded to if -debug 399 DEBUG( 400 MachineBasicBlock::iterator PrevI(PrevMI); 401 if (I == PrevI) { 402 dbgs() << "Just deleted pseudo instruction\n"; 403 } else { 404 MachineBasicBlock::iterator Start = I; 405 // Rewind to first instruction newly inserted. 406 while (Start != BB.begin() && prior(Start) != PrevI) --Start; 407 dbgs() << "Inserted instructions:\n\t"; 408 Start->print(dbgs(), &MF.getTarget()); 409 while (++Start != llvm::next(I)) {} 410 } 411 dumpStack(); 412 ); 413 414 Changed = true; 415 } 416 417 finishBlockStack(); 418 419 return Changed; 420} 421 422/// setupBlockStack - Use the live bundles to set up our model of the stack 423/// to match predecessors' live out stack. 424void FPS::setupBlockStack() { 425 DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() 426 << " derived from " << MBB->getName() << ".\n"); 427 StackTop = 0; 428 // Get the live-in bundle for MBB. 429 const LiveBundle &Bundle = 430 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)]; 431 432 if (!Bundle.Mask) { 433 DEBUG(dbgs() << "Block has no FP live-ins.\n"); 434 return; 435 } 436 437 // Depth-first iteration should ensure that we always have an assigned stack. 438 assert(Bundle.isFixed() && "Reached block before any predecessors"); 439 440 // Push the fixed live-in registers. 441 for (unsigned i = Bundle.FixCount; i > 0; --i) { 442 MBB->addLiveIn(X86::ST0+i-1); 443 DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" 444 << unsigned(Bundle.FixStack[i-1]) << '\n'); 445 pushReg(Bundle.FixStack[i-1]); 446 } 447 448 // Kill off unwanted live-ins. This can happen with a critical edge. 449 // FIXME: We could keep these live registers around as zombies. They may need 450 // to be revived at the end of a short block. It might save a few instrs. 451 adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); 452 DEBUG(MBB->dump()); 453} 454 455/// finishBlockStack - Revive live-outs that are implicitly defined out of 456/// MBB. Shuffle live registers to match the expected fixed stack of any 457/// predecessors, and ensure that all predecessors are expecting the same 458/// stack. 459void FPS::finishBlockStack() { 460 // The RET handling below takes care of return blocks for us. 461 if (MBB->succ_empty()) 462 return; 463 464 DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() 465 << " derived from " << MBB->getName() << ".\n"); 466 467 // Get MBB's live-out bundle. 468 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true); 469 LiveBundle &Bundle = LiveBundles[BundleIdx]; 470 471 // We may need to kill and define some registers to match successors. 472 // FIXME: This can probably be combined with the shuffle below. 473 MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); 474 adjustLiveRegs(Bundle.Mask, Term); 475 476 if (!Bundle.Mask) { 477 DEBUG(dbgs() << "No live-outs.\n"); 478 return; 479 } 480 481 // Has the stack order been fixed yet? 482 DEBUG(dbgs() << "LB#" << BundleIdx << ": "); 483 if (Bundle.isFixed()) { 484 DEBUG(dbgs() << "Shuffling stack to match.\n"); 485 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term); 486 } else { 487 // Not fixed yet, we get to choose. 488 DEBUG(dbgs() << "Fixing stack order now.\n"); 489 Bundle.FixCount = StackTop; 490 for (unsigned i = 0; i < StackTop; ++i) 491 Bundle.FixStack[i] = getStackEntry(i); 492 } 493} 494 495 496//===----------------------------------------------------------------------===// 497// Efficient Lookup Table Support 498//===----------------------------------------------------------------------===// 499 500namespace { 501 struct TableEntry { 502 unsigned from; 503 unsigned to; 504 bool operator<(const TableEntry &TE) const { return from < TE.from; } 505 friend bool operator<(const TableEntry &TE, unsigned V) { 506 return TE.from < V; 507 } 508 friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V, 509 const TableEntry &TE) { 510 return V < TE.from; 511 } 512 }; 513} 514 515#ifndef NDEBUG 516static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { 517 for (unsigned i = 0; i != NumEntries-1; ++i) 518 if (!(Table[i] < Table[i+1])) return false; 519 return true; 520} 521#endif 522 523static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { 524 const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); 525 if (I != Table+N && I->from == Opcode) 526 return I->to; 527 return -1; 528} 529 530#ifdef NDEBUG 531#define ASSERT_SORTED(TABLE) 532#else 533#define ASSERT_SORTED(TABLE) \ 534 { static bool TABLE##Checked = false; \ 535 if (!TABLE##Checked) { \ 536 assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ 537 "All lookup tables must be sorted for efficient access!"); \ 538 TABLE##Checked = true; \ 539 } \ 540 } 541#endif 542 543//===----------------------------------------------------------------------===// 544// Register File -> Register Stack Mapping Methods 545//===----------------------------------------------------------------------===// 546 547// OpcodeTable - Sorted map of register instructions to their stack version. 548// The first element is an register file pseudo instruction, the second is the 549// concrete X86 instruction which uses the register stack. 550// 551static const TableEntry OpcodeTable[] = { 552 { X86::ABS_Fp32 , X86::ABS_F }, 553 { X86::ABS_Fp64 , X86::ABS_F }, 554 { X86::ABS_Fp80 , X86::ABS_F }, 555 { X86::ADD_Fp32m , X86::ADD_F32m }, 556 { X86::ADD_Fp64m , X86::ADD_F64m }, 557 { X86::ADD_Fp64m32 , X86::ADD_F32m }, 558 { X86::ADD_Fp80m32 , X86::ADD_F32m }, 559 { X86::ADD_Fp80m64 , X86::ADD_F64m }, 560 { X86::ADD_FpI16m32 , X86::ADD_FI16m }, 561 { X86::ADD_FpI16m64 , X86::ADD_FI16m }, 562 { X86::ADD_FpI16m80 , X86::ADD_FI16m }, 563 { X86::ADD_FpI32m32 , X86::ADD_FI32m }, 564 { X86::ADD_FpI32m64 , X86::ADD_FI32m }, 565 { X86::ADD_FpI32m80 , X86::ADD_FI32m }, 566 { X86::CHS_Fp32 , X86::CHS_F }, 567 { X86::CHS_Fp64 , X86::CHS_F }, 568 { X86::CHS_Fp80 , X86::CHS_F }, 569 { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, 570 { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, 571 { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, 572 { X86::CMOVB_Fp32 , X86::CMOVB_F }, 573 { X86::CMOVB_Fp64 , X86::CMOVB_F }, 574 { X86::CMOVB_Fp80 , X86::CMOVB_F }, 575 { X86::CMOVE_Fp32 , X86::CMOVE_F }, 576 { X86::CMOVE_Fp64 , X86::CMOVE_F }, 577 { X86::CMOVE_Fp80 , X86::CMOVE_F }, 578 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, 579 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, 580 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, 581 { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, 582 { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, 583 { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, 584 { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, 585 { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, 586 { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, 587 { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, 588 { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, 589 { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, 590 { X86::CMOVP_Fp32 , X86::CMOVP_F }, 591 { X86::CMOVP_Fp64 , X86::CMOVP_F }, 592 { X86::CMOVP_Fp80 , X86::CMOVP_F }, 593 { X86::COS_Fp32 , X86::COS_F }, 594 { X86::COS_Fp64 , X86::COS_F }, 595 { X86::COS_Fp80 , X86::COS_F }, 596 { X86::DIVR_Fp32m , X86::DIVR_F32m }, 597 { X86::DIVR_Fp64m , X86::DIVR_F64m }, 598 { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, 599 { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, 600 { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, 601 { X86::DIVR_FpI16m32, X86::DIVR_FI16m}, 602 { X86::DIVR_FpI16m64, X86::DIVR_FI16m}, 603 { X86::DIVR_FpI16m80, X86::DIVR_FI16m}, 604 { X86::DIVR_FpI32m32, X86::DIVR_FI32m}, 605 { X86::DIVR_FpI32m64, X86::DIVR_FI32m}, 606 { X86::DIVR_FpI32m80, X86::DIVR_FI32m}, 607 { X86::DIV_Fp32m , X86::DIV_F32m }, 608 { X86::DIV_Fp64m , X86::DIV_F64m }, 609 { X86::DIV_Fp64m32 , X86::DIV_F32m }, 610 { X86::DIV_Fp80m32 , X86::DIV_F32m }, 611 { X86::DIV_Fp80m64 , X86::DIV_F64m }, 612 { X86::DIV_FpI16m32 , X86::DIV_FI16m }, 613 { X86::DIV_FpI16m64 , X86::DIV_FI16m }, 614 { X86::DIV_FpI16m80 , X86::DIV_FI16m }, 615 { X86::DIV_FpI32m32 , X86::DIV_FI32m }, 616 { X86::DIV_FpI32m64 , X86::DIV_FI32m }, 617 { X86::DIV_FpI32m80 , X86::DIV_FI32m }, 618 { X86::ILD_Fp16m32 , X86::ILD_F16m }, 619 { X86::ILD_Fp16m64 , X86::ILD_F16m }, 620 { X86::ILD_Fp16m80 , X86::ILD_F16m }, 621 { X86::ILD_Fp32m32 , X86::ILD_F32m }, 622 { X86::ILD_Fp32m64 , X86::ILD_F32m }, 623 { X86::ILD_Fp32m80 , X86::ILD_F32m }, 624 { X86::ILD_Fp64m32 , X86::ILD_F64m }, 625 { X86::ILD_Fp64m64 , X86::ILD_F64m }, 626 { X86::ILD_Fp64m80 , X86::ILD_F64m }, 627 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m}, 628 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m}, 629 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m}, 630 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m}, 631 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m}, 632 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m}, 633 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m}, 634 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m}, 635 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m}, 636 { X86::IST_Fp16m32 , X86::IST_F16m }, 637 { X86::IST_Fp16m64 , X86::IST_F16m }, 638 { X86::IST_Fp16m80 , X86::IST_F16m }, 639 { X86::IST_Fp32m32 , X86::IST_F32m }, 640 { X86::IST_Fp32m64 , X86::IST_F32m }, 641 { X86::IST_Fp32m80 , X86::IST_F32m }, 642 { X86::IST_Fp64m32 , X86::IST_FP64m }, 643 { X86::IST_Fp64m64 , X86::IST_FP64m }, 644 { X86::IST_Fp64m80 , X86::IST_FP64m }, 645 { X86::LD_Fp032 , X86::LD_F0 }, 646 { X86::LD_Fp064 , X86::LD_F0 }, 647 { X86::LD_Fp080 , X86::LD_F0 }, 648 { X86::LD_Fp132 , X86::LD_F1 }, 649 { X86::LD_Fp164 , X86::LD_F1 }, 650 { X86::LD_Fp180 , X86::LD_F1 }, 651 { X86::LD_Fp32m , X86::LD_F32m }, 652 { X86::LD_Fp32m64 , X86::LD_F32m }, 653 { X86::LD_Fp32m80 , X86::LD_F32m }, 654 { X86::LD_Fp64m , X86::LD_F64m }, 655 { X86::LD_Fp64m80 , X86::LD_F64m }, 656 { X86::LD_Fp80m , X86::LD_F80m }, 657 { X86::MUL_Fp32m , X86::MUL_F32m }, 658 { X86::MUL_Fp64m , X86::MUL_F64m }, 659 { X86::MUL_Fp64m32 , X86::MUL_F32m }, 660 { X86::MUL_Fp80m32 , X86::MUL_F32m }, 661 { X86::MUL_Fp80m64 , X86::MUL_F64m }, 662 { X86::MUL_FpI16m32 , X86::MUL_FI16m }, 663 { X86::MUL_FpI16m64 , X86::MUL_FI16m }, 664 { X86::MUL_FpI16m80 , X86::MUL_FI16m }, 665 { X86::MUL_FpI32m32 , X86::MUL_FI32m }, 666 { X86::MUL_FpI32m64 , X86::MUL_FI32m }, 667 { X86::MUL_FpI32m80 , X86::MUL_FI32m }, 668 { X86::SIN_Fp32 , X86::SIN_F }, 669 { X86::SIN_Fp64 , X86::SIN_F }, 670 { X86::SIN_Fp80 , X86::SIN_F }, 671 { X86::SQRT_Fp32 , X86::SQRT_F }, 672 { X86::SQRT_Fp64 , X86::SQRT_F }, 673 { X86::SQRT_Fp80 , X86::SQRT_F }, 674 { X86::ST_Fp32m , X86::ST_F32m }, 675 { X86::ST_Fp64m , X86::ST_F64m }, 676 { X86::ST_Fp64m32 , X86::ST_F32m }, 677 { X86::ST_Fp80m32 , X86::ST_F32m }, 678 { X86::ST_Fp80m64 , X86::ST_F64m }, 679 { X86::ST_FpP80m , X86::ST_FP80m }, 680 { X86::SUBR_Fp32m , X86::SUBR_F32m }, 681 { X86::SUBR_Fp64m , X86::SUBR_F64m }, 682 { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, 683 { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, 684 { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, 685 { X86::SUBR_FpI16m32, X86::SUBR_FI16m}, 686 { X86::SUBR_FpI16m64, X86::SUBR_FI16m}, 687 { X86::SUBR_FpI16m80, X86::SUBR_FI16m}, 688 { X86::SUBR_FpI32m32, X86::SUBR_FI32m}, 689 { X86::SUBR_FpI32m64, X86::SUBR_FI32m}, 690 { X86::SUBR_FpI32m80, X86::SUBR_FI32m}, 691 { X86::SUB_Fp32m , X86::SUB_F32m }, 692 { X86::SUB_Fp64m , X86::SUB_F64m }, 693 { X86::SUB_Fp64m32 , X86::SUB_F32m }, 694 { X86::SUB_Fp80m32 , X86::SUB_F32m }, 695 { X86::SUB_Fp80m64 , X86::SUB_F64m }, 696 { X86::SUB_FpI16m32 , X86::SUB_FI16m }, 697 { X86::SUB_FpI16m64 , X86::SUB_FI16m }, 698 { X86::SUB_FpI16m80 , X86::SUB_FI16m }, 699 { X86::SUB_FpI32m32 , X86::SUB_FI32m }, 700 { X86::SUB_FpI32m64 , X86::SUB_FI32m }, 701 { X86::SUB_FpI32m80 , X86::SUB_FI32m }, 702 { X86::TST_Fp32 , X86::TST_F }, 703 { X86::TST_Fp64 , X86::TST_F }, 704 { X86::TST_Fp80 , X86::TST_F }, 705 { X86::UCOM_FpIr32 , X86::UCOM_FIr }, 706 { X86::UCOM_FpIr64 , X86::UCOM_FIr }, 707 { X86::UCOM_FpIr80 , X86::UCOM_FIr }, 708 { X86::UCOM_Fpr32 , X86::UCOM_Fr }, 709 { X86::UCOM_Fpr64 , X86::UCOM_Fr }, 710 { X86::UCOM_Fpr80 , X86::UCOM_Fr }, 711}; 712 713static unsigned getConcreteOpcode(unsigned Opcode) { 714 ASSERT_SORTED(OpcodeTable); 715 int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); 716 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); 717 return Opc; 718} 719 720//===----------------------------------------------------------------------===// 721// Helper Methods 722//===----------------------------------------------------------------------===// 723 724// PopTable - Sorted map of instructions to their popping version. The first 725// element is an instruction, the second is the version which pops. 726// 727static const TableEntry PopTable[] = { 728 { X86::ADD_FrST0 , X86::ADD_FPrST0 }, 729 730 { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, 731 { X86::DIV_FrST0 , X86::DIV_FPrST0 }, 732 733 { X86::IST_F16m , X86::IST_FP16m }, 734 { X86::IST_F32m , X86::IST_FP32m }, 735 736 { X86::MUL_FrST0 , X86::MUL_FPrST0 }, 737 738 { X86::ST_F32m , X86::ST_FP32m }, 739 { X86::ST_F64m , X86::ST_FP64m }, 740 { X86::ST_Frr , X86::ST_FPrr }, 741 742 { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, 743 { X86::SUB_FrST0 , X86::SUB_FPrST0 }, 744 745 { X86::UCOM_FIr , X86::UCOM_FIPr }, 746 747 { X86::UCOM_FPr , X86::UCOM_FPPr }, 748 { X86::UCOM_Fr , X86::UCOM_FPr }, 749}; 750 751/// popStackAfter - Pop the current value off of the top of the FP stack after 752/// the specified instruction. This attempts to be sneaky and combine the pop 753/// into the instruction itself if possible. The iterator is left pointing to 754/// the last instruction, be it a new pop instruction inserted, or the old 755/// instruction if it was modified in place. 756/// 757void FPS::popStackAfter(MachineBasicBlock::iterator &I) { 758 MachineInstr* MI = I; 759 DebugLoc dl = MI->getDebugLoc(); 760 ASSERT_SORTED(PopTable); 761 if (StackTop == 0) 762 report_fatal_error("Cannot pop empty stack!"); 763 RegMap[Stack[--StackTop]] = ~0; // Update state 764 765 // Check to see if there is a popping version of this instruction... 766 int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); 767 if (Opcode != -1) { 768 I->setDesc(TII->get(Opcode)); 769 if (Opcode == X86::UCOM_FPPr) 770 I->RemoveOperand(0); 771 } else { // Insert an explicit pop 772 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); 773 } 774} 775 776/// freeStackSlotAfter - Free the specified register from the register stack, so 777/// that it is no longer in a register. If the register is currently at the top 778/// of the stack, we just pop the current instruction, otherwise we store the 779/// current top-of-stack into the specified slot, then pop the top of stack. 780void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 781 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 782 popStackAfter(I); 783 return; 784 } 785 786 // Otherwise, store the top of stack into the dead slot, killing the operand 787 // without having to add in an explicit xchg then pop. 788 // 789 I = freeStackSlotBefore(++I, FPRegNo); 790} 791 792/// freeStackSlotBefore - Free the specified register without trying any 793/// folding. 794MachineBasicBlock::iterator 795FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { 796 unsigned STReg = getSTReg(FPRegNo); 797 unsigned OldSlot = getSlot(FPRegNo); 798 unsigned TopReg = Stack[StackTop-1]; 799 Stack[OldSlot] = TopReg; 800 RegMap[TopReg] = OldSlot; 801 RegMap[FPRegNo] = ~0; 802 Stack[--StackTop] = ~0; 803 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg); 804} 805 806/// adjustLiveRegs - Kill and revive registers such that exactly the FP 807/// registers with a bit in Mask are live. 808void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { 809 unsigned Defs = Mask; 810 unsigned Kills = 0; 811 for (unsigned i = 0; i < StackTop; ++i) { 812 unsigned RegNo = Stack[i]; 813 if (!(Defs & (1 << RegNo))) 814 // This register is live, but we don't want it. 815 Kills |= (1 << RegNo); 816 else 817 // We don't need to imp-def this live register. 818 Defs &= ~(1 << RegNo); 819 } 820 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); 821 822 // Produce implicit-defs for free by using killed registers. 823 while (Kills && Defs) { 824 unsigned KReg = CountTrailingZeros_32(Kills); 825 unsigned DReg = CountTrailingZeros_32(Defs); 826 DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); 827 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); 828 std::swap(RegMap[KReg], RegMap[DReg]); 829 Kills &= ~(1 << KReg); 830 Defs &= ~(1 << DReg); 831 } 832 833 // Kill registers by popping. 834 if (Kills && I != MBB->begin()) { 835 MachineBasicBlock::iterator I2 = llvm::prior(I); 836 for (;;) { 837 unsigned KReg = getStackEntry(0); 838 if (!(Kills & (1 << KReg))) 839 break; 840 DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); 841 popStackAfter(I2); 842 Kills &= ~(1 << KReg); 843 } 844 } 845 846 // Manually kill the rest. 847 while (Kills) { 848 unsigned KReg = CountTrailingZeros_32(Kills); 849 DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); 850 freeStackSlotBefore(I, KReg); 851 Kills &= ~(1 << KReg); 852 } 853 854 // Load zeros for all the imp-defs. 855 while(Defs) { 856 unsigned DReg = CountTrailingZeros_32(Defs); 857 DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); 858 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); 859 pushReg(DReg); 860 Defs &= ~(1 << DReg); 861 } 862 863 // Now we should have the correct registers live. 864 DEBUG(dumpStack()); 865 assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch"); 866} 867 868/// shuffleStackTop - emit fxch instructions before I to shuffle the top 869/// FixCount entries into the order given by FixStack. 870/// FIXME: Is there a better algorithm than insertion sort? 871void FPS::shuffleStackTop(const unsigned char *FixStack, 872 unsigned FixCount, 873 MachineBasicBlock::iterator I) { 874 // Move items into place, starting from the desired stack bottom. 875 while (FixCount--) { 876 // Old register at position FixCount. 877 unsigned OldReg = getStackEntry(FixCount); 878 // Desired register at position FixCount. 879 unsigned Reg = FixStack[FixCount]; 880 if (Reg == OldReg) 881 continue; 882 // (Reg st0) (OldReg st0) = (Reg OldReg st0) 883 moveToTop(Reg, I); 884 moveToTop(OldReg, I); 885 } 886 DEBUG(dumpStack()); 887} 888 889 890//===----------------------------------------------------------------------===// 891// Instruction transformation implementation 892//===----------------------------------------------------------------------===// 893 894/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> 895/// 896void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { 897 MachineInstr *MI = I; 898 unsigned DestReg = getFPReg(MI->getOperand(0)); 899 900 // Change from the pseudo instruction to the concrete instruction. 901 MI->RemoveOperand(0); // Remove the explicit ST(0) operand 902 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 903 904 // Result gets pushed on the stack. 905 pushReg(DestReg); 906} 907 908/// handleOneArgFP - fst <mem>, ST(0) 909/// 910void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { 911 MachineInstr *MI = I; 912 unsigned NumOps = MI->getDesc().getNumOperands(); 913 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && 914 "Can only handle fst* & ftst instructions!"); 915 916 // Is this the last use of the source register? 917 unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); 918 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 919 920 // FISTP64m is strange because there isn't a non-popping versions. 921 // If we have one _and_ we don't want to pop the operand, duplicate the value 922 // on the stack instead of moving it. This ensure that popping the value is 923 // always ok. 924 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m. 925 // 926 if (!KillsSrc && 927 (MI->getOpcode() == X86::IST_Fp64m32 || 928 MI->getOpcode() == X86::ISTT_Fp16m32 || 929 MI->getOpcode() == X86::ISTT_Fp32m32 || 930 MI->getOpcode() == X86::ISTT_Fp64m32 || 931 MI->getOpcode() == X86::IST_Fp64m64 || 932 MI->getOpcode() == X86::ISTT_Fp16m64 || 933 MI->getOpcode() == X86::ISTT_Fp32m64 || 934 MI->getOpcode() == X86::ISTT_Fp64m64 || 935 MI->getOpcode() == X86::IST_Fp64m80 || 936 MI->getOpcode() == X86::ISTT_Fp16m80 || 937 MI->getOpcode() == X86::ISTT_Fp32m80 || 938 MI->getOpcode() == X86::ISTT_Fp64m80 || 939 MI->getOpcode() == X86::ST_FpP80m)) { 940 duplicateToTop(Reg, getScratchReg(), I); 941 } else { 942 moveToTop(Reg, I); // Move to the top of the stack... 943 } 944 945 // Convert from the pseudo instruction to the concrete instruction. 946 MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand 947 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 948 949 if (MI->getOpcode() == X86::IST_FP64m || 950 MI->getOpcode() == X86::ISTT_FP16m || 951 MI->getOpcode() == X86::ISTT_FP32m || 952 MI->getOpcode() == X86::ISTT_FP64m || 953 MI->getOpcode() == X86::ST_FP80m) { 954 if (StackTop == 0) 955 report_fatal_error("Stack empty??"); 956 --StackTop; 957 } else if (KillsSrc) { // Last use of operand? 958 popStackAfter(I); 959 } 960} 961 962 963/// handleOneArgFPRW: Handle instructions that read from the top of stack and 964/// replace the value with a newly computed value. These instructions may have 965/// non-fp operands after their FP operands. 966/// 967/// Examples: 968/// R1 = fchs R2 969/// R1 = fadd R2, [mem] 970/// 971void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { 972 MachineInstr *MI = I; 973#ifndef NDEBUG 974 unsigned NumOps = MI->getDesc().getNumOperands(); 975 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); 976#endif 977 978 // Is this the last use of the source register? 979 unsigned Reg = getFPReg(MI->getOperand(1)); 980 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 981 982 if (KillsSrc) { 983 // If this is the last use of the source register, just make sure it's on 984 // the top of the stack. 985 moveToTop(Reg, I); 986 if (StackTop == 0) 987 report_fatal_error("Stack cannot be empty!"); 988 --StackTop; 989 pushReg(getFPReg(MI->getOperand(0))); 990 } else { 991 // If this is not the last use of the source register, _copy_ it to the top 992 // of the stack. 993 duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); 994 } 995 996 // Change from the pseudo instruction to the concrete instruction. 997 MI->RemoveOperand(1); // Drop the source operand. 998 MI->RemoveOperand(0); // Drop the destination operand. 999 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1000} 1001 1002 1003//===----------------------------------------------------------------------===// 1004// Define tables of various ways to map pseudo instructions 1005// 1006 1007// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) 1008static const TableEntry ForwardST0Table[] = { 1009 { X86::ADD_Fp32 , X86::ADD_FST0r }, 1010 { X86::ADD_Fp64 , X86::ADD_FST0r }, 1011 { X86::ADD_Fp80 , X86::ADD_FST0r }, 1012 { X86::DIV_Fp32 , X86::DIV_FST0r }, 1013 { X86::DIV_Fp64 , X86::DIV_FST0r }, 1014 { X86::DIV_Fp80 , X86::DIV_FST0r }, 1015 { X86::MUL_Fp32 , X86::MUL_FST0r }, 1016 { X86::MUL_Fp64 , X86::MUL_FST0r }, 1017 { X86::MUL_Fp80 , X86::MUL_FST0r }, 1018 { X86::SUB_Fp32 , X86::SUB_FST0r }, 1019 { X86::SUB_Fp64 , X86::SUB_FST0r }, 1020 { X86::SUB_Fp80 , X86::SUB_FST0r }, 1021}; 1022 1023// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) 1024static const TableEntry ReverseST0Table[] = { 1025 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative 1026 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative 1027 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative 1028 { X86::DIV_Fp32 , X86::DIVR_FST0r }, 1029 { X86::DIV_Fp64 , X86::DIVR_FST0r }, 1030 { X86::DIV_Fp80 , X86::DIVR_FST0r }, 1031 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative 1032 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative 1033 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative 1034 { X86::SUB_Fp32 , X86::SUBR_FST0r }, 1035 { X86::SUB_Fp64 , X86::SUBR_FST0r }, 1036 { X86::SUB_Fp80 , X86::SUBR_FST0r }, 1037}; 1038 1039// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) 1040static const TableEntry ForwardSTiTable[] = { 1041 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative 1042 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative 1043 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative 1044 { X86::DIV_Fp32 , X86::DIVR_FrST0 }, 1045 { X86::DIV_Fp64 , X86::DIVR_FrST0 }, 1046 { X86::DIV_Fp80 , X86::DIVR_FrST0 }, 1047 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative 1048 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative 1049 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative 1050 { X86::SUB_Fp32 , X86::SUBR_FrST0 }, 1051 { X86::SUB_Fp64 , X86::SUBR_FrST0 }, 1052 { X86::SUB_Fp80 , X86::SUBR_FrST0 }, 1053}; 1054 1055// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) 1056static const TableEntry ReverseSTiTable[] = { 1057 { X86::ADD_Fp32 , X86::ADD_FrST0 }, 1058 { X86::ADD_Fp64 , X86::ADD_FrST0 }, 1059 { X86::ADD_Fp80 , X86::ADD_FrST0 }, 1060 { X86::DIV_Fp32 , X86::DIV_FrST0 }, 1061 { X86::DIV_Fp64 , X86::DIV_FrST0 }, 1062 { X86::DIV_Fp80 , X86::DIV_FrST0 }, 1063 { X86::MUL_Fp32 , X86::MUL_FrST0 }, 1064 { X86::MUL_Fp64 , X86::MUL_FrST0 }, 1065 { X86::MUL_Fp80 , X86::MUL_FrST0 }, 1066 { X86::SUB_Fp32 , X86::SUB_FrST0 }, 1067 { X86::SUB_Fp64 , X86::SUB_FrST0 }, 1068 { X86::SUB_Fp80 , X86::SUB_FrST0 }, 1069}; 1070 1071 1072/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual 1073/// instructions which need to be simplified and possibly transformed. 1074/// 1075/// Result: ST(0) = fsub ST(0), ST(i) 1076/// ST(i) = fsub ST(0), ST(i) 1077/// ST(0) = fsubr ST(0), ST(i) 1078/// ST(i) = fsubr ST(0), ST(i) 1079/// 1080void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 1081 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1082 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1083 MachineInstr *MI = I; 1084 1085 unsigned NumOperands = MI->getDesc().getNumOperands(); 1086 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 1087 unsigned Dest = getFPReg(MI->getOperand(0)); 1088 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1089 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1090 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1091 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1092 DebugLoc dl = MI->getDebugLoc(); 1093 1094 unsigned TOS = getStackEntry(0); 1095 1096 // One of our operands must be on the top of the stack. If neither is yet, we 1097 // need to move one. 1098 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 1099 // We can choose to move either operand to the top of the stack. If one of 1100 // the operands is killed by this instruction, we want that one so that we 1101 // can update right on top of the old version. 1102 if (KillsOp0) { 1103 moveToTop(Op0, I); // Move dead operand to TOS. 1104 TOS = Op0; 1105 } else if (KillsOp1) { 1106 moveToTop(Op1, I); 1107 TOS = Op1; 1108 } else { 1109 // All of the operands are live after this instruction executes, so we 1110 // cannot update on top of any operand. Because of this, we must 1111 // duplicate one of the stack elements to the top. It doesn't matter 1112 // which one we pick. 1113 // 1114 duplicateToTop(Op0, Dest, I); 1115 Op0 = TOS = Dest; 1116 KillsOp0 = true; 1117 } 1118 } else if (!KillsOp0 && !KillsOp1) { 1119 // If we DO have one of our operands at the top of the stack, but we don't 1120 // have a dead operand, we must duplicate one of the operands to a new slot 1121 // on the stack. 1122 duplicateToTop(Op0, Dest, I); 1123 Op0 = TOS = Dest; 1124 KillsOp0 = true; 1125 } 1126 1127 // Now we know that one of our operands is on the top of the stack, and at 1128 // least one of our operands is killed by this instruction. 1129 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 1130 "Stack conditions not set up right!"); 1131 1132 // We decide which form to use based on what is on the top of the stack, and 1133 // which operand is killed by this instruction. 1134 const TableEntry *InstTable; 1135 bool isForward = TOS == Op0; 1136 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 1137 if (updateST0) { 1138 if (isForward) 1139 InstTable = ForwardST0Table; 1140 else 1141 InstTable = ReverseST0Table; 1142 } else { 1143 if (isForward) 1144 InstTable = ForwardSTiTable; 1145 else 1146 InstTable = ReverseSTiTable; 1147 } 1148 1149 int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), 1150 MI->getOpcode()); 1151 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 1152 1153 // NotTOS - The register which is not on the top of stack... 1154 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; 1155 1156 // Replace the old instruction with a new instruction 1157 MBB->remove(I++); 1158 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); 1159 1160 // If both operands are killed, pop one off of the stack in addition to 1161 // overwriting the other one. 1162 if (KillsOp0 && KillsOp1 && Op0 != Op1) { 1163 assert(!updateST0 && "Should have updated other operand!"); 1164 popStackAfter(I); // Pop the top of stack 1165 } 1166 1167 // Update stack information so that we know the destination register is now on 1168 // the stack. 1169 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 1170 assert(UpdatedSlot < StackTop && Dest < 7); 1171 Stack[UpdatedSlot] = Dest; 1172 RegMap[Dest] = UpdatedSlot; 1173 MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction 1174} 1175 1176/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 1177/// register arguments and no explicit destinations. 1178/// 1179void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 1180 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1181 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1182 MachineInstr *MI = I; 1183 1184 unsigned NumOperands = MI->getDesc().getNumOperands(); 1185 assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 1186 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1187 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1188 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1189 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1190 1191 // Make sure the first operand is on the top of stack, the other one can be 1192 // anywhere. 1193 moveToTop(Op0, I); 1194 1195 // Change from the pseudo instruction to the concrete instruction. 1196 MI->getOperand(0).setReg(getSTReg(Op1)); 1197 MI->RemoveOperand(1); 1198 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1199 1200 // If any of the operands are killed by this instruction, free them. 1201 if (KillsOp0) freeStackSlotAfter(I, Op0); 1202 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 1203} 1204 1205/// handleCondMovFP - Handle two address conditional move instructions. These 1206/// instructions move a st(i) register to st(0) iff a condition is true. These 1207/// instructions require that the first operand is at the top of the stack, but 1208/// otherwise don't modify the stack at all. 1209void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { 1210 MachineInstr *MI = I; 1211 1212 unsigned Op0 = getFPReg(MI->getOperand(0)); 1213 unsigned Op1 = getFPReg(MI->getOperand(2)); 1214 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1215 1216 // The first operand *must* be on the top of the stack. 1217 moveToTop(Op0, I); 1218 1219 // Change the second operand to the stack register that the operand is in. 1220 // Change from the pseudo instruction to the concrete instruction. 1221 MI->RemoveOperand(0); 1222 MI->RemoveOperand(1); 1223 MI->getOperand(0).setReg(getSTReg(Op1)); 1224 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1225 1226 // If we kill the second operand, make sure to pop it from the stack. 1227 if (Op0 != Op1 && KillsOp1) { 1228 // Get this value off of the register stack. 1229 freeStackSlotAfter(I, Op1); 1230 } 1231} 1232 1233 1234/// handleSpecialFP - Handle special instructions which behave unlike other 1235/// floating point instructions. This is primarily intended for use by pseudo 1236/// instructions. 1237/// 1238void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { 1239 MachineInstr *MI = I; 1240 switch (MI->getOpcode()) { 1241 default: llvm_unreachable("Unknown SpecialFP instruction!"); 1242 case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! 1243 case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! 1244 case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! 1245 assert(StackTop == 0 && "Stack should be empty after a call!"); 1246 pushReg(getFPReg(MI->getOperand(0))); 1247 break; 1248 case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! 1249 case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! 1250 case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! 1251 // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. 1252 // The pattern we expect is: 1253 // CALL 1254 // FP1 = FpGET_ST0 1255 // FP4 = FpGET_ST1 1256 // 1257 // At this point, we've pushed FP1 on the top of stack, so it should be 1258 // present if it isn't dead. If it was dead, we already emitted a pop to 1259 // remove it from the stack and StackTop = 0. 1260 1261 // Push FP4 as top of stack next. 1262 pushReg(getFPReg(MI->getOperand(0))); 1263 1264 // If StackTop was 0 before we pushed our operand, then ST(0) must have been 1265 // dead. In this case, the ST(1) value is the only thing that is live, so 1266 // it should be on the TOS (after the pop that was emitted) and is. Just 1267 // continue in this case. 1268 if (StackTop == 1) 1269 break; 1270 1271 // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top 1272 // elements so that our accounting is correct. 1273 unsigned RegOnTop = getStackEntry(0); 1274 unsigned RegNo = getStackEntry(1); 1275 1276 // Swap the slots the regs are in. 1277 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 1278 1279 // Swap stack slot contents. 1280 if (RegMap[RegOnTop] >= StackTop) 1281 report_fatal_error("Access past stack top!"); 1282 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 1283 break; 1284 } 1285 case X86::FpSET_ST0_32: 1286 case X86::FpSET_ST0_64: 1287 case X86::FpSET_ST0_80: { 1288 // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm 1289 // arguments that use an st constraint. We expect a sequence of 1290 // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM 1291 unsigned Op0 = getFPReg(MI->getOperand(0)); 1292 1293 if (!MI->killsRegister(X86::FP0 + Op0)) { 1294 // Duplicate Op0 into a temporary on the stack top. 1295 duplicateToTop(Op0, getScratchReg(), I); 1296 } else { 1297 // Op0 is killed, so just swap it into position. 1298 moveToTop(Op0, I); 1299 } 1300 --StackTop; // "Forget" we have something on the top of stack! 1301 break; 1302 } 1303 case X86::FpSET_ST1_32: 1304 case X86::FpSET_ST1_64: 1305 case X86::FpSET_ST1_80: { 1306 // Set up st(1) for inline asm. We are assuming that st(0) has already been 1307 // set up by FpSET_ST0, and our StackTop is off by one because of it. 1308 unsigned Op0 = getFPReg(MI->getOperand(0)); 1309 // Restore the actual StackTop from before Fp_SET_ST0. 1310 // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we 1311 // are not enforcing the constraint. 1312 ++StackTop; 1313 unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). 1314 if (!MI->killsRegister(X86::FP0 + Op0)) { 1315 duplicateToTop(Op0, getScratchReg(), I); 1316 moveToTop(RegOnTop, I); 1317 } else if (getSTReg(Op0) != X86::ST1) { 1318 // We have the wrong value at st(1). Shuffle! Untested! 1319 moveToTop(getStackEntry(1), I); 1320 moveToTop(Op0, I); 1321 moveToTop(RegOnTop, I); 1322 } 1323 assert(StackTop >= 2 && "Too few live registers"); 1324 StackTop -= 2; // "Forget" both st(0) and st(1). 1325 break; 1326 } 1327 case X86::MOV_Fp3232: 1328 case X86::MOV_Fp3264: 1329 case X86::MOV_Fp6432: 1330 case X86::MOV_Fp6464: 1331 case X86::MOV_Fp3280: 1332 case X86::MOV_Fp6480: 1333 case X86::MOV_Fp8032: 1334 case X86::MOV_Fp8064: 1335 case X86::MOV_Fp8080: { 1336 const MachineOperand &MO1 = MI->getOperand(1); 1337 unsigned SrcReg = getFPReg(MO1); 1338 1339 const MachineOperand &MO0 = MI->getOperand(0); 1340 unsigned DestReg = getFPReg(MO0); 1341 if (MI->killsRegister(X86::FP0+SrcReg)) { 1342 // If the input operand is killed, we can just change the owner of the 1343 // incoming stack slot into the result. 1344 unsigned Slot = getSlot(SrcReg); 1345 assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); 1346 Stack[Slot] = DestReg; 1347 RegMap[DestReg] = Slot; 1348 1349 } else { 1350 // For FMOV we just duplicate the specified value to a new stack slot. 1351 // This could be made better, but would require substantial changes. 1352 duplicateToTop(SrcReg, DestReg, I); 1353 } 1354 } 1355 break; 1356 case TargetOpcode::INLINEASM: { 1357 // The inline asm MachineInstr currently only *uses* FP registers for the 1358 // 'f' constraint. These should be turned into the current ST(x) register 1359 // in the machine instr. Also, any kills should be explicitly popped after 1360 // the inline asm. 1361 unsigned Kills = 0; 1362 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1363 MachineOperand &Op = MI->getOperand(i); 1364 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1365 continue; 1366 assert(Op.isUse() && "Only handle inline asm uses right now"); 1367 1368 unsigned FPReg = getFPReg(Op); 1369 Op.setReg(getSTReg(FPReg)); 1370 1371 // If we kill this operand, make sure to pop it from the stack after the 1372 // asm. We just remember it for now, and pop them all off at the end in 1373 // a batch. 1374 if (Op.isKill()) 1375 Kills |= 1U << FPReg; 1376 } 1377 1378 // If this asm kills any FP registers (is the last use of them) we must 1379 // explicitly emit pop instructions for them. Do this now after the asm has 1380 // executed so that the ST(x) numbers are not off (which would happen if we 1381 // did this inline with operand rewriting). 1382 // 1383 // Note: this might be a non-optimal pop sequence. We might be able to do 1384 // better by trying to pop in stack order or something. 1385 MachineBasicBlock::iterator InsertPt = MI; 1386 while (Kills) { 1387 unsigned FPReg = CountTrailingZeros_32(Kills); 1388 freeStackSlotAfter(InsertPt, FPReg); 1389 Kills &= ~(1U << FPReg); 1390 } 1391 // Don't delete the inline asm! 1392 return; 1393 } 1394 1395 case X86::RET: 1396 case X86::RETI: 1397 // If RET has an FP register use operand, pass the first one in ST(0) and 1398 // the second one in ST(1). 1399 1400 // Find the register operands. 1401 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; 1402 unsigned LiveMask = 0; 1403 1404 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1405 MachineOperand &Op = MI->getOperand(i); 1406 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1407 continue; 1408 // FP Register uses must be kills unless there are two uses of the same 1409 // register, in which case only one will be a kill. 1410 assert(Op.isUse() && 1411 (Op.isKill() || // Marked kill. 1412 getFPReg(Op) == FirstFPRegOp || // Second instance. 1413 MI->killsRegister(Op.getReg())) && // Later use is marked kill. 1414 "Ret only defs operands, and values aren't live beyond it"); 1415 1416 if (FirstFPRegOp == ~0U) 1417 FirstFPRegOp = getFPReg(Op); 1418 else { 1419 assert(SecondFPRegOp == ~0U && "More than two fp operands!"); 1420 SecondFPRegOp = getFPReg(Op); 1421 } 1422 LiveMask |= (1 << getFPReg(Op)); 1423 1424 // Remove the operand so that later passes don't see it. 1425 MI->RemoveOperand(i); 1426 --i, --e; 1427 } 1428 1429 // We may have been carrying spurious live-ins, so make sure only the returned 1430 // registers are left live. 1431 adjustLiveRegs(LiveMask, MI); 1432 if (!LiveMask) return; // Quick check to see if any are possible. 1433 1434 // There are only four possibilities here: 1435 // 1) we are returning a single FP value. In this case, it has to be in 1436 // ST(0) already, so just declare success by removing the value from the 1437 // FP Stack. 1438 if (SecondFPRegOp == ~0U) { 1439 // Assert that the top of stack contains the right FP register. 1440 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && 1441 "Top of stack not the right register for RET!"); 1442 1443 // Ok, everything is good, mark the value as not being on the stack 1444 // anymore so that our assertion about the stack being empty at end of 1445 // block doesn't fire. 1446 StackTop = 0; 1447 return; 1448 } 1449 1450 // Otherwise, we are returning two values: 1451 // 2) If returning the same value for both, we only have one thing in the FP 1452 // stack. Consider: RET FP1, FP1 1453 if (StackTop == 1) { 1454 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& 1455 "Stack misconfiguration for RET!"); 1456 1457 // Duplicate the TOS so that we return it twice. Just pick some other FPx 1458 // register to hold it. 1459 unsigned NewReg = getScratchReg(); 1460 duplicateToTop(FirstFPRegOp, NewReg, MI); 1461 FirstFPRegOp = NewReg; 1462 } 1463 1464 /// Okay we know we have two different FPx operands now: 1465 assert(StackTop == 2 && "Must have two values live!"); 1466 1467 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently 1468 /// in ST(1). In this case, emit an fxch. 1469 if (getStackEntry(0) == SecondFPRegOp) { 1470 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); 1471 moveToTop(FirstFPRegOp, MI); 1472 } 1473 1474 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in 1475 /// ST(1). Just remove both from our understanding of the stack and return. 1476 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); 1477 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); 1478 StackTop = 0; 1479 return; 1480 } 1481 1482 I = MBB->erase(I); // Remove the pseudo instruction 1483 1484 // We want to leave I pointing to the previous instruction, but what if we 1485 // just erased the first instruction? 1486 if (I == MBB->begin()) { 1487 DEBUG(dbgs() << "Inserting dummy KILL\n"); 1488 I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL)); 1489 } else 1490 --I; 1491} 1492 1493// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. 1494bool FPS::translateCopy(MachineInstr *MI) { 1495 unsigned DstReg = MI->getOperand(0).getReg(); 1496 unsigned SrcReg = MI->getOperand(1).getReg(); 1497 1498 if (DstReg == X86::ST0) { 1499 MI->setDesc(TII->get(X86::FpSET_ST0_80)); 1500 MI->RemoveOperand(0); 1501 return true; 1502 } 1503 if (DstReg == X86::ST1) { 1504 MI->setDesc(TII->get(X86::FpSET_ST1_80)); 1505 MI->RemoveOperand(0); 1506 return true; 1507 } 1508 if (SrcReg == X86::ST0) { 1509 MI->setDesc(TII->get(X86::FpGET_ST0_80)); 1510 return true; 1511 } 1512 if (SrcReg == X86::ST1) { 1513 MI->setDesc(TII->get(X86::FpGET_ST1_80)); 1514 return true; 1515 } 1516 if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { 1517 MI->setDesc(TII->get(X86::MOV_Fp8080)); 1518 return true; 1519 } 1520 return false; 1521} 1522