X86FastISel.cpp revision 219077
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  ///
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  virtual bool TargetSelectInstruction(const Instruction *I);

  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
  /// vreg is being provided by the specified load instruction. If possible,
  /// try to fold the load as an operand to the instruction, returning true if
  /// possible.
  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI);

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);

  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(EVT VT, const Value *Val,
                        const X86AddressMode &AM);
  bool X86FastEmitStore(EVT VT, unsigned Val,
                        const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);

  bool X86SelectExtractValue(const Instruction *I);

  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
  bool X86SelectCall(const Instruction *I);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(const Constant *C);

  unsigned TargetMaterializeAlloca(const AllocaInst *C);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};

} // end anonymous namespace.

bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(Opc), ResultReg), AM);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
                              const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
    Val = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
    break;
  }

  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(Opc)), AM).addReg(Val);
  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if ((int)CI->getSExtValue() == CI->getSExtValue())
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                             DL, TII.get(Opc)), AM)
                             .addImm(Signed ? (uint64_t) CI->getSExtValue() :
                                              CI->getZExtValue());
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        Disp += SL->getElementOffset(Idx);
      } else {
        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
        SmallVector<const Value *, 4> Worklist;
        Worklist.push_back(Op);
        do {
          Op = Worklist.pop_back_val();
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            Disp += CI->getSExtValue() * S;
          } else if (isa<AddOperator>(Op) &&
                     isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
            // An add with a constant operand. Fold the constant.
            ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            Disp += CI->getSExtValue() * S;
            // Add the other operand back to the work list.
            Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
          } else if (IndexReg == 0 &&
                     (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                     (S == 1 || S == 2 || S == 4 || S == 8)) {
            // Scaled-index addressing.
            Scale = S;
            IndexReg = getRegForGEPIndex(Op).first;
            if (IndexReg == 0)
              return false;
          } else
            // Unsupported.
            goto unsupported_gep;
        } while (!Worklist.empty());
      }
    }
    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    if (X86SelectAddress(U->getOperand(0), AM))
      return true;

    // If we couldn't merge the sub value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;
    break;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS yet.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // Allow the subtarget to classify the global.
    unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

    // If this reference is relative to the pic base, set it now.
    if (isGlobalRelativeToPICBase(GVFlags)) {
      // FIXME: How do we know Base.Reg is free??
      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    }

    // Unless the ABI requires an extra load, return a direct reference to
    // the global.
    if (!isGlobalStubReference(GVFlags)) {
      if (Subtarget->isPICStyleRIPRel()) {
        // Use rip-relative addressing if we can.  Above we verified that the
        // base and index registers are unused.
        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
        AM.Base.Reg = X86::RIP;
      }
      AM.GVOpFlags = GVFlags;
      return true;
    }

    // Ok, we need to do a load from a stub.  If we've already loaded from this
    // stub, reuse the loaded pointer, otherwise emit the load now.
    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
    unsigned LoadReg;
    if (I != LocalValueMap.end() && I->second != 0) {
      LoadReg = I->second;
    } else {
      // Issue load from stub.
      unsigned Opc = 0;
      const TargetRegisterClass *RC = NULL;
      X86AddressMode StubAM;
      StubAM.Base.Reg = AM.Base.Reg;
      StubAM.GV = GV;
      StubAM.GVOpFlags = GVFlags;

      // Prepare for inserting code in the local-value area.
      SavePoint SaveInsertPt = enterLocalValueArea();

      if (TLI.getPointerTy() == MVT::i64) {
        Opc = X86::MOV64rm;
        RC  = X86::GR64RegisterClass;

        if (Subtarget->isPICStyleRIPRel())
          StubAM.Base.Reg = X86::RIP;
      } else {
        Opc = X86::MOV32rm;
        RC  = X86::GR32RegisterClass;
      }

      LoadReg = createResultReg(RC);
      MachineInstrBuilder LoadMI =
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
      addFullAddress(LoadMI, StubAM);

      // Ok, back to normal mode.
      leaveLocalValueArea(SaveInsertPt);

      // Prevent loading GV stub multiple times in same MBB.
      LocalValueMap[V] = LoadReg;
    }

    // Now construct the final address. Note that the Disp, Scale,
    // and Index values may already be set here.
    AM.Base.Reg = LoadReg;
    AM.GV = 0;
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectCallAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle DLLImport.
    if (GV->hasDLLImportLinkage())
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can.  Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}


/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM))
    return false;

  return X86FastEmitStore(VT, I->getOperand(0), AM);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  if (Subtarget->isTargetWin64())
    return false;

  // Don't handle popping bytes on return for now.
  if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
        ->getBytesToPopOnReturn() != 0)
    return 0;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;
    // TODO: For now, don't try to handle cases where getLocInfo()
    // says Full but the types don't match.
    if (TLI.getValueType(RV->getType()) != VA.getValVT())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
      return false;

    // Make the copy.
    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  // Now emit the RET.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
  return true;
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I)  {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
  case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
  }
}

/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
/// of the comparison, return an opcode that works for the compare (e.g.
/// CMP32ri) otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8: return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                     EVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
807 if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { 808 if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { 809 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc)) 810 .addReg(Op0Reg) 811 .addImm(Op1C->getSExtValue()); 812 return true; 813 } 814 } 815 816 unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); 817 if (CompareOpc == 0) return false; 818 819 unsigned Op1Reg = getRegForValue(Op1); 820 if (Op1Reg == 0) return false; 821 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc)) 822 .addReg(Op0Reg) 823 .addReg(Op1Reg); 824 825 return true; 826} 827 828bool X86FastISel::X86SelectCmp(const Instruction *I) { 829 const CmpInst *CI = cast<CmpInst>(I); 830 831 MVT VT; 832 if (!isTypeLegal(I->getOperand(0)->getType(), VT)) 833 return false; 834 835 unsigned ResultReg = createResultReg(&X86::GR8RegClass); 836 unsigned SetCCOpc; 837 bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. 838 switch (CI->getPredicate()) { 839 case CmpInst::FCMP_OEQ: { 840 if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) 841 return false; 842 843 unsigned EReg = createResultReg(&X86::GR8RegClass); 844 unsigned NPReg = createResultReg(&X86::GR8RegClass); 845 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg); 846 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 847 TII.get(X86::SETNPr), NPReg); 848 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 849 TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); 850 UpdateValueMap(I, ResultReg); 851 return true; 852 } 853 case CmpInst::FCMP_UNE: { 854 if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) 855 return false; 856 857 unsigned NEReg = createResultReg(&X86::GR8RegClass); 858 unsigned PReg = createResultReg(&X86::GR8RegClass); 859 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 860 TII.get(X86::SETNEr), NEReg); 861 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 862 TII.get(X86::SETPr), PReg); 863 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 864 TII.get(X86::OR8rr), ResultReg) 865 .addReg(PReg).addReg(NEReg); 866 UpdateValueMap(I, ResultReg); 867 return true; 868 } 869 case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; 870 case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break; 871 case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break; 872 case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break; 873 case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; 874 case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break; 875 case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break; 876 case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; 877 case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break; 878 case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break; 879 case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; 880 case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; 881 882 case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; 883 case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; 884 case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; 885 case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break; 886 case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; 887 case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; 888 case 
CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break; 889 case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break; 890 case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break; 891 case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break; 892 default: 893 return false; 894 } 895 896 const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); 897 if (SwapArgs) 898 std::swap(Op0, Op1); 899 900 // Emit a compare of Op0/Op1. 901 if (!X86FastEmitCompare(Op0, Op1, VT)) 902 return false; 903 904 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg); 905 UpdateValueMap(I, ResultReg); 906 return true; 907} 908 909bool X86FastISel::X86SelectZExt(const Instruction *I) { 910 // Handle zero-extension from i1 to i8, which is common. 911 if (I->getType()->isIntegerTy(8) && 912 I->getOperand(0)->getType()->isIntegerTy(1)) { 913 unsigned ResultReg = getRegForValue(I->getOperand(0)); 914 if (ResultReg == 0) return false; 915 // Set the high bits to zero. 916 ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); 917 if (ResultReg == 0) return false; 918 UpdateValueMap(I, ResultReg); 919 return true; 920 } 921 922 return false; 923} 924 925 926bool X86FastISel::X86SelectBranch(const Instruction *I) { 927 // Unconditional branches are selected by tablegen-generated code. 928 // Handle a conditional branch. 929 const BranchInst *BI = cast<BranchInst>(I); 930 MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 931 MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 932 933 // Fold the common case of a conditional branch with a comparison 934 // in the same block (values defined on other blocks may not have 935 // initialized registers). 936 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 937 if (CI->hasOneUse() && CI->getParent() == I->getParent()) { 938 EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); 939 940 // Try to take advantage of fallthrough opportunities. 941 CmpInst::Predicate Predicate = CI->getPredicate(); 942 if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { 943 std::swap(TrueMBB, FalseMBB); 944 Predicate = CmpInst::getInversePredicate(Predicate); 945 } 946 947 bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. 948 unsigned BranchOpc; // Opcode to jump on, e.g. 
"X86::JA" 949 950 switch (Predicate) { 951 case CmpInst::FCMP_OEQ: 952 std::swap(TrueMBB, FalseMBB); 953 Predicate = CmpInst::FCMP_UNE; 954 // FALL THROUGH 955 case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; 956 case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break; 957 case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; 958 case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; 959 case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; 960 case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; 961 case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; 962 case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; 963 case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; 964 case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; 965 case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; 966 case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; 967 case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; 968 969 case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; 970 case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; 971 case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; 972 case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; 973 case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; 974 case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; 975 case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; 976 case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; 977 case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; 978 case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; 979 default: 980 return false; 981 } 982 983 const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); 984 if (SwapArgs) 985 std::swap(Op0, Op1); 986 987 // Emit a compare of the LHS and RHS, setting the flags. 988 if (!X86FastEmitCompare(Op0, Op1, VT)) 989 return false; 990 991 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc)) 992 .addMBB(TrueMBB); 993 994 if (Predicate == CmpInst::FCMP_UNE) { 995 // X86 requires a second branch to handle UNE (and OEQ, 996 // which is mapped to UNE above). 997 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4)) 998 .addMBB(TrueMBB); 999 } 1000 1001 FastEmitBranch(FalseMBB, DL); 1002 FuncInfo.MBB->addSuccessor(TrueMBB); 1003 return true; 1004 } 1005 } else if (ExtractValueInst *EI = 1006 dyn_cast<ExtractValueInst>(BI->getCondition())) { 1007 // Check to see if the branch instruction is from an "arithmetic with 1008 // overflow" intrinsic. The main way these intrinsics are used is: 1009 // 1010 // %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) 1011 // %sum = extractvalue { i32, i1 } %t, 0 1012 // %obit = extractvalue { i32, i1 } %t, 1 1013 // br i1 %obit, label %overflow, label %normal 1014 // 1015 // The %sum and %obit are converted in an ADD and a SETO/SETB before 1016 // reaching the branch. Therefore, we search backwards through the MBB 1017 // looking for the SETO/SETB instruction. If an instruction modifies the 1018 // EFLAGS register before we reach the SETO/SETB instruction, then we can't 1019 // convert the branch into a JO/JB instruction. 
1020 if (const IntrinsicInst *CI = 1021 dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){ 1022 if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || 1023 CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { 1024 const MachineInstr *SetMI = 0; 1025 unsigned Reg = getRegForValue(EI); 1026 1027 for (MachineBasicBlock::const_reverse_iterator 1028 RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend(); 1029 RI != RE; ++RI) { 1030 const MachineInstr &MI = *RI; 1031 1032 if (MI.definesRegister(Reg)) { 1033 if (MI.isCopy()) { 1034 Reg = MI.getOperand(1).getReg(); 1035 continue; 1036 } 1037 1038 SetMI = &MI; 1039 break; 1040 } 1041 1042 const TargetInstrDesc &TID = MI.getDesc(); 1043 if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) || 1044 MI.hasUnmodeledSideEffects()) 1045 break; 1046 } 1047 1048 if (SetMI) { 1049 unsigned OpCode = SetMI->getOpcode(); 1050 1051 if (OpCode == X86::SETOr || OpCode == X86::SETBr) { 1052 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1053 TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4)) 1054 .addMBB(TrueMBB); 1055 FastEmitBranch(FalseMBB, DL); 1056 FuncInfo.MBB->addSuccessor(TrueMBB); 1057 return true; 1058 } 1059 } 1060 } 1061 } 1062 } 1063 1064 // Otherwise do a clumsy setcc and re-test it. 1065 unsigned OpReg = getRegForValue(BI->getCondition()); 1066 if (OpReg == 0) return false; 1067 1068 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) 1069 .addReg(OpReg).addReg(OpReg); 1070 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4)) 1071 .addMBB(TrueMBB); 1072 FastEmitBranch(FalseMBB, DL); 1073 FuncInfo.MBB->addSuccessor(TrueMBB); 1074 return true; 1075} 1076 1077bool X86FastISel::X86SelectShift(const Instruction *I) { 1078 unsigned CReg = 0, OpReg = 0, OpImm = 0; 1079 const TargetRegisterClass *RC = NULL; 1080 if (I->getType()->isIntegerTy(8)) { 1081 CReg = X86::CL; 1082 RC = &X86::GR8RegClass; 1083 switch (I->getOpcode()) { 1084 case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break; 1085 case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break; 1086 case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; 1087 default: return false; 1088 } 1089 } else if (I->getType()->isIntegerTy(16)) { 1090 CReg = X86::CX; 1091 RC = &X86::GR16RegClass; 1092 switch (I->getOpcode()) { 1093 case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break; 1094 case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break; 1095 case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; 1096 default: return false; 1097 } 1098 } else if (I->getType()->isIntegerTy(32)) { 1099 CReg = X86::ECX; 1100 RC = &X86::GR32RegClass; 1101 switch (I->getOpcode()) { 1102 case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break; 1103 case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break; 1104 case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; 1105 default: return false; 1106 } 1107 } else if (I->getType()->isIntegerTy(64)) { 1108 CReg = X86::RCX; 1109 RC = &X86::GR64RegClass; 1110 switch (I->getOpcode()) { 1111 case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break; 1112 case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break; 1113 case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break; 1114 default: return false; 1115 } 1116 } else { 1117 return false; 1118 } 1119 1120 MVT VT; 1121 if (!isTypeLegal(I->getType(), VT)) 1122 return false; 1123 1124 
unsigned Op0Reg = getRegForValue(I->getOperand(0)); 1125 if (Op0Reg == 0) return false; 1126 1127 // Fold immediate in shl(x,3). 1128 if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { 1129 unsigned ResultReg = createResultReg(RC); 1130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), 1131 ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); 1132 UpdateValueMap(I, ResultReg); 1133 return true; 1134 } 1135 1136 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1137 if (Op1Reg == 0) return false; 1138 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1139 CReg).addReg(Op1Reg); 1140 1141 // The shift instruction uses X86::CL. If we defined a super-register 1142 // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. 1143 if (CReg != X86::CL) 1144 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1145 TII.get(TargetOpcode::KILL), X86::CL) 1146 .addReg(CReg, RegState::Kill); 1147 1148 unsigned ResultReg = createResultReg(RC); 1149 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg) 1150 .addReg(Op0Reg); 1151 UpdateValueMap(I, ResultReg); 1152 return true; 1153} 1154 1155bool X86FastISel::X86SelectSelect(const Instruction *I) { 1156 MVT VT; 1157 if (!isTypeLegal(I->getType(), VT)) 1158 return false; 1159 1160 // We only use cmov here, if we don't have a cmov instruction bail. 1161 if (!Subtarget->hasCMov()) return false; 1162 1163 unsigned Opc = 0; 1164 const TargetRegisterClass *RC = NULL; 1165 if (VT == MVT::i16) { 1166 Opc = X86::CMOVE16rr; 1167 RC = &X86::GR16RegClass; 1168 } else if (VT == MVT::i32) { 1169 Opc = X86::CMOVE32rr; 1170 RC = &X86::GR32RegClass; 1171 } else if (VT == MVT::i64) { 1172 Opc = X86::CMOVE64rr; 1173 RC = &X86::GR64RegClass; 1174 } else { 1175 return false; 1176 } 1177 1178 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 1179 if (Op0Reg == 0) return false; 1180 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1181 if (Op1Reg == 0) return false; 1182 unsigned Op2Reg = getRegForValue(I->getOperand(2)); 1183 if (Op2Reg == 0) return false; 1184 1185 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) 1186 .addReg(Op0Reg).addReg(Op0Reg); 1187 unsigned ResultReg = createResultReg(RC); 1188 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) 1189 .addReg(Op1Reg).addReg(Op2Reg); 1190 UpdateValueMap(I, ResultReg); 1191 return true; 1192} 1193 1194bool X86FastISel::X86SelectFPExt(const Instruction *I) { 1195 // fpext from float to double. 
1196 if (Subtarget->hasSSE2() && 1197 I->getType()->isDoubleTy()) { 1198 const Value *V = I->getOperand(0); 1199 if (V->getType()->isFloatTy()) { 1200 unsigned OpReg = getRegForValue(V); 1201 if (OpReg == 0) return false; 1202 unsigned ResultReg = createResultReg(X86::FR64RegisterClass); 1203 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1204 TII.get(X86::CVTSS2SDrr), ResultReg) 1205 .addReg(OpReg); 1206 UpdateValueMap(I, ResultReg); 1207 return true; 1208 } 1209 } 1210 1211 return false; 1212} 1213 1214bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { 1215 if (Subtarget->hasSSE2()) { 1216 if (I->getType()->isFloatTy()) { 1217 const Value *V = I->getOperand(0); 1218 if (V->getType()->isDoubleTy()) { 1219 unsigned OpReg = getRegForValue(V); 1220 if (OpReg == 0) return false; 1221 unsigned ResultReg = createResultReg(X86::FR32RegisterClass); 1222 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1223 TII.get(X86::CVTSD2SSrr), ResultReg) 1224 .addReg(OpReg); 1225 UpdateValueMap(I, ResultReg); 1226 return true; 1227 } 1228 } 1229 } 1230 1231 return false; 1232} 1233 1234bool X86FastISel::X86SelectTrunc(const Instruction *I) { 1235 if (Subtarget->is64Bit()) 1236 // All other cases should be handled by the tblgen generated code. 1237 return false; 1238 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); 1239 EVT DstVT = TLI.getValueType(I->getType()); 1240 1241 // This code only handles truncation to byte right now. 1242 if (DstVT != MVT::i8 && DstVT != MVT::i1) 1243 // All other cases should be handled by the tblgen generated code. 1244 return false; 1245 if (SrcVT != MVT::i16 && SrcVT != MVT::i32) 1246 // All other cases should be handled by the tblgen generated code. 1247 return false; 1248 1249 unsigned InputReg = getRegForValue(I->getOperand(0)); 1250 if (!InputReg) 1251 // Unhandled operand. Halt "fast" selection and bail. 1252 return false; 1253 1254 // First issue a copy to GR16_ABCD or GR32_ABCD. 1255 const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) 1256 ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; 1257 unsigned CopyReg = createResultReg(CopyRC); 1258 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1259 CopyReg).addReg(InputReg); 1260 1261 // Then issue an extract_subreg. 1262 unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, 1263 CopyReg, /*Kill=*/true, 1264 X86::sub_8bit); 1265 if (!ResultReg) 1266 return false; 1267 1268 UpdateValueMap(I, ResultReg); 1269 return true; 1270} 1271 1272bool X86FastISel::X86SelectExtractValue(const Instruction *I) { 1273 const ExtractValueInst *EI = cast<ExtractValueInst>(I); 1274 const Value *Agg = EI->getAggregateOperand(); 1275 1276 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) { 1277 switch (CI->getIntrinsicID()) { 1278 default: break; 1279 case Intrinsic::sadd_with_overflow: 1280 case Intrinsic::uadd_with_overflow: { 1281 // Cheat a little. We know that the registers for "add" and "seto" are 1282 // allocated sequentially. However, we only keep track of the register 1283 // for "add" in the value map. Use extractvalue's index to get the 1284 // correct register for "seto". 1285 unsigned OpReg = getRegForValue(Agg); 1286 if (OpReg == 0) 1287 return false; 1288 UpdateValueMap(I, OpReg + *EI->idx_begin()); 1289 return true; 1290 } 1291 } 1292 } 1293 1294 return false; 1295} 1296 1297bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { 1298 // FIXME: Handle more intrinsics. 
1299 switch (I.getIntrinsicID()) { 1300 default: return false; 1301 case Intrinsic::stackprotector: { 1302 // Emit code inline code to store the stack guard onto the stack. 1303 EVT PtrTy = TLI.getPointerTy(); 1304 1305 const Value *Op1 = I.getArgOperand(0); // The guard's value. 1306 const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); 1307 1308 // Grab the frame index. 1309 X86AddressMode AM; 1310 if (!X86SelectAddress(Slot, AM)) return false; 1311 1312 if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; 1313 1314 return true; 1315 } 1316 case Intrinsic::objectsize: { 1317 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); 1318 const Type *Ty = I.getCalledFunction()->getReturnType(); 1319 1320 assert(CI && "Non-constant type in Intrinsic::objectsize?"); 1321 1322 MVT VT; 1323 if (!isTypeLegal(Ty, VT)) 1324 return false; 1325 1326 unsigned OpC = 0; 1327 if (VT == MVT::i32) 1328 OpC = X86::MOV32ri; 1329 else if (VT == MVT::i64) 1330 OpC = X86::MOV64ri; 1331 else 1332 return false; 1333 1334 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 1335 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg). 1336 addImm(CI->isZero() ? -1ULL : 0); 1337 UpdateValueMap(&I, ResultReg); 1338 return true; 1339 } 1340 case Intrinsic::dbg_declare: { 1341 const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); 1342 X86AddressMode AM; 1343 assert(DI->getAddress() && "Null address should be checked earlier!"); 1344 if (!X86SelectAddress(DI->getAddress(), AM)) 1345 return false; 1346 const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); 1347 // FIXME may need to add RegState::Debug to any registers produced, 1348 // although ESP/EBP should be the only ones at the moment. 1349 addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM). 1350 addImm(0).addMetadata(DI->getVariable()); 1351 return true; 1352 } 1353 case Intrinsic::trap: { 1354 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP)); 1355 return true; 1356 } 1357 case Intrinsic::sadd_with_overflow: 1358 case Intrinsic::uadd_with_overflow: { 1359 // Replace "add with overflow" intrinsics with an "add" instruction followed 1360 // by a seto/setc instruction. Later on, when the "extractvalue" 1361 // instructions are encountered, we use the fact that two registers were 1362 // created sequentially to get the correct registers for the "sum" and the 1363 // "overflow bit". 1364 const Function *Callee = I.getCalledFunction(); 1365 const Type *RetTy = 1366 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); 1367 1368 MVT VT; 1369 if (!isTypeLegal(RetTy, VT)) 1370 return false; 1371 1372 const Value *Op1 = I.getArgOperand(0); 1373 const Value *Op2 = I.getArgOperand(1); 1374 unsigned Reg1 = getRegForValue(Op1); 1375 unsigned Reg2 = getRegForValue(Op2); 1376 1377 if (Reg1 == 0 || Reg2 == 0) 1378 // FIXME: Handle values *not* in registers. 1379 return false; 1380 1381 unsigned OpC = 0; 1382 if (VT == MVT::i32) 1383 OpC = X86::ADD32rr; 1384 else if (VT == MVT::i64) 1385 OpC = X86::ADD64rr; 1386 else 1387 return false; 1388 1389 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 1390 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg) 1391 .addReg(Reg1).addReg(Reg2); 1392 unsigned DestReg1 = UpdateValueMap(&I, ResultReg); 1393 1394 // If the add with overflow is an intra-block value then we just want to 1395 // create temporaries for it like normal. 
If it is a cross-block value then 1396 // UpdateValueMap will return the cross-block register used. Since we 1397 // *really* want the value to be live in the register pair known by 1398 // UpdateValueMap, we have to use DestReg1+1 as the destination register in 1399 // the cross block case. In the non-cross-block case, we should just make 1400 // another register for the value. 1401 if (DestReg1 != ResultReg) 1402 ResultReg = DestReg1+1; 1403 else 1404 ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8)); 1405 1406 unsigned Opc = X86::SETBr; 1407 if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) 1408 Opc = X86::SETOr; 1409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); 1410 return true; 1411 } 1412 } 1413} 1414 1415bool X86FastISel::X86SelectCall(const Instruction *I) { 1416 const CallInst *CI = cast<CallInst>(I); 1417 const Value *Callee = CI->getCalledValue(); 1418 1419 // Can't handle inline asm yet. 1420 if (isa<InlineAsm>(Callee)) 1421 return false; 1422 1423 // Handle intrinsic calls. 1424 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) 1425 return X86VisitIntrinsicCall(*II); 1426 1427 // Handle only C and fastcc calling conventions for now. 1428 ImmutableCallSite CS(CI); 1429 CallingConv::ID CC = CS.getCallingConv(); 1430 if (CC != CallingConv::C && 1431 CC != CallingConv::Fast && 1432 CC != CallingConv::X86_FastCall) 1433 return false; 1434 1435 // fastcc with -tailcallopt is intended to provide a guaranteed 1436 // tail call optimization. Fastisel doesn't know how to do that. 1437 if (CC == CallingConv::Fast && GuaranteedTailCallOpt) 1438 return false; 1439 1440 // Let SDISel handle vararg functions. 1441 const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 1442 const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 1443 if (FTy->isVarArg()) 1444 return false; 1445 1446 // Fast-isel doesn't know about callee-pop yet. 1447 if (Subtarget->IsCalleePop(FTy->isVarArg(), CC)) 1448 return false; 1449 1450 // Handle *simple* calls for now. 1451 const Type *RetTy = CS.getType(); 1452 MVT RetVT; 1453 if (RetTy->isVoidTy()) 1454 RetVT = MVT::isVoid; 1455 else if (!isTypeLegal(RetTy, RetVT, true)) 1456 return false; 1457 1458 // Materialize callee address in a register. FIXME: GV address can be 1459 // handled with a CALLpcrel32 instead. 1460 X86AddressMode CalleeAM; 1461 if (!X86SelectCallAddress(Callee, CalleeAM)) 1462 return false; 1463 unsigned CalleeOp = 0; 1464 const GlobalValue *GV = 0; 1465 if (CalleeAM.GV != 0) { 1466 GV = CalleeAM.GV; 1467 } else if (CalleeAM.Base.Reg != 0) { 1468 CalleeOp = CalleeAM.Base.Reg; 1469 } else 1470 return false; 1471 1472 // Allow calls which produce i1 results. 1473 bool AndToI1 = false; 1474 if (RetVT == MVT::i1) { 1475 RetVT = MVT::i8; 1476 AndToI1 = true; 1477 } 1478 1479 // Deal with call operands first. 
1480 SmallVector<const Value *, 8> ArgVals; 1481 SmallVector<unsigned, 8> Args; 1482 SmallVector<MVT, 8> ArgVTs; 1483 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 1484 Args.reserve(CS.arg_size()); 1485 ArgVals.reserve(CS.arg_size()); 1486 ArgVTs.reserve(CS.arg_size()); 1487 ArgFlags.reserve(CS.arg_size()); 1488 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 1489 i != e; ++i) { 1490 unsigned Arg = getRegForValue(*i); 1491 if (Arg == 0) 1492 return false; 1493 ISD::ArgFlagsTy Flags; 1494 unsigned AttrInd = i - CS.arg_begin() + 1; 1495 if (CS.paramHasAttr(AttrInd, Attribute::SExt)) 1496 Flags.setSExt(); 1497 if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) 1498 Flags.setZExt(); 1499 1500 // FIXME: Only handle *easy* calls for now. 1501 if (CS.paramHasAttr(AttrInd, Attribute::InReg) || 1502 CS.paramHasAttr(AttrInd, Attribute::StructRet) || 1503 CS.paramHasAttr(AttrInd, Attribute::Nest) || 1504 CS.paramHasAttr(AttrInd, Attribute::ByVal)) 1505 return false; 1506 1507 const Type *ArgTy = (*i)->getType(); 1508 MVT ArgVT; 1509 if (!isTypeLegal(ArgTy, ArgVT)) 1510 return false; 1511 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 1512 Flags.setOrigAlign(OriginalAlignment); 1513 1514 Args.push_back(Arg); 1515 ArgVals.push_back(*i); 1516 ArgVTs.push_back(ArgVT); 1517 ArgFlags.push_back(Flags); 1518 } 1519 1520 // Analyze operands of the call, assigning locations to each operand. 1521 SmallVector<CCValAssign, 16> ArgLocs; 1522 CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); 1523 1524 // Allocate shadow area for Win64 1525 if (Subtarget->isTargetWin64()) { 1526 CCInfo.AllocateStack(32, 8); 1527 } 1528 1529 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86); 1530 1531 // Get a count of how many bytes are to be pushed on the stack. 1532 unsigned NumBytes = CCInfo.getNextStackOffset(); 1533 1534 // Issue CALLSEQ_START 1535 unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); 1536 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) 1537 .addImm(NumBytes); 1538 1539 // Process argument: walk the register/memloc assignments, inserting 1540 // copies / loads. 1541 SmallVector<unsigned, 4> RegArgs; 1542 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1543 CCValAssign &VA = ArgLocs[i]; 1544 unsigned Arg = Args[VA.getValNo()]; 1545 EVT ArgVT = ArgVTs[VA.getValNo()]; 1546 1547 // Promote the value if needed. 1548 switch (VA.getLocInfo()) { 1549 default: llvm_unreachable("Unknown loc info!"); 1550 case CCValAssign::Full: break; 1551 case CCValAssign::SExt: { 1552 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 1553 Arg, ArgVT, Arg); 1554 assert(Emitted && "Failed to emit a sext!"); (void)Emitted; 1555 ArgVT = VA.getLocVT(); 1556 break; 1557 } 1558 case CCValAssign::ZExt: { 1559 bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 1560 Arg, ArgVT, Arg); 1561 assert(Emitted && "Failed to emit a zext!"); (void)Emitted; 1562 ArgVT = VA.getLocVT(); 1563 break; 1564 } 1565 case CCValAssign::AExt: { 1566 // We don't handle MMX parameters yet. 
1567 if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128) 1568 return false; 1569 bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), 1570 Arg, ArgVT, Arg); 1571 if (!Emitted) 1572 Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 1573 Arg, ArgVT, Arg); 1574 if (!Emitted) 1575 Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 1576 Arg, ArgVT, Arg); 1577 1578 assert(Emitted && "Failed to emit a aext!"); (void)Emitted; 1579 ArgVT = VA.getLocVT(); 1580 break; 1581 } 1582 case CCValAssign::BCvt: { 1583 unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(), 1584 ISD::BITCAST, Arg, /*TODO: Kill=*/false); 1585 assert(BC != 0 && "Failed to emit a bitcast!"); 1586 Arg = BC; 1587 ArgVT = VA.getLocVT(); 1588 break; 1589 } 1590 } 1591 1592 if (VA.isRegLoc()) { 1593 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1594 VA.getLocReg()).addReg(Arg); 1595 RegArgs.push_back(VA.getLocReg()); 1596 } else { 1597 unsigned LocMemOffset = VA.getLocMemOffset(); 1598 X86AddressMode AM; 1599 AM.Base.Reg = StackPtr; 1600 AM.Disp = LocMemOffset; 1601 const Value *ArgVal = ArgVals[VA.getValNo()]; 1602 1603 // If this is a really simple value, emit this with the Value* version of 1604 // X86FastEmitStore. If it isn't simple, we don't want to do this, as it 1605 // can cause us to reevaluate the argument. 1606 if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) 1607 X86FastEmitStore(ArgVT, ArgVal, AM); 1608 else 1609 X86FastEmitStore(ArgVT, Arg, AM); 1610 } 1611 } 1612 1613 // ELF / PIC requires GOT in the EBX register before function calls via PLT 1614 // GOT pointer. 1615 if (Subtarget->isPICStyleGOT()) { 1616 unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); 1617 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1618 X86::EBX).addReg(Base); 1619 } 1620 1621 // Issue the call. 1622 MachineInstrBuilder MIB; 1623 if (CalleeOp) { 1624 // Register-indirect call. 1625 unsigned CallOpc; 1626 if (Subtarget->isTargetWin64()) 1627 CallOpc = X86::WINCALL64r; 1628 else if (Subtarget->is64Bit()) 1629 CallOpc = X86::CALL64r; 1630 else 1631 CallOpc = X86::CALL32r; 1632 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) 1633 .addReg(CalleeOp); 1634 1635 } else { 1636 // Direct call. 1637 assert(GV && "Not a direct call"); 1638 unsigned CallOpc; 1639 if (Subtarget->isTargetWin64()) 1640 CallOpc = X86::WINCALL64pcrel32; 1641 else if (Subtarget->is64Bit()) 1642 CallOpc = X86::CALL64pcrel32; 1643 else 1644 CallOpc = X86::CALLpcrel32; 1645 1646 // See if we need any target-specific flags on the GV operand. 1647 unsigned char OpFlags = 0; 1648 1649 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to 1650 // external symbols most go through the PLT in PIC mode. If the symbol 1651 // has hidden or protected visibility, or if it is static or local, then 1652 // we don't need to use the PLT - we can directly call it. 1653 if (Subtarget->isTargetELF() && 1654 TM.getRelocationModel() == Reloc::PIC_ && 1655 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { 1656 OpFlags = X86II::MO_PLT; 1657 } else if (Subtarget->isPICStyleStubAny() && 1658 (GV->isDeclaration() || GV->isWeakForLinker()) && 1659 Subtarget->getDarwinVers() < 9) { 1660 // PC-relative references to external symbols should go through $stub, 1661 // unless we're building with the leopard linker or later, which 1662 // automatically synthesizes these stubs. 
      OpFlags = X86II::MO_DARWIN_STUB;
    }

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
      .addGlobalAddress(GV, 0, OpFlags);
  }

  // Add an implicit use of the GOT pointer in EBX.
  if (Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
    .addImm(NumBytes).addImm(0);

  // Now handle call return value (if any).
  SmallVector<unsigned, 4> UsedRegs;
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);

    // Copy all of the result registers out of their specified physreg.
    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
    EVT CopyVT = RVLocs[0].getValVT();
    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from the fp stack reg to the
    // xmm reg.
    if ((RVLocs[0].getLocReg() == X86::ST0 ||
         RVLocs[0].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
      CopyVT = MVT::f80;
      DstRC = X86::RFP80RegisterClass;
    }

    unsigned ResultReg = createResultReg(DstRC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            ResultReg).addReg(RVLocs[0].getLocReg());
    UsedRegs.push_back(RVLocs[0].getLocReg());

    if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 value to the right size, which also moves it to the
      // appropriate xmm register. This is accomplished by storing the F80
      // value in memory and then loading it back. Ewww...
      EVT ResVT = RVLocs[0].getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(Opc)), FI)
        .addReg(ResultReg);
      DstRC = ResVT == MVT::f32
        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      ResultReg = createResultReg(DstRC);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(Opc), ResultReg), FI);
    }

    if (AndToI1) {
      // Mask out all but the lowest bit for a call which produces an i1.
      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
      ResultReg = AndResult;
    }

    UpdateValueMap(I, ResultReg);
  }

  // Set all unused physreg defs as dead.
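  // The call instruction implicitly defines all call-clobbered registers;
  // any such def not recorded in UsedRegs carries no live value, so mark it
  // dead for later passes.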
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

bool
X86FastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::Ret:
    return X86SelectRet(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::Call:
    return X86SelectCall(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::ExtractValue:
    return X86SelectExtractValue(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    UpdateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}

unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
  MVT VT;
  if (!isTypeLegal(C->getType(), VT))
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // Materialize addresses with LEA instructions.
  if (isa<GlobalValue>(C)) {
    X86AddressMode AM;
    if (X86SelectAddress(C, AM)) {
      if (TLI.getPointerTy() == MVT::i32)
        Opc = X86::LEA32r;
      else
        Opc = X86::LEA64r;
      unsigned ResultReg = createResultReg(RC);
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(Opc), ResultReg), AM);
      return ResultReg;
    }
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // Alignment of vector types. FIXME!
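    // No preferred alignment was recorded, so fall back to the type's
    // allocation size.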
    Align = TD.getTypeAllocSize(C->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleGOT()) {
    OpFlag = X86II::MO_GOTOFF;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleRIPRel() &&
             TM.getCodeModel() == CodeModel::Small) {
    PICBase = X86::RIP;
  }

  // Create the load from the constant pool.
  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
  unsigned ResultReg = createResultReg(RC);
  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                   TII.get(Opc), ResultReg),
                           MCPOffset, PICBase, OpFlag);

  return ResultReg;
}

unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but TargetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and TargetMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// possible.
bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                                const LoadInst *LI) {
  X86AddressMode AM;
  if (!X86SelectAddress(LI->getOperand(0), AM))
    return false;

  X86InstrInfo &XII = (X86InstrInfo&)TII;

  unsigned Size = TD.getTypeAllocSize(LI->getType());
  unsigned Alignment = LI->getAlignment();

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Result =
    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
  if (Result == 0) return false;

  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
  MI->eraseFromParent();
  return true;
}

namespace llvm {
  llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
    return new X86FastISel(funcInfo);
  }
}