X86FastISel.cpp revision 195098
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  ///
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(MachineFunction &mf,
                       MachineModuleInfo *mmi,
                       DwarfWriter *dw,
                       DenseMap<const Value *, unsigned> &vm,
                       DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                       DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                       , SmallSet<Instruction*, 8> &cil
#endif
                       )
    : FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
               , cil
#endif
               ) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  virtual bool TargetSelectInstruction(Instruction *I);

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT);

  bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(MVT VT, Value *Val,
                        const X86AddressMode &AM);
  bool X86FastEmitStore(MVT VT, unsigned Val,
                        const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);

  bool X86SelectLoad(Instruction *I);

  bool X86SelectStore(Instruction *I);

  bool X86SelectCmp(Instruction *I);

  bool X86SelectZExt(Instruction *I);

  bool X86SelectBranch(Instruction *I);

  bool X86SelectShift(Instruction *I);

  bool X86SelectSelect(Instruction *I);

  bool X86SelectTrunc(Instruction *I);

  bool X86SelectFPExt(Instruction *I);
  bool X86SelectFPTrunc(Instruction *I);

  bool X86SelectExtractValue(Instruction *I);

  bool X86VisitIntrinsicCall(IntrinsicInst &I);
  bool X86SelectCall(Instruction *I);

  CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(Constant *C);

  unsigned TargetMaterializeAlloca(AllocaInst *C);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(MVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};

} // end anonymous namespace.

bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (VT == MVT::Other || !VT.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTailCall) {
  if (Subtarget->is64Bit()) {
    if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else
    return CC_X86_32_C;
}

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT()) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr,
/// Ptr, and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
                              const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT()) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
    break;
  }

  addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
  return true;
}

bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType());

  // If this is a store of a simple constant, fold the constant into the store.
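  // For example, "store i32 0, i32* %p" then becomes a single MOV32mi with
  // an immediate operand, rather than materializing the constant in a
  // register and storing the register.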
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    switch (VT.getSimpleVT()) {
    default: break;
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if ((int)CI->getSExtValue() == CI->getSExtValue())
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
        .addImm(CI->getSExtValue());
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
                                    unsigned Src, MVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
  User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM, isCall);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM, isCall);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM, isCall);
    break;

  case Instruction::Alloca: {
    if (isCall) break;
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
    if (SI != StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    if (isCall) break;
    // Adds of constants are common and easy enough.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt32(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM, isCall);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    if (isCall) break;
    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
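    // For example, "getelementptr [4 x i32]* %a, i32 0, i32 %i" folds into
    // Base = %a, Index = %i, Scale = 4, Disp = 0, matching x86's
    // Base + Index*Scale + Disp addressing form.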
    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        Disp += SL->getElementOffset(Idx);
      } else {
        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
        } else if (IndexReg == 0 &&
                   (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                   (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op);
          if (IndexReg == 0)
            return false;
        } else
          // Unsupported.
          goto unsupported_gep;
      }
    }
    // Check for displacement overflow.
    if (!isInt32(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    return X86SelectAddress(U->getOperand(0), AM, isCall);
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  // Handle constant address.
  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Default &&
        TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS yet.
    if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Set up the basic address.
    AM.GV = GV;

    if (!isCall &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        !Subtarget->is64Bit())
      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);

    // Emit an extra load if the ABI requires it.
    if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
      // Check to see if we've already materialized this
      // value in a register in this block.
      if (unsigned Reg = LocalValueMap[V]) {
        AM.Base.Reg = Reg;
        AM.GV = 0;
        return true;
      }
      // Issue load from stub if necessary.
      unsigned Opc = 0;
      const TargetRegisterClass *RC = NULL;
      if (TLI.getPointerTy() == MVT::i32) {
        Opc = X86::MOV32rm;
        RC  = X86::GR32RegisterClass;
      } else {
        Opc = X86::MOV64rm;
        RC  = X86::GR64RegisterClass;
      }

      X86AddressMode StubAM;
      StubAM.Base.Reg = AM.Base.Reg;
      StubAM.GV = AM.GV;
      unsigned ResultReg = createResultReg(RC);
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM);

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = ResultReg;
      AM.GV = 0;

      // Prevent loading GV stub multiple times in same MBB.
      LocalValueMap[V] = AM.Base.Reg;
    } else if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can.
      AM.Base.Reg = X86::RIP;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(Instruction* I) {
  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM, false))
    return false;

  return X86FastEmitStore(VT, I->getOperand(0), AM);
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(Instruction *I)  {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM, false))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}

static unsigned X86ChooseCmpOpcode(MVT VT) {
  switch (VT.getSimpleVT()) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32: return X86::UCOMISSrr;
  case MVT::f64: return X86::UCOMISDrr;
  }
}

/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
/// of the comparison, return an opcode that works for the compare (e.g.
/// CMP32ri) otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
  switch (VT.getSimpleVT()) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8:  return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}
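// A note on the unsigned-style condition codes used for floating point
// below: UCOMISS/UCOMISD set only ZF, PF and CF. An unordered result sets
// all three, "less than" sets CF, "equal" sets ZF, and "greater than" sets
// none, so PF acts as the "unordered" flag. That is why FCMP_OEQ needs
// SETE *and* SETNP, FCMP_UNE needs SETNE *or* SETP, and the ordered
// less-than predicates are handled by swapping the operands and using the
// "above" conditions instead.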
bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType());

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
        .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(Instruction *I) {
  CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
    BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
    BuildMI(MBB, DL,
            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
    BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
    BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(Instruction *I) {
  // Handle zero-extension from i1 to i8, which is common.
  if (I->getType() == Type::Int8Ty &&
      I->getOperand(0)->getType() == Type::Int1Ty) {
    unsigned ResultReg = getRegForValue(I->getOperand(0));
    if (ResultReg == 0) return false;
    // Set the high bits to zero.
    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
    if (ResultReg == 0) return false;
    UpdateValueMap(I, ResultReg);
    return true;
  }

  return false;
}


bool X86FastISel::X86SelectBranch(Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison.
  if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse()) {
      MVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"

      switch (Predicate) {
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::FCMP_UNE;
        // FALL THROUGH
      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
      default:
        return false;
      }

      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);

      if (Predicate == CmpInst::FCMP_UNE) {
        // X86 requires a second branch to handle UNE (and OEQ,
        // which is mapped to UNE above).
        BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB);
      }

      FastEmitBranch(FalseMBB);
      MBB->addSuccessor(TrueMBB);
      return true;
    }
  } else if (ExtractValueInst *EI =
             dyn_cast<ExtractValueInst>(BI->getCondition())) {
    // Check to see if the branch instruction is from an "arithmetic with
    // overflow" intrinsic. The main way these intrinsics are used is:
    //
    //   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
    //   %sum = extractvalue { i32, i1 } %t, 0
    //   %obit = extractvalue { i32, i1 } %t, 1
    //   br i1 %obit, label %overflow, label %normal
    //
    // The %sum and %obit are converted into an ADD and a SETO/SETB before
    // reaching the branch. Therefore, we search backwards through the MBB
    // looking for the SETO/SETB instruction. If an instruction modifies the
    // EFLAGS register before we reach the SETO/SETB instruction, then we can't
    // convert the branch into a JO/JB instruction.
    if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
      if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
          CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
        const MachineInstr *SetMI = 0;
        unsigned Reg = lookUpRegForValue(EI);

        for (MachineBasicBlock::const_reverse_iterator
               RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
          const MachineInstr &MI = *RI;

          if (MI.modifiesRegister(Reg)) {
            unsigned Src, Dst, SrcSR, DstSR;

            if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
              Reg = Src;
              continue;
            }

            SetMI = &MI;
            break;
          }

          const TargetInstrDesc &TID = MI.getDesc();
          if (TID.hasUnmodeledSideEffects() ||
              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
            break;
        }

        if (SetMI) {
          unsigned OpCode = SetMI->getOpcode();

          if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
            BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB))
              .addMBB(TrueMBB);
            FastEmitBranch(FalseMBB);
            MBB->addSuccessor(TrueMBB);
            return true;
          }
        }
      }
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
  BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB);
  FastEmitBranch(FalseMBB);
  MBB->addSuccessor(TrueMBB);
  return true;
}

bool X86FastISel::X86SelectShift(Instruction *I) {
  unsigned CReg = 0, OpReg = 0, OpImm = 0;
  const TargetRegisterClass *RC = NULL;
  if (I->getType() == Type::Int8Ty) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int16Ty) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int32Ty) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int64Ty) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
    default: return false;
    }
  } else {
    return false;
  }

  MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  // Fold immediate in shl(x,3).
  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(MBB, DL, TII.get(OpImm),
            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
    UpdateValueMap(I, ResultReg);
    return true;
  }

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
  // we're doing here.
  if (CReg != X86::CL)
    BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL)
      .addReg(CReg).addImm(X86::SUBREG_8BIT);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSelect(Instruction *I) {
  MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  if (VT.getSimpleVT() == MVT::i16) {
    Opc = X86::CMOVE16rr;
    RC = &X86::GR16RegClass;
  } else if (VT.getSimpleVT() == MVT::i32) {
    Opc = X86::CMOVE32rr;
    RC = &X86::GR32RegClass;
  } else if (VT.getSimpleVT() == MVT::i64) {
    Opc = X86::CMOVE64rr;
    RC = &X86::GR64RegClass;
  } else {
    return false;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectFPExt(Instruction *I) {
  // fpext from float to double.
  if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) {
    Value *V = I->getOperand(0);
    if (V->getType() == Type::FloatTy) {
      unsigned OpReg = getRegForValue(V);
      if (OpReg == 0) return false;
      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
      BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
      UpdateValueMap(I, ResultReg);
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
  if (Subtarget->hasSSE2()) {
    if (I->getType() == Type::FloatTy) {
      Value *V = I->getOperand(0);
      if (V->getType() == Type::DoubleTy) {
        unsigned OpReg = getRegForValue(V);
        if (OpReg == 0) return false;
        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
        BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
        UpdateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  return false;
}

bool X86FastISel::X86SelectTrunc(Instruction *I) {
  if (Subtarget->is64Bit())
    // All other cases should be handled by the tblgen generated code.
    return false;
  MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
  MVT DstVT = TLI.getValueType(I->getType());

  // This code only handles truncation to byte right now.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    // All other cases should be handled by the tblgen generated code.
    return false;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
    // All other cases should be handled by the tblgen generated code.
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand.  Halt "fast" selection and bail.
    return false;

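  // On x86-32 only EAX, EBX, ECX and EDX have addressable 8-bit
  // subregisters, so the source must be constrained to the ABCD register
  // classes before the low byte can be extracted. (In 64-bit mode every GPR
  // has an 8-bit subregister, which is why this path bails out on 64-bit
  // targets above.)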
  // First issue a copy to GR16_ABCD or GR32_ABCD.
  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
  unsigned CopyReg = createResultReg(CopyRC);
  BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);

  // Then issue an extract_subreg.
  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
                                                  CopyReg, X86::SUBREG_8BIT);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectExtractValue(Instruction *I) {
  ExtractValueInst *EI = cast<ExtractValueInst>(I);
  Value *Agg = EI->getAggregateOperand();

  if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
    switch (CI->getIntrinsicID()) {
    default: break;
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::uadd_with_overflow:
      // Cheat a little. We know that the registers for "add" and "seto" are
      // allocated sequentially. However, we only keep track of the register
      // for "add" in the value map. Use extractvalue's index to get the
      // correct register for "seto".
      UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow: {
    // Replace "add with overflow" intrinsics with an "add" instruction
    // followed by a seto/setc instruction. Later on, when the "extractvalue"
    // instructions are encountered, we use the fact that two registers were
    // created sequentially to get the correct registers for the "sum" and the
    // "overflow bit".
    const Function *Callee = I.getCalledFunction();
    const Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    Value *Op1 = I.getOperand(1);
    Value *Op2 = I.getOperand(2);
    unsigned Reg1 = getRegForValue(Op1);
    unsigned Reg2 = getRegForValue(Op2);

    if (Reg1 == 0 || Reg2 == 0)
      // FIXME: Handle values *not* in registers.
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::ADD32rr;
    else if (VT == MVT::i64)
      OpC = X86::ADD64rr;
    else
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
    unsigned DestReg1 = UpdateValueMap(&I, ResultReg);

    // If the add with overflow is an intra-block value then we just want to
    // create temporaries for it like normal. If it is a cross-block value then
    // UpdateValueMap will return the cross-block register used. Since we
    // *really* want the value to be live in the register pair known by
    // UpdateValueMap, we have to use DestReg1+1 as the destination register in
    // the cross block case. In the non-cross-block case, we should just make
    // another register for the value.
    if (DestReg1 != ResultReg)
      ResultReg = DestReg1+1;
    else
      ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));

    unsigned Opc = X86::SETBr;
    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
      Opc = X86::SETOr;
    BuildMI(MBB, DL, TII.get(Opc), ResultReg);
    return true;
  }
  }
}

bool X86FastISel::X86SelectCall(Instruction *I) {
  CallInst *CI = cast<CallInst>(I);
  Value *Callee = I->getOperand(0);

  // Can't handle inline asm yet.
  if (isa<InlineAsm>(Callee))
    return false;

  // Handle intrinsic calls.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
    return X86VisitIntrinsicCall(*II);

  // Handle only C and fastcc calling conventions for now.
  CallSite CS(CI);
  unsigned CC = CS.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't
  // handle this for now.
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = CS.getType();
  MVT RetVT;
  if (RetTy == Type::VoidTy)
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT, true))
    return false;

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectAddress(Callee, CalleeAM, true))
    return false;
  unsigned CalleeOp = 0;
  GlobalValue *GV = 0;
  if (CalleeAM.GV != 0) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Allow calls which produce i1 results.
  bool AndToI1 = false;
  if (RetVT == MVT::i1) {
    RetVT = MVT::i8;
    AndToI1 = true;
  }

  // Deal with call operands first.
  SmallVector<Value*, 8> ArgVals;
  SmallVector<unsigned, 8> Args;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgVals.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Arg);
    ArgVals.push_back(*i);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

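  // The CALLSEQ_START and CALLSEQ_END pseudos below bracket the argument
  // stores and the call itself; they are later turned into stack-pointer
  // adjustments (or folded away) when call frame pseudo instructions are
  // eliminated.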
  // Issue CALLSEQ_START
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);

  // Process arguments: walk the register/memloc assignments, inserting
  // copies / loads.
  SmallVector<unsigned, 4> RegArgs;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = Args[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
      Emitted = true;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
      Emitted = true;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);

      assert(Emitted && "Failed to emit an aext!"); Emitted=Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    }

    if (VA.isRegLoc()) {
      TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
                                      Arg, RC, RC);
      assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
      Emitted = true;
      RegArgs.push_back(VA.getLocReg());
    } else {
      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = StackPtr;
      AM.Disp = LocMemOffset;
      Value *ArgVal = ArgVals[VA.getValNo()];

      // If this is a really simple value, emit this with the Value* version of
      // X86FastEmitStore. If it isn't simple, we don't want to do this, as it
      // can cause us to reevaluate the argument.
      if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
        X86FastEmitStore(ArgVT, ArgVal, AM);
      else
        X86FastEmitStore(ArgVT, Arg, AM);
    }
  }

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (!Subtarget->is64Bit() &&
      TM.getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    TargetRegisterClass *RC = X86::GR32RegisterClass;
    unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
    assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
    Emitted = true;
  }

  // Issue the call.
  unsigned CallOpc = CalleeOp
    ? (Subtarget->is64Bit() ? X86::CALL64r       : X86::CALL32r)
    : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32);
  MachineInstrBuilder MIB = CalleeOp
    ? BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp)
    : BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV);

  // Add an implicit use of the GOT pointer in EBX.
  if (!Subtarget->is64Bit() &&
      TM.getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);

  // Now handle call return value (if any).
  if (RetVT.getSimpleVT() != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs);
    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);

    // Copy all of the result registers out of their specified physreg.
    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
    MVT CopyVT = RVLocs[0].getValVT();
    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
    TargetRegisterClass *SrcRC = DstRC;

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[0].getLocReg() == X86::ST0 ||
         RVLocs[0].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
      CopyVT = MVT::f80;
      SrcRC = X86::RSTRegisterClass;
      DstRC = X86::RFP80RegisterClass;
    }

    unsigned ResultReg = createResultReg(DstRC);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
    assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
    Emitted = true;
    if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register. This is accomplished by storing the F80
      // value in memory and then loading it back. Ewww...
      MVT ResVT = RVLocs[0].getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize);
      addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
      DstRC = ResVT == MVT::f32
        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      ResultReg = createResultReg(DstRC);
      addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
    }

    if (AndToI1) {
      // Mask out all but lowest bit for some call which produces an i1.
      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
      BuildMI(MBB, DL,
              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
      ResultReg = AndResult;
    }

    UpdateValueMap(I, ResultReg);
  }

  return true;
}


bool
X86FastISel::TargetSelectInstruction(Instruction *I)  {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::Call:
    return X86SelectCall(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::ExtractValue:
    return X86SelectExtractValue(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
    MVT DstVT = TLI.getValueType(I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    UpdateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}

unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
  MVT VT;
  if (!isTypeLegal(C->getType(), VT))
    return false;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT()) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  // Materialize addresses with LEA instructions.
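  // (LEA computes an effective address without touching memory, so a
  // global's address can be materialized in a single instruction.)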
  if (isa<GlobalValue>(C)) {
    X86AddressMode AM;
    if (X86SelectAddress(C, AM, false)) {
      if (TLI.getPointerTy() == MVT::i32)
        Opc = X86::LEA32r;
      else
        Opc = X86::LEA64r;
      unsigned ResultReg = createResultReg(RC);
      addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
      return ResultReg;
    }
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // Alignment of vector types.  FIXME!
    Align = TD.getTypeAllocSize(C->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (TM.getRelocationModel() == Reloc::PIC_) {
    if (Subtarget->isPICStyleStub()) {
      OpFlag = X86II::MO_PIC_BASE_OFFSET;
      PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
    } else if (Subtarget->isPICStyleGOT()) {
      OpFlag = X86II::MO_GOTOFF;
      PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
    }
  }

  // Create the load from the constant pool.
  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
  unsigned ResultReg = createResultReg(RC);
  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
                           MCPOffset, PICBase, OpFlag);

  return ResultReg;
}

unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but TargetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and TargetMaterializeAlloca.
  if (!StaticAllocaMap.count(C))
    return 0;

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM, false))
    return 0;
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

namespace llvm {
  llvm::FastISel *X86::createFastISel(MachineFunction &mf,
                        MachineModuleInfo *mmi,
                        DwarfWriter *dw,
                        DenseMap<const Value *, unsigned> &vm,
                        DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                        DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                        , SmallSet<Instruction*, 8> &cil
#endif
                        ) {
    return new X86FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
                           , cil
#endif
                           );
  }
}