// X86InstrInfo.cpp (LLVM revision 360661)
1234353Sdim//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===// 2193323Sed// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193323Sed// 7193323Sed//===----------------------------------------------------------------------===// 8193323Sed// 9193323Sed// This file contains the X86 implementation of the TargetInstrInfo class. 10193323Sed// 11193323Sed//===----------------------------------------------------------------------===// 12193323Sed 13193323Sed#include "X86InstrInfo.h" 14193323Sed#include "X86.h" 15193323Sed#include "X86InstrBuilder.h" 16341825Sdim#include "X86InstrFoldTables.h" 17193323Sed#include "X86MachineFunctionInfo.h" 18193323Sed#include "X86Subtarget.h" 19193323Sed#include "X86TargetMachine.h" 20193323Sed#include "llvm/ADT/STLExtras.h" 21341825Sdim#include "llvm/ADT/Sequence.h" 22309124Sdim#include "llvm/CodeGen/LivePhysRegs.h" 23249423Sdim#include "llvm/CodeGen/LiveVariables.h" 24193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 25239462Sdim#include "llvm/CodeGen/MachineDominators.h" 26193323Sed#include "llvm/CodeGen/MachineFrameInfo.h" 27193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 28309124Sdim#include "llvm/CodeGen/MachineModuleInfo.h" 29193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 30261991Sdim#include "llvm/CodeGen/StackMaps.h" 31249423Sdim#include "llvm/IR/DerivedTypes.h" 32280031Sdim#include "llvm/IR/Function.h" 33249423Sdim#include "llvm/IR/LLVMContext.h" 34234353Sdim#include "llvm/MC/MCAsmInfo.h" 35276479Sdim#include "llvm/MC/MCExpr.h" 36207618Srdivacky#include "llvm/MC/MCInst.h" 37193323Sed#include "llvm/Support/CommandLine.h" 38202375Srdivacky#include "llvm/Support/Debug.h" 39198090Srdivacky#include "llvm/Support/ErrorHandling.h" 40198090Srdivacky#include "llvm/Support/raw_ostream.h" 41193323Sed#include 
"llvm/Target/TargetOptions.h" 42199481Srdivacky 43276479Sdimusing namespace llvm; 44276479Sdim 45276479Sdim#define DEBUG_TYPE "x86-instr-info" 46276479Sdim 47261991Sdim#define GET_INSTRINFO_CTOR_DTOR 48224145Sdim#include "X86GenInstrInfo.inc" 49224145Sdim 50198090Srdivackystatic cl::opt<bool> 51327952Sdim NoFusing("disable-spill-fusing", 52327952Sdim cl::desc("Disable fusing of spill code into instructions"), 53327952Sdim cl::Hidden); 54198090Srdivackystatic cl::opt<bool> 55198090SrdivackyPrintFailedFusing("print-failed-fuse-candidates", 56198090Srdivacky cl::desc("Print instructions that the allocator wants to" 57198090Srdivacky " fuse, but the X86 backend currently can't"), 58198090Srdivacky cl::Hidden); 59198090Srdivackystatic cl::opt<bool> 60198090SrdivackyReMatPICStubLoad("remat-pic-stub-load", 61198090Srdivacky cl::desc("Re-materialize load from stub in PIC mode"), 62198090Srdivacky cl::init(false), cl::Hidden); 63309124Sdimstatic cl::opt<unsigned> 64309124SdimPartialRegUpdateClearance("partial-reg-update-clearance", 65309124Sdim cl::desc("Clearance between two register writes " 66309124Sdim "for inserting XOR to avoid partial " 67309124Sdim "register update"), 68309124Sdim cl::init(64), cl::Hidden); 69309124Sdimstatic cl::opt<unsigned> 70309124SdimUndefRegClearance("undef-reg-clearance", 71309124Sdim cl::desc("How many idle instructions we would like before " 72309124Sdim "certain undef register reads"), 73314564Sdim cl::init(128), cl::Hidden); 74193323Sed 75226633Sdim 76261991Sdim// Pin the vtable to this file. 77261991Sdimvoid X86InstrInfo::anchor() {} 78261991Sdim 79276479SdimX86InstrInfo::X86InstrInfo(X86Subtarget &STI) 80296417Sdim : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 81296417Sdim : X86::ADJCALLSTACKDOWN32), 82296417Sdim (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 83296417Sdim : X86::ADJCALLSTACKUP32), 84309124Sdim X86::CATCHRET, 85309124Sdim (STI.is64Bit() ? 
X86::RETQ : X86::RETL)), 86288943Sdim Subtarget(STI), RI(STI.getTargetTriple()) { 87226633Sdim} 88218893Sdim 89202375Srdivackybool 90202375SrdivackyX86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, 91202375Srdivacky unsigned &SrcReg, unsigned &DstReg, 92202375Srdivacky unsigned &SubIdx) const { 93202375Srdivacky switch (MI.getOpcode()) { 94202375Srdivacky default: break; 95202375Srdivacky case X86::MOVSX16rr8: 96202375Srdivacky case X86::MOVZX16rr8: 97202375Srdivacky case X86::MOVSX32rr8: 98202375Srdivacky case X86::MOVZX32rr8: 99202375Srdivacky case X86::MOVSX64rr8: 100276479Sdim if (!Subtarget.is64Bit()) 101202375Srdivacky // It's not always legal to reference the low 8-bit of the larger 102202375Srdivacky // register in 32-bit mode. 103202375Srdivacky return false; 104321369Sdim LLVM_FALLTHROUGH; 105202375Srdivacky case X86::MOVSX32rr16: 106202375Srdivacky case X86::MOVZX32rr16: 107202375Srdivacky case X86::MOVSX64rr16: 108261991Sdim case X86::MOVSX64rr32: { 109202375Srdivacky if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) 110202375Srdivacky // Be conservative. 
111202375Srdivacky return false; 112202375Srdivacky SrcReg = MI.getOperand(1).getReg(); 113202375Srdivacky DstReg = MI.getOperand(0).getReg(); 114202375Srdivacky switch (MI.getOpcode()) { 115243830Sdim default: llvm_unreachable("Unreachable!"); 116202375Srdivacky case X86::MOVSX16rr8: 117202375Srdivacky case X86::MOVZX16rr8: 118202375Srdivacky case X86::MOVSX32rr8: 119202375Srdivacky case X86::MOVZX32rr8: 120202375Srdivacky case X86::MOVSX64rr8: 121208599Srdivacky SubIdx = X86::sub_8bit; 122202375Srdivacky break; 123202375Srdivacky case X86::MOVSX32rr16: 124202375Srdivacky case X86::MOVZX32rr16: 125202375Srdivacky case X86::MOVSX64rr16: 126208599Srdivacky SubIdx = X86::sub_16bit; 127202375Srdivacky break; 128202375Srdivacky case X86::MOVSX64rr32: 129208599Srdivacky SubIdx = X86::sub_32bit; 130202375Srdivacky break; 131202375Srdivacky } 132202375Srdivacky return true; 133202375Srdivacky } 134202375Srdivacky } 135202375Srdivacky return false; 136202375Srdivacky} 137202375Srdivacky 138309124Sdimint X86InstrInfo::getSPAdjust(const MachineInstr &MI) const { 139309124Sdim const MachineFunction *MF = MI.getParent()->getParent(); 140280031Sdim const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); 141280031Sdim 142321369Sdim if (isFrameInstr(MI)) { 143280031Sdim unsigned StackAlign = TFI->getStackAlignment(); 144321369Sdim int SPAdj = alignTo(getFrameSize(MI), StackAlign); 145321369Sdim SPAdj -= getFrameAdjustment(MI); 146321369Sdim if (!isFrameSetup(MI)) 147321369Sdim SPAdj = -SPAdj; 148321369Sdim return SPAdj; 149280031Sdim } 150288943Sdim 151288943Sdim // To know whether a call adjusts the stack, we need information 152280031Sdim // that is bound to the following ADJCALLSTACKUP pseudo. 153280031Sdim // Look for the next ADJCALLSTACKUP that follows the call. 
154309124Sdim if (MI.isCall()) { 155309124Sdim const MachineBasicBlock *MBB = MI.getParent(); 156280031Sdim auto I = ++MachineBasicBlock::const_iterator(MI); 157280031Sdim for (auto E = MBB->end(); I != E; ++I) { 158280031Sdim if (I->getOpcode() == getCallFrameDestroyOpcode() || 159280031Sdim I->isCall()) 160280031Sdim break; 161280031Sdim } 162280031Sdim 163280031Sdim // If we could not find a frame destroy opcode, then it has already 164280031Sdim // been simplified, so we don't care. 165280031Sdim if (I->getOpcode() != getCallFrameDestroyOpcode()) 166280031Sdim return 0; 167280031Sdim 168280031Sdim return -(I->getOperand(1).getImm()); 169280031Sdim } 170280031Sdim 171280031Sdim // Currently handle only PUSHes we can reasonably expect to see 172280031Sdim // in call sequences 173309124Sdim switch (MI.getOpcode()) { 174288943Sdim default: 175280031Sdim return 0; 176280031Sdim case X86::PUSH32i8: 177280031Sdim case X86::PUSH32r: 178280031Sdim case X86::PUSH32rmm: 179280031Sdim case X86::PUSH32rmr: 180280031Sdim case X86::PUSHi32: 181280031Sdim return 4; 182309124Sdim case X86::PUSH64i8: 183309124Sdim case X86::PUSH64r: 184309124Sdim case X86::PUSH64rmm: 185309124Sdim case X86::PUSH64rmr: 186309124Sdim case X86::PUSH64i32: 187309124Sdim return 8; 188280031Sdim } 189280031Sdim} 190280031Sdim 191288943Sdim/// Return true and the FrameIndex if the specified 192199481Srdivacky/// operand and follow operands form a reference to the stack frame. 
// Returns true if the memory reference starting at operand Op is a simple
// direct stack-slot reference: FI base, scale 1, no index register, and
// zero displacement. On success FrameIndex receives the frame index.
bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
                                  int &FrameIndex) const {
  if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
      MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
      MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
      MI.getOperand(Op + X86::AddrDisp).isImm() &&
      MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
      MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
      MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
    FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
    return true;
  }
  return false;
}

// Returns true if Opcode is a plain register load that could be serving a
// stack-slot reload; MemBytes is set to the access size in bytes. The cases
// are grouped by access size (1/2/4/8/16/32/64 bytes), covering GPR, x87,
// MMX, mask-register (KMOV*) and SSE/AVX/AVX-512 vector loads.
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
  switch (Opcode) {
  default:
    return false;
  case X86::MOV8rm:
  case X86::KMOVBkm:
    MemBytes = 1;
    return true;
  case X86::MOV16rm:
  case X86::KMOVWkm:
    MemBytes = 2;
    return true;
  case X86::MOV32rm:
  case X86::MOVSSrm:
  case X86::MOVSSrm_alt:
  case X86::VMOVSSrm:
  case X86::VMOVSSrm_alt:
  case X86::VMOVSSZrm:
  case X86::VMOVSSZrm_alt:
  case X86::KMOVDkm:
    MemBytes = 4;
    return true;
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSDrm:
  case X86::MOVSDrm_alt:
  case X86::VMOVSDrm:
  case X86::VMOVSDrm_alt:
  case X86::VMOVSDZrm:
  case X86::VMOVSDZrm_alt:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::KMOVQkm:
    MemBytes = 8;
    return true;
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVAPDrm:
  case X86::MOVUPDrm:
  case X86::MOVDQArm:
  case X86::MOVDQUrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPSrm:
  case X86::VMOVAPDrm:
  case X86::VMOVUPDrm:
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVUPSZ128rm:
  case X86::VMOVAPSZ128rm_NOVLX:
  case X86::VMOVUPSZ128rm_NOVLX:
  case X86::VMOVAPDZ128rm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVDQU8Z128rm:
  case X86::VMOVDQU16Z128rm:
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU64Z128rm:
    MemBytes = 16;
    return true;
  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPSZ256rm:
  case X86::VMOVAPSZ256rm_NOVLX:
  case X86::VMOVUPSZ256rm_NOVLX:
  case X86::VMOVAPDZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVDQU8Z256rm:
  case X86::VMOVDQU16Z256rm:
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU64Z256rm:
    MemBytes = 32;
    return true;
  case X86::VMOVAPSZrm:
  case X86::VMOVUPSZrm:
  case X86::VMOVAPDZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVDQU8Zrm:
  case X86::VMOVDQU16Zrm:
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU64Zrm:
    MemBytes = 64;
    return true;
  }
}

// Store-side mirror of isFrameLoadOpcode: returns true if Opcode is a plain
// register store that could be serving a stack-slot spill; MemBytes is set
// to the access size in bytes.
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
  switch (Opcode) {
  default:
    return false;
  case X86::MOV8mr:
  case X86::KMOVBmk:
    MemBytes = 1;
    return true;
  case X86::MOV16mr:
  case X86::KMOVWmk:
    MemBytes = 2;
    return true;
  case X86::MOV32mr:
  case X86::MOVSSmr:
  case X86::VMOVSSmr:
  case X86::VMOVSSZmr:
  case X86::KMOVDmk:
    MemBytes = 4;
    return true;
  case X86::MOV64mr:
  case X86::ST_FpP64m:
  case X86::MOVSDmr:
  case X86::VMOVSDmr:
  case X86::VMOVSDZmr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
  case X86::KMOVQmk:
    MemBytes = 8;
    return true;
  case X86::MOVAPSmr:
  case X86::MOVUPSmr:
  case X86::MOVAPDmr:
  case X86::MOVUPDmr:
  case X86::MOVDQAmr:
  case X86::MOVDQUmr:
  case X86::VMOVAPSmr:
  case X86::VMOVUPSmr:
  case X86::VMOVAPDmr:
  case X86::VMOVUPDmr:
  case X86::VMOVDQAmr:
  case X86::VMOVDQUmr:
  case X86::VMOVUPSZ128mr:
  case X86::VMOVAPSZ128mr:
  case X86::VMOVUPSZ128mr_NOVLX:
  case X86::VMOVAPSZ128mr_NOVLX:
  case X86::VMOVUPDZ128mr:
  case X86::VMOVAPDZ128mr:
  case X86::VMOVDQA32Z128mr:
  case X86::VMOVDQU32Z128mr:
  case X86::VMOVDQA64Z128mr:
  case X86::VMOVDQU64Z128mr:
  case X86::VMOVDQU8Z128mr:
  case X86::VMOVDQU16Z128mr:
    MemBytes = 16;
    return true;
  case X86::VMOVUPSYmr:
  case X86::VMOVAPSYmr:
  case X86::VMOVUPDYmr:
  case X86::VMOVAPDYmr:
  case X86::VMOVDQUYmr:
  case X86::VMOVDQAYmr:
  case X86::VMOVUPSZ256mr:
  case X86::VMOVAPSZ256mr:
  case X86::VMOVUPSZ256mr_NOVLX:
  case X86::VMOVAPSZ256mr_NOVLX:
  case X86::VMOVUPDZ256mr:
  case X86::VMOVAPDZ256mr:
  case X86::VMOVDQU8Z256mr:
  case X86::VMOVDQU16Z256mr:
  case X86::VMOVDQA32Z256mr:
  case X86::VMOVDQU32Z256mr:
  case X86::VMOVDQA64Z256mr:
  case X86::VMOVDQU64Z256mr:
    MemBytes = 32;
    return true;
  case X86::VMOVUPSZmr:
  case X86::VMOVAPSZmr:
  case X86::VMOVUPDZmr:
  case X86::VMOVAPDZmr:
  case X86::VMOVDQU8Zmr:
  case X86::VMOVDQU16Zmr:
  case X86::VMOVDQA32Zmr:
  case X86::VMOVDQU32Zmr:
  case X86::VMOVDQA64Zmr:
  case X86::VMOVDQU64Zmr:
    MemBytes = 64;
    return true;
  }
  // NOTE(review): unreachable — every switch path returns — but kept for
  // symmetry with the original.
  return false;
}

// Convenience overload that discards the access size.
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
  unsigned Dummy;
  return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

// Returns the destination register if MI is a full-register (no subreg)
// reload from a stack slot, also reporting the frame index and access size;
// returns 0 otherwise.
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex,
                                           unsigned &MemBytes) const {
  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
    if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
      return MI.getOperand(0).getReg();
  return 0;
}

// Like isLoadFromStackSlot, but also recognizes reloads after frame-index
// elimination via the instruction's memory operands. Returns the loaded
// register, or 1 when only the post-FE memory operand identifies the slot
// (non-zero means "yes"), or 0 on failure.
unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                 int &FrameIndex) const {
  unsigned Dummy;
  if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
    unsigned Reg;
    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    SmallVector<const MachineMemOperand *, 1> Accesses;
    if (hasLoadFromStackSlot(MI, Accesses)) {
      FrameIndex =
          cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
              ->getFrameIndex();
      return 1;
    }
  }
  return 0;
}

// Convenience overload that discards the access size.
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex) const {
  unsigned Dummy;
  return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
}

// Returns the stored register if MI is a full-register (no subreg) spill to
// a stack slot, also reporting the frame index and access size; returns 0
// otherwise. The stored value is the operand after the 5 address operands.
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex,
                                          unsigned &MemBytes) const {
  if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
    if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
        isFrameOperand(MI, 0, FrameIndex))
      return MI.getOperand(X86::AddrNumOperands).getReg();
  return 0;
}

// Store-side mirror of isLoadFromStackSlotPostFE: also recognizes spills
// after frame-index elimination via memory operands.
unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                               int &FrameIndex) const {
  unsigned Dummy;
  if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
    unsigned Reg;
    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    SmallVector<const MachineMemOperand *, 1> Accesses;
    if (hasStoreToStackSlot(MI, Accesses)) {
      FrameIndex =
          cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
              ->getFrameIndex();
      return 1;
    }
  }
  return 0;
}

/// Return true if register is PIC base; i.e., defined by X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
  // Don't waste compile time scanning use-def chains of physregs.
  if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
    return false;
  bool isPICBase = false;
  // Every def of BaseReg must be a MOVPC32r, and there must be exactly one.
  for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
         E = MRI.def_instr_end(); I != E; ++I) {
    MachineInstr *DefMI = &*I;
    if (DefMI->getOpcode() != X86::MOVPC32r)
      return false;
    assert(!isPICBase && "More than one PIC base?");
    isPICBase = true;
  }
  return isPICBase;
}

// Decides whether a rematerializable-marked instruction is truly safe to
// rematerialize: plain loads must be invariant (e.g. constant pool) with a
// rematerializable base (zero, RIP, or the PIC base), and LEAs must not
// depend on a non-rematerializable base register.
bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                    AliasAnalysis *AA) const {
  switch (MI.getOpcode()) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV8rm_NOREX:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::MOVSSrm:
  case X86::MOVSSrm_alt:
  case X86::MOVSDrm:
  case X86::MOVSDrm_alt:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVAPDrm:
  case X86::MOVUPDrm:
  case X86::MOVDQArm:
  case X86::MOVDQUrm:
  case X86::VMOVSSrm:
  case X86::VMOVSSrm_alt:
  case X86::VMOVSDrm:
  case X86::VMOVSDrm_alt:
  case X86::VMOVAPSrm:
  case X86::VMOVUPSrm:
  case X86::VMOVAPDrm:
  case X86::VMOVUPDrm:
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  // AVX-512
  case X86::VMOVSSZrm:
  case X86::VMOVSSZrm_alt:
  case X86::VMOVSDZrm:
  case X86::VMOVSDZrm_alt:
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPDZ256rm:
  case X86::VMOVAPDZrm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVAPSZ128rm_NOVLX:
  case X86::VMOVAPSZ256rm_NOVLX:
  case X86::VMOVAPSZrm:
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU16Z128rm:
  case X86::VMOVDQU16Z256rm:
  case X86::VMOVDQU16Zrm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU64Z128rm:
  case X86::VMOVDQU64Z256rm:
  case X86::VMOVDQU64Zrm:
  case X86::VMOVDQU8Z128rm:
  case X86::VMOVDQU8Z256rm:
  case X86::VMOVDQU8Zrm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPSZ128rm:
  case X86::VMOVUPSZ256rm:
  case X86::VMOVUPSZ128rm_NOVLX:
  case X86::VMOVUPSZ256rm_NOVLX:
  case X86::VMOVUPSZrm: {
    // Loads from constant pools are trivially rematerializable.
    if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
        MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
        MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
        MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
        MI.isDereferenceableInvariantLoad(AA)) {
      unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
      if (BaseReg == 0 || BaseReg == X86::RIP)
        return true;
      // Allow re-materialization of PIC load.
      if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())
        return false;
      const MachineFunction &MF = *MI.getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }

  case X86::LEA32r:
  case X86::LEA64r: {
    if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
        MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
        MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
        !MI.getOperand(1 + X86::AddrDisp).isReg()) {
      // lea fi#, lea GV, etc. are all rematerializable.
      if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
        return true;
      unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
      if (BaseReg == 0)
        return true;
      // Allow re-materialization of lea PICBase + x.
      const MachineFunction &MF = *MI.getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }
  }

  // All other instructions marked M_REMATERIALIZABLE are always trivially
  // rematerializable.
  return true;
}

// Rematerializes Orig before I, rewriting the def to DestReg:SubIdx. If Orig
// would clobber a live EFLAGS (MOV32r0/r1/r_1 patterns), it is emitted as a
// flag-neutral MOV32ri of the equivalent constant instead of a clone.
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 unsigned DestReg, unsigned SubIdx,
                                 const MachineInstr &Orig,
                                 const TargetRegisterInfo &TRI) const {
  bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
  if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
    // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
    // effects.
    int Value;
    switch (Orig.getOpcode()) {
    case X86::MOV32r0:  Value = 0; break;
    case X86::MOV32r1:  Value = 1; break;
    case X86::MOV32r_1: Value = -1; break;
    default:
      llvm_unreachable("Unexpected instruction!");
    }

    const DebugLoc &DL = Orig.getDebugLoc();
    BuildMI(MBB, I, DL, get(X86::MOV32ri))
        .add(Orig.getOperand(0))
        .addImm(Value);
  } else {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    MBB.insert(I, MI);
  }

  // Either path inserted exactly one instruction immediately before I.
  MachineInstr &NewMI = *std::prev(I);
  NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
}

/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
  // Scan all operands for a live (non-dead) EFLAGS def.
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// Return the shift count of MI's operand ShiftAmtOperandIdx, truncated the
/// way the hardware truncates it (masked to the operand width's count range).
inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
                                              unsigned ShiftAmtOperandIdx) {
  // The shift count is six bits with the REX.W prefix and five bits without.
  unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
  unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
  return Imm & ShiftCountMask;
}

/// Check whether the given shift count can be represented by a LEA
/// instruction's scale factor.
inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
  // Left shift instructions can be transformed into load-effective-address
  // instructions if we can encode them appropriately.
  // A LEA instruction utilizes a SIB byte to encode its scale factor.
  // The SIB.scale field is two bits wide which means that we can encode any
  // shift amount less than 4.
  return ShAmt < 4 && ShAmt > 0;
}

// Picks a register (NewSrc) usable as a base/index operand of an LEA with
// opcode Opc when converting MI to three-address form. For LEA64_32r a
// 64-bit super-register (physreg) or a fresh 64-bit vreg fed by a COPY is
// produced; ImplicitOp receives an implicit use of the original physreg.
// Returns false if the register class cannot be constrained appropriately.
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
                                  unsigned Opc, bool AllowSP, unsigned &NewSrc,
                                  bool &isKill, MachineOperand &ImplicitOp,
                                  LiveVariables *LV) const {
  MachineFunction &MF = *MI.getParent()->getParent();
  const TargetRegisterClass *RC;
  if (AllowSP) {
    RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
  } else {
    RC = Opc != X86::LEA32r ?
      &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
  }
  unsigned SrcReg = Src.getReg();

  // For both LEA64 and LEA32 the register already has essentially the right
  // type (32-bit or 64-bit) we may just need to forbid SP.
  if (Opc != X86::LEA64_32r) {
    NewSrc = SrcReg;
    isKill = Src.isKill();
    assert(!Src.isUndef() && "Undef op doesn't need optimization");

    if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
        !MF.getRegInfo().constrainRegClass(NewSrc, RC))
      return false;

    return true;
  }

  // This is for an LEA64_32r and incoming registers are 32-bit. One way or
  // another we need to add 64-bit registers to the final MI.
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    ImplicitOp = Src;
    ImplicitOp.setImplicit();

    NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
    isKill = Src.isKill();
    assert(!Src.isUndef() && "Undef op doesn't need optimization");
  } else {
    // Virtual register of the wrong class, we have to create a temporary 64-bit
    // (Tail of classifyLEAReg.) The source vreg could not be reused directly:
    // create a fresh 64-bit vreg and COPY the 32-bit source into its low
    // subregister (rest of the register is explicitly undef).
    NewSrc = MF.getRegInfo().createVirtualRegister(RC);
    MachineInstr *Copy =
        BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
            .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
            .add(Src);

    // Which is obviously going to be dead after we're done with it.
    isKill = true;

    // Keep liveness consistent: the kill of SrcReg moves from MI to the COPY.
    if (LV)
      LV->replaceKillInstruction(SrcReg, MI, *Copy);
  }

  // We've set all the parameters without issue.
  return true;
}

/// Helper for convertToThreeAddress: rewrite an 8-bit or 16-bit two-address
/// ALU op (SHL/INC/DEC/ADD by register or immediate) as a 32-bit LEA.
/// The narrow source is copied into the low bits of an implicit-def 64-bit
/// register, the arithmetic is performed by an LEA64_32r, and the low 8/16
/// bits of the result are copied back into the original destination.
/// Returns the final extracting COPY, or nullptr when the transform cannot be
/// done (currently: on non-64-bit targets — see the TODO below).
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
    unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
    LiveVariables *LV, bool Is8BitOp) const {
  // We handle 8-bit adds and various 16-bit opcodes in the switch below.
  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
  assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
              *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
         "Unexpected type for LEA transform");

  // TODO: For a 32-bit target, we need to adjust the LEA variables with
  // something like this:
  //   Opcode = X86::LEA32r;
  //   InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
  //   OutRegLEA =
  //       Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
  //                : RegInfo.createVirtualRegister(&X86::GR32RegClass);
  if (!Subtarget.is64Bit())
    return nullptr;

  unsigned Opcode = X86::LEA64_32r;
  unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
  unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);

  // Build and insert into an implicit UNDEF value. This is OK because
  // we will be shifting and then extracting the lower 8/16-bits.
  // This has the potential to cause partial register stall. e.g.
  //   movw    (%rbp,%rcx,2), %dx
  //   leal    -65(%rdx), %esi
  // But testing has shown this *does* help performance in 64-bit mode (at
  // least on modern x86 machines).
  MachineBasicBlock::iterator MBBI = MI.getIterator();
  unsigned Dest = MI.getOperand(0).getReg();
  unsigned Src = MI.getOperand(1).getReg();
  bool IsDead = MI.getOperand(0).isDead();
  bool IsKill = MI.getOperand(1).isKill();
  unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
  assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
  MachineInstr *InsMI =
      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
          .addReg(InRegLEA, RegState::Define, SubReg)
          .addReg(Src, getKillRegState(IsKill));

  // Emit the LEA; the switch below supplies its address operands according to
  // the opcode being replaced.
  MachineInstrBuilder MIB =
      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
  switch (MIOpc) {
  default: llvm_unreachable("Unreachable!");
  case X86::SHL8ri:
  case X86::SHL16ri: {
    unsigned ShAmt = MI.getOperand(2).getImm();
    // A constant shift becomes a scaled index: base 0, scale 1 << ShAmt.
    MIB.addReg(0).addImm(1ULL << ShAmt)
       .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
    break;
  }
  case X86::INC8r:
  case X86::INC16r:
    addRegOffset(MIB, InRegLEA, true, 1);
    break;
  case X86::DEC8r:
  case X86::DEC16r:
    addRegOffset(MIB, InRegLEA, true, -1);
    break;
  case X86::ADD8ri:
  case X86::ADD8ri_DB:
  case X86::ADD16ri:
  case X86::ADD16ri8:
  case X86::ADD16ri_DB:
  case X86::ADD16ri8_DB:
    addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
    break;
  case X86::ADD8rr:
  case X86::ADD8rr_DB:
  case X86::ADD16rr:
  case X86::ADD16rr_DB: {
    unsigned Src2 = MI.getOperand(2).getReg();
    bool IsKill2 = MI.getOperand(2).isKill();
    assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
    unsigned InRegLEA2 = 0;
    MachineInstr *InsMI2 = nullptr;
    if (Src == Src2) {
      // ADD8rr/ADD16rr killed %reg1028, %reg1028
      // just a single insert_subreg.
      addRegReg(MIB, InRegLEA, true, InRegLEA, false);
    } else {
      if (Subtarget.is64Bit())
        InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
      else
        InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
      // Build and insert into an implicit UNDEF value. This is OK because
      // we will be shifting and then extracting the lower 8/16-bits.
      BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
      InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
                   .addReg(InRegLEA2, RegState::Define, SubReg)
                   .addReg(Src2, getKillRegState(IsKill2));
      addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
    }
    if (LV && IsKill2 && InsMI2)
      LV->replaceKillInstruction(Src2, MI, *InsMI2);
    break;
  }
  }

  MachineInstr *NewMI = MIB;
  // Copy the low 8/16 bits of the LEA result back into the original dest.
  MachineInstr *ExtMI =
      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
          .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
          .addReg(OutRegLEA, RegState::Kill, SubReg);

  if (LV) {
    // Update live variables.
    LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
    LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
    if (IsKill)
      LV->replaceKillInstruction(Src, MI, *InsMI);
    if (IsDead)
      LV->replaceKillInstruction(Dest, MI, *ExtMI);
  }

  return ExtMI;
}

/// This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand.  This allows the X86 target (for
/// example) to convert ADD and SHL instructions into LEA instructions if they
/// would require register copies due to two-addressness.
///
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                    MachineInstr &MI, LiveVariables *LV) const {
  // The following opcodes also sets the condition code register(s). Only
  // convert them to equivalent lea if the condition code register def's
  // are dead!
  if (hasLiveCondCodeDef(MI))
    return nullptr;

  MachineFunction &MF = *MI.getParent()->getParent();
  // All instructions input are two-addr instructions.  Get the known operands.
  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Src = MI.getOperand(1);

  // Ideally, operations with undef should be folded before we get here, but we
  // can't guarantee it. Bail out because optimizing undefs is a waste of time.
  // Without this, we have to forward undef state to new register operands to
  // avoid machine verifier errors.
  if (Src.isUndef())
    return nullptr;
  if (MI.getNumOperands() > 2)
    if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
      return nullptr;

  MachineInstr *NewMI = nullptr;
  bool Is64Bit = Subtarget.is64Bit();

  bool Is8BitOp = false;
  unsigned MIOpc = MI.getOpcode();
  switch (MIOpc) {
  default: llvm_unreachable("Unreachable!");
  case X86::SHL64ri: {
    assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;

    // LEA can't handle RSP.
    if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
        !MF.getRegInfo().constrainRegClass(Src.getReg(),
                                           &X86::GR64_NOSPRegClass))
      return nullptr;

    // shl %r, N  ==>  lea (, %r, 1 << N) — no base, no displacement.
    NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
                .add(Dest)
                .addReg(0)
                .addImm(1ULL << ShAmt)
                .add(Src)
                .addImm(0)
                .addReg(0);
    break;
  }
  case X86::SHL32ri: {
    assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;

    // 32-bit results on a 64-bit target use LEA64_32r.
    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;

    // LEA can't handle ESP.
    bool isKill;
    unsigned SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
                        SrcReg, isKill, ImplicitOp, LV))
      return nullptr;

    MachineInstrBuilder MIB =
        BuildMI(MF, MI.getDebugLoc(), get(Opc))
            .add(Dest)
            .addReg(0)
            .addImm(1ULL << ShAmt)
            .addReg(SrcReg, getKillRegState(isKill))
            .addImm(0)
            .addReg(0);
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);
    NewMI = MIB;

    break;
  }
  case X86::SHL8ri:
    Is8BitOp = true;
    LLVM_FALLTHROUGH;
  case X86::SHL16ri: {
    assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt))
      return nullptr;
    // 8/16-bit shifts go through the LEA64_32r helper.
    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
  }
  case X86::INC64r:
  case X86::INC32r: {
    assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
    unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
        (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
    bool isKill;
    unsigned SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
                        ImplicitOp, LV))
      return nullptr;

    MachineInstrBuilder MIB =
        BuildMI(MF, MI.getDebugLoc(), get(Opc))
            .add(Dest)
            .addReg(SrcReg, getKillRegState(isKill));
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);

    // inc %r  ==>  lea 1(%r).
    NewMI = addOffset(MIB, 1);
    break;
  }
  case X86::DEC64r:
  case X86::DEC32r: {
    assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
    unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
        : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);

    bool isKill;
    unsigned SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
                        ImplicitOp, LV))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                                  .add(Dest)
                                  .addReg(SrcReg, getKillRegState(isKill));
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);

    // dec %r  ==>  lea -1(%r).
    NewMI = addOffset(MIB, -1);

    break;
  }
  case X86::DEC8r:
  case X86::INC8r:
    Is8BitOp = true;
    LLVM_FALLTHROUGH;
  case X86::DEC16r:
  case X86::INC16r:
    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB: {
    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
    unsigned Opc;
    if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
      Opc = X86::LEA64r;
    else
      Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;

    // First source may be SP (it becomes the LEA base register)...
    bool isKill;
    unsigned SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
                        SrcReg, isKill, ImplicitOp, LV))
      return nullptr;

    // ...but the second source may not (it becomes the index register).
    const MachineOperand &Src2 = MI.getOperand(2);
    bool isKill2;
    unsigned SrcReg2;
    MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
                        SrcReg2, isKill2, ImplicitOp2, LV))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);
    if (ImplicitOp2.getReg() != 0)
      MIB.add(ImplicitOp2);

    NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
    if (LV && Src2.isKill())
      LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
    break;
  }
  case X86::ADD8rr:
  case X86::ADD8rr_DB:
    Is8BitOp = true;
    LLVM_FALLTHROUGH;
  case X86::ADD16rr:
  case X86::ADD16rr_DB:
    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
    // add %r, imm  ==>  lea imm(%r).
    NewMI = addOffset(
        BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
        MI.getOperand(2));
    break;
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB: {
    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;

    bool isKill;
    unsigned SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
                        SrcReg, isKill, ImplicitOp, LV))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                                  .add(Dest)
                                  .addReg(SrcReg, getKillRegState(isKill));
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);

    NewMI = addOffset(MIB, MI.getOperand(2));
    break;
  }
  case X86::ADD8ri:
  case X86::ADD8ri_DB:
    Is8BitOp = true;
    LLVM_FALLTHROUGH;
  case X86::ADD16ri:
  case X86::ADD16ri8:
  case X86::ADD16ri_DB:
  case X86::ADD16ri8_DB:
    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
  case X86::SUB8ri:
  case X86::SUB16ri8:
  case X86::SUB16ri:
    /// FIXME: Support these similar to ADD8ri/ADD16ri*.
    return nullptr;
  case X86::SUB32ri8:
  case X86::SUB32ri: {
    if (!MI.getOperand(2).isImm())
      return nullptr;
    int64_t Imm = MI.getOperand(2).getImm();
    // sub %r, imm becomes lea -imm(%r); the negated immediate must still fit
    // in the LEA's signed 32-bit displacement.
    if (!isInt<32>(-Imm))
      return nullptr;

    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;

    bool isKill;
    unsigned SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
                        SrcReg, isKill, ImplicitOp, LV))
      return nullptr;

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                                  .add(Dest)
                                  .addReg(SrcReg, getKillRegState(isKill));
    if (ImplicitOp.getReg() != 0)
      MIB.add(ImplicitOp);

    NewMI = addOffset(MIB, -Imm);
    break;
  }

  case X86::SUB64ri8:
  case X86::SUB64ri32: {
    if (!MI.getOperand(2).isImm())
      return nullptr;
    int64_t Imm = MI.getOperand(2).getImm();
    if (!isInt<32>(-Imm))
      return nullptr;

    assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");

    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(),
                                      get(X86::LEA64r)).add(Dest).add(Src);
    NewMI = addOffset(MIB, -Imm);
    break;
  }

  // Masked loads with a register passthru are two-address; rewrite each as
  // the equivalent three-address masked blend (VPBLENDM*/VBLENDM*).
  case X86::VMOVDQU8Z128rmk:
  case X86::VMOVDQU8Z256rmk:
  case X86::VMOVDQU8Zrmk:
  case X86::VMOVDQU16Z128rmk:
  case X86::VMOVDQU16Z256rmk:
  case X86::VMOVDQU16Zrmk:
  case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
  case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
  case X86::VMOVDQU32Zrmk:    case X86::VMOVDQA32Zrmk:
  case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
  case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
  case X86::VMOVDQU64Zrmk:    case X86::VMOVDQA64Zrmk:
  case X86::VMOVUPDZ128rmk:   case X86::VMOVAPDZ128rmk:
  case X86::VMOVUPDZ256rmk:   case X86::VMOVAPDZ256rmk:
  case X86::VMOVUPDZrmk:      case X86::VMOVAPDZrmk:
  case X86::VMOVUPSZ128rmk:   case X86::VMOVAPSZ128rmk:
  case X86::VMOVUPSZ256rmk:   case X86::VMOVAPSZ256rmk:
  case X86::VMOVUPSZrmk:      case X86::VMOVAPSZrmk: {
    unsigned Opc;
    switch (MIOpc) {
    default: llvm_unreachable("Unreachable!");
    case X86::VMOVDQU8Z128rmk:  Opc = X86::VPBLENDMBZ128rmk; break;
    case X86::VMOVDQU8Z256rmk:  Opc = X86::VPBLENDMBZ256rmk; break;
    case X86::VMOVDQU8Zrmk:     Opc = X86::VPBLENDMBZrmk; break;
    case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
    case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
    case X86::VMOVDQU16Zrmk:    Opc = X86::VPBLENDMWZrmk; break;
    case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
    case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
    case X86::VMOVDQU32Zrmk:    Opc = X86::VPBLENDMDZrmk; break;
    case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
    case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
    case X86::VMOVDQU64Zrmk:    Opc = X86::VPBLENDMQZrmk; break;
    case X86::VMOVUPDZ128rmk:   Opc = X86::VBLENDMPDZ128rmk; break;
    case X86::VMOVUPDZ256rmk:   Opc = X86::VBLENDMPDZ256rmk; break;
    case X86::VMOVUPDZrmk:      Opc = X86::VBLENDMPDZrmk; break;
    case X86::VMOVUPSZ128rmk:   Opc = X86::VBLENDMPSZ128rmk; break;
    case X86::VMOVUPSZ256rmk:   Opc = X86::VBLENDMPSZ256rmk; break;
    case X86::VMOVUPSZrmk:      Opc = X86::VBLENDMPSZrmk; break;
    case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
    case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
    case X86::VMOVDQA32Zrmk:    Opc = X86::VPBLENDMDZrmk; break;
    case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
    case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
    case X86::VMOVDQA64Zrmk:    Opc = X86::VPBLENDMQZrmk; break;
    case X86::VMOVAPDZ128rmk:   Opc = X86::VBLENDMPDZ128rmk; break;
    case X86::VMOVAPDZ256rmk:   Opc = X86::VBLENDMPDZ256rmk; break;
    case X86::VMOVAPDZrmk:      Opc = X86::VBLENDMPDZrmk; break;
    case X86::VMOVAPSZ128rmk:   Opc = X86::VBLENDMPSZ128rmk; break;
    case X86::VMOVAPSZ256rmk:   Opc = X86::VBLENDMPSZ256rmk; break;
    case X86::VMOVAPSZrmk:      Opc = X86::VBLENDMPSZrmk; break;
    }

    // Operands: dest, mask (op 2), passthru (Src), then the 5 memory operands.
    NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                .add(Dest)
                .add(MI.getOperand(2))
                .add(Src)
                .add(MI.getOperand(3))
                .add(MI.getOperand(4))
                .add(MI.getOperand(5))
                .add(MI.getOperand(6))
                .add(MI.getOperand(7));
    break;
  }
  case X86::VMOVDQU8Z128rrk:
  case X86::VMOVDQU8Z256rrk:
  case X86::VMOVDQU8Zrrk:
  case X86::VMOVDQU16Z128rrk:
  case X86::VMOVDQU16Z256rrk:
  case X86::VMOVDQU16Zrrk:
  case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
  case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
  case X86::VMOVDQU32Zrrk:    case X86::VMOVDQA32Zrrk:
  case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
  case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
  case X86::VMOVDQU64Zrrk:    case X86::VMOVDQA64Zrrk:
  case X86::VMOVUPDZ128rrk:   case X86::VMOVAPDZ128rrk:
  case X86::VMOVUPDZ256rrk:   case X86::VMOVAPDZ256rrk:
  case X86::VMOVUPDZrrk:      case X86::VMOVAPDZrrk:
  case X86::VMOVUPSZ128rrk:   case X86::VMOVAPSZ128rrk:
  case X86::VMOVUPSZ256rrk:   case X86::VMOVAPSZ256rrk:
  case X86::VMOVUPSZrrk:      case X86::VMOVAPSZrrk: {
    unsigned Opc;
    switch (MIOpc) {
    default: llvm_unreachable("Unreachable!");
    case X86::VMOVDQU8Z128rrk:  Opc = X86::VPBLENDMBZ128rrk; break;
    case X86::VMOVDQU8Z256rrk:  Opc = X86::VPBLENDMBZ256rrk; break;
    case X86::VMOVDQU8Zrrk:     Opc = X86::VPBLENDMBZrrk; break;
    case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
    case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
    case X86::VMOVDQU16Zrrk:    Opc = X86::VPBLENDMWZrrk; break;
    case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
    case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
    case X86::VMOVDQU32Zrrk:    Opc = X86::VPBLENDMDZrrk; break;
    case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
    case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
    case X86::VMOVDQU64Zrrk:    Opc = X86::VPBLENDMQZrrk; break;
    case X86::VMOVUPDZ128rrk:   Opc = X86::VBLENDMPDZ128rrk; break;
    case X86::VMOVUPDZ256rrk:   Opc = X86::VBLENDMPDZ256rrk; break;
    case X86::VMOVUPDZrrk:      Opc = X86::VBLENDMPDZrrk; break;
    case X86::VMOVUPSZ128rrk:   Opc = X86::VBLENDMPSZ128rrk; break;
    case X86::VMOVUPSZ256rrk:   Opc = X86::VBLENDMPSZ256rrk; break;
    case X86::VMOVUPSZrrk:      Opc = X86::VBLENDMPSZrrk; break;
    case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
    case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
    case X86::VMOVDQA32Zrrk:    Opc = X86::VPBLENDMDZrrk; break;
    case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
    case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
    case X86::VMOVDQA64Zrrk:    Opc = X86::VPBLENDMQZrrk; break;
    case X86::VMOVAPDZ128rrk:   Opc = X86::VBLENDMPDZ128rrk; break;
    case X86::VMOVAPDZ256rrk:   Opc = X86::VBLENDMPDZ256rrk; break;
    case X86::VMOVAPDZrrk:      Opc = X86::VBLENDMPDZrrk; break;
    case X86::VMOVAPSZ128rrk:   Opc = X86::VBLENDMPSZ128rrk; break;
    case X86::VMOVAPSZ256rrk:   Opc = X86::VBLENDMPSZ256rrk; break;
    case X86::VMOVAPSZrrk:      Opc = X86::VBLENDMPSZrrk; break;
    }

    // Operands: dest, mask (op 2), passthru (Src), source register (op 3).
    NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
                .add(Dest)
                .add(MI.getOperand(2))
                .add(Src)
                .add(MI.getOperand(3));
    break;
  }
  }

  if (!NewMI) return nullptr;

  if (LV) {  // Update live variables
    if (Src.isKill())
      LV->replaceKillInstruction(Src.getReg(), MI, *NewMI);
    if (Dest.isDead())
      LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI);
  }

  MFI->insert(MI.getIterator(), NewMI); // Insert the new inst
  return NewMI;
}

/// This determines which of three possible cases of a three source commute
/// the source indexes correspond to
/// taking into account any mask operands.
/// All prevents commuting a passthru operand. Returns -1 if the commute isn't
/// possible.
/// Case 0 - Possible to commute the first and second operands.
/// Case 1 - Possible to commute the first and third operands.
/// Case 2 - Possible to commute the second and third operands.
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
                                       unsigned SrcOpIdx2) {
  // Put the lowest index to SrcOpIdx1 to simplify the checks below.
  if (SrcOpIdx1 > SrcOpIdx2)
    std::swap(SrcOpIdx1, SrcOpIdx2);

  unsigned Op1 = 1, Op2 = 2, Op3 = 3;
  // A masked instruction carries the mask operand between the first and
  // second sources, shifting the latter two one slot to the right.
  if (X86II::isKMasked(TSFlags)) {
    Op2++;
    Op3++;
  }

  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
    return 0;
  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
    return 1;
  if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
    return 2;
  llvm_unreachable("Unknown three src commute case.");
}

unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
    const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
    const X86InstrFMA3Group &FMA3Group) const {

  unsigned Opc = MI.getOpcode();

  // TODO: Commuting the 1st operand of FMA*_Int requires some additional
  // analysis. The commute optimization is legal only if all users of FMA*_Int
  // use only the lowest element of the FMA*_Int instruction. Such analysis are
  // not implemented yet. So, just return 0 in that case.
  // When such analysis are available this place will be the right place for
  // calling it.
  assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
         "Intrinsic instructions can't commute operand 1");

  // Determine which case this commute is or if it can't be done.
  unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
                                         SrcOpIdx2);
  assert(Case < 3 && "Unexpected case number!");

  // Define the FMA forms mapping array that helps to map input FMA form
  // to output FMA form to preserve the operation semantics after
  // commuting the operands.
  const unsigned Form132Index = 0;
  const unsigned Form213Index = 1;
  const unsigned Form231Index = 2;
  static const unsigned FormMapping[][3] = {
    // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
    //   FMA132 A, C, b; ==> FMA231 C, A, b;
    //   FMA213 B, A, c; ==> FMA213 A, B, c;
    //   FMA231 C, A, b; ==> FMA132 A, C, b;
    { Form231Index, Form213Index, Form132Index },
    // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
    //   FMA132 A, c, B; ==> FMA132 B, c, A;
    //   FMA213 B, a, C; ==> FMA231 C, a, B;
    //   FMA231 C, a, B; ==> FMA213 B, a, C;
    { Form132Index, Form231Index, Form213Index },
    // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
    //   FMA132 a, C, B; ==> FMA213 a, B, C;
    //   FMA213 b, A, C; ==> FMA132 b, C, A;
    //   FMA231 c, A, B; ==> FMA231 c, B, A;
    { Form213Index, Form132Index, Form231Index }
  };

  // Find which of the group's three forms the current opcode is, then map it
  // to the form that keeps the semantics after swapping the two operands.
  unsigned FMAForms[3];
  FMAForms[0] = FMA3Group.get132Opcode();
  FMAForms[1] = FMA3Group.get213Opcode();
  FMAForms[2] = FMA3Group.get231Opcode();
  unsigned FormIndex;
  for (FormIndex = 0; FormIndex < 3; FormIndex++)
    if (Opc == FMAForms[FormIndex])
      break;

  // Everything is ready, just adjust the FMA opcode and return it.
  FormIndex = FormMapping[Case][FormIndex];
  return FMAForms[FormIndex];
}

static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
                             unsigned SrcOpIdx2) {
  // Determine which case this commute is or if it can't be done.
  unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
                                         SrcOpIdx2);
  assert(Case < 3 && "Unexpected case value!");

  // For each case we need to swap two pairs of bits in the final immediate.
  static const uint8_t SwapMasks[3][4] = {
    { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
    { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
    { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
  };

  // The ternary-logic truth-table immediate is the last operand.
  uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
  // Clear out the bits we are swapping.
  uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
                           SwapMasks[Case][2] | SwapMasks[Case][3]);
  // If the immediate had a bit of the pair set, then set the opposite bit.
  if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
  if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
  if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
  if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
  MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
}

// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
// commuted.
static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
// Register-register, register-memory and zero-masked forms of a VPERMI2/
// VPERMT2 opcode family for one element-type suffix.
#define VPERM_CASES(Suffix) \
  case X86::VPERMI2##Suffix##128rr:    case X86::VPERMT2##Suffix##128rr:    \
  case X86::VPERMI2##Suffix##256rr:    case X86::VPERMT2##Suffix##256rr:    \
  case X86::VPERMI2##Suffix##rr:       case X86::VPERMT2##Suffix##rr:       \
  case X86::VPERMI2##Suffix##128rm:    case X86::VPERMT2##Suffix##128rm:    \
  case X86::VPERMI2##Suffix##256rm:    case X86::VPERMT2##Suffix##256rm:    \
  case X86::VPERMI2##Suffix##rm:       case X86::VPERMT2##Suffix##rm:       \
  case X86::VPERMI2##Suffix##128rrkz:  case X86::VPERMT2##Suffix##128rrkz:  \
  case X86::VPERMI2##Suffix##256rrkz:  case X86::VPERMT2##Suffix##256rrkz:  \
  case X86::VPERMI2##Suffix##rrkz:     case X86::VPERMT2##Suffix##rrkz:     \
  case X86::VPERMI2##Suffix##128rmkz:  case X86::VPERMT2##Suffix##128rmkz:  \
  case X86::VPERMI2##Suffix##256rmkz:  case X86::VPERMT2##Suffix##256rmkz:  \
  case X86::VPERMI2##Suffix##rmkz:     case X86::VPERMT2##Suffix##rmkz:

// Same, plus the broadcast-memory forms (only element types that have them).
#define VPERM_CASES_BROADCAST(Suffix) \
  VPERM_CASES(Suffix) \
  case X86::VPERMI2##Suffix##128rmb:   case X86::VPERMT2##Suffix##128rmb:   \
  case X86::VPERMI2##Suffix##256rmb:   case X86::VPERMT2##Suffix##256rmb:   \
  case X86::VPERMI2##Suffix##rmb:      case X86::VPERMT2##Suffix##rmb:      \
  case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
  case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
  case X86::VPERMI2##Suffix##rmbkz:    case X86::VPERMT2##Suffix##rmbkz:

  switch (Opcode) {
  default: return false;
  VPERM_CASES(B)
  VPERM_CASES_BROADCAST(D)
  VPERM_CASES_BROADCAST(PD)
  VPERM_CASES_BROADCAST(PS)
  VPERM_CASES_BROADCAST(Q)
  VPERM_CASES(W)
    return true;
  }
#undef VPERM_CASES_BROADCAST
#undef VPERM_CASES
}

// Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
// from the I opcode to the T opcode and vice versa.
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
// Map every non-broadcast form of opcode family Orig to family New.
#define VPERM_CASES(Orig, New) \
  case X86::Orig##128rr:   return X86::New##128rr;   \
  case X86::Orig##128rrkz: return X86::New##128rrkz; \
  case X86::Orig##128rm:   return X86::New##128rm;   \
  case X86::Orig##128rmkz: return X86::New##128rmkz; \
  case X86::Orig##256rr:   return X86::New##256rr;   \
  case X86::Orig##256rrkz: return X86::New##256rrkz; \
  case X86::Orig##256rm:   return X86::New##256rm;   \
  case X86::Orig##256rmkz: return X86::New##256rmkz; \
  case X86::Orig##rr:      return X86::New##rr;      \
  case X86::Orig##rrkz:    return X86::New##rrkz;    \
  case X86::Orig##rm:      return X86::New##rm;      \
  case X86::Orig##rmkz:    return X86::New##rmkz;

// Same, plus the broadcast-memory forms.
#define VPERM_CASES_BROADCAST(Orig, New) \
  VPERM_CASES(Orig, New) \
  case X86::Orig##128rmb:   return X86::New##128rmb;   \
  case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
  case X86::Orig##256rmb:   return X86::New##256rmb;   \
  case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
  case X86::Orig##rmb:      return X86::New##rmb;      \
  case X86::Orig##rmbkz:    return X86::New##rmbkz;

  switch (Opcode) {
  VPERM_CASES(VPERMI2B, VPERMT2B)
  VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
  VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
  VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
  VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
  VPERM_CASES(VPERMI2W, VPERMT2W)
  VPERM_CASES(VPERMT2B, VPERMI2B)
  VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
  VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
  VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
  VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
  VPERM_CASES(VPERMT2W, VPERMI2W)
  }

  llvm_unreachable("Unreachable!");
#undef VPERM_CASES_BROADCAST
#undef VPERM_CASES
}

MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  // If NewMI was requested, clone MI and operate on the clone; otherwise
  // update MI in place.
  auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
    if (NewMI)
      return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
    return MI;
  };

  switch (MI.getOpcode()) {
  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
    unsigned Opc;
    unsigned Size;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
    case X86::SHLD16rri8: Size = 16; Opc =
                                           X86::SHRD16rri8; break;
    case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
    case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
    case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
    case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
    }
    // Swapping SHLD<->SHRD also requires complementing the shift amount
    // relative to the operand width.
    unsigned Amt = MI.getOperand(3).getImm();
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opc));
    WorkingMI.getOperand(3).setImm(Size - Amt);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::PFSUBrr:
  case X86::PFSUBRrr: {
    // PFSUB  x, y: x = x - y
    // PFSUBR x, y: x = y - x
    // Swapping the operands maps one onto the other.
    unsigned Opc =
        (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr);
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opc));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::BLENDPDrri:
  case X86::BLENDPSrri:
  case X86::VBLENDPDrri:
  case X86::VBLENDPSrri:
    // If we're optimizing for size, try to use MOVSD/MOVSS.
    if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
      unsigned Mask, Opc;
      switch (MI.getOpcode()) {
      default: llvm_unreachable("Unreachable!");
      case X86::BLENDPDrri:  Opc = X86::MOVSDrr;  Mask = 0x03; break;
      case X86::BLENDPSrri:  Opc = X86::MOVSSrr;  Mask = 0x0F; break;
      case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break;
      case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break;
      }
      // The commuted blend must select only the low element from the second
      // source (imm ^ Mask == 1) for the MOVS[SD] replacement to be valid.
      if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
        auto &WorkingMI = cloneIfNew(MI);
        WorkingMI.setDesc(get(Opc));
        WorkingMI.RemoveOperand(3);
        return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
                                                       /*NewMI=*/false,
                                                       OpIdx1, OpIdx2);
      }
    }
    LLVM_FALLTHROUGH;
  case X86::PBLENDWrri:
  case X86::VBLENDPDYrri:
  case X86::VBLENDPSYrri:
  case X86::VPBLENDDrri:
  case X86::VPBLENDWrri:
  case X86::VPBLENDDYrri:
  case X86::VPBLENDWYrri:{
    // Mask is the set of immediate bits that are meaningful for this blend
    // width; commuting complements the immediate within that mask.
    int8_t Mask;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::BLENDPDrri:   Mask = (int8_t)0x03; break;
    case X86::BLENDPSrri:   Mask = (int8_t)0x0F; break;
    case X86::PBLENDWrri:   Mask = (int8_t)0xFF; break;
    case X86::VBLENDPDrri:  Mask = (int8_t)0x03; break;
    case X86::VBLENDPSrri:  Mask = (int8_t)0x0F; break;
    case X86::VBLENDPDYrri: Mask = (int8_t)0x0F; break;
    case X86::VBLENDPSYrri: Mask = (int8_t)0xFF; break;
    case X86::VPBLENDDrri:  Mask = (int8_t)0x0F; break;
    case X86::VPBLENDWrri:  Mask = (int8_t)0xFF; break;
    case X86::VPBLENDDYrri: Mask = (int8_t)0xFF; break;
    case X86::VPBLENDWYrri: Mask = (int8_t)0xFF; break;
    }
    // Only the least significant bits of Imm are used.
    // Using int8_t to ensure it will be sign extended to the int64_t that
    // setImm takes in order to match isel behavior.
    int8_t Imm = MI.getOperand(3).getImm() & Mask;
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm(Mask ^ Imm);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::INSERTPSrr:
  case X86::VINSERTPSrr:
  case X86::VINSERTPSZrr: {
    // Decode the INSERTPS immediate: zero mask, destination slot, source slot.
    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
    unsigned ZMask = Imm & 15;
    unsigned DstIdx = (Imm >> 4) & 3;
    unsigned SrcIdx = (Imm >> 6) & 3;

    // We can commute insertps if we zero 2 of the elements, the insertion is
    // "inline" and we don't override the insertion with a zero.
    if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
        countPopulation(ZMask) == 2) {
      // AltIdx is the one lane that is neither zeroed nor the insert slot.
      unsigned AltIdx = findFirstSet((ZMask | (1 << DstIdx)) ^ 15);
      assert(AltIdx < 4 && "Illegal insertion index");
      unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(AltImm);
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }
    return nullptr;
  }
  case X86::MOVSDrr:
  case X86::MOVSSrr:
  case X86::VMOVSDrr:
  case X86::VMOVSSrr:{
    // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
    if (Subtarget.hasSSE41()) {
      unsigned Mask, Opc;
      switch (MI.getOpcode()) {
      default: llvm_unreachable("Unreachable!");
      case X86::MOVSDrr:  Opc = X86::BLENDPDrri;  Mask = 0x02; break;
      case X86::MOVSSrr:  Opc = X86::BLENDPSrri;  Mask = 0x0E; break;
      case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
      case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
      }

      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }

    // Convert to SHUFPD.
    assert(MI.getOpcode() == X86::MOVSDrr &&
           "Can only commute MOVSDrr without SSE4.1");

    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(X86::SHUFPDrri));
    WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::SHUFPDrri: {
    // Commute to MOVSD.
    // Only the 0x02 form is produced by the MOVSDrr conversion above, and
    // findCommutedOpIndices only allows that form here.
    assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(X86::MOVSDrr));
    WorkingMI.RemoveOperand(3);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::PCLMULQDQrr:
  case X86::VPCLMULQDQrr:
  case X86::VPCLMULQDQYrr:
  case X86::VPCLMULQDQZrr:
  case X86::VPCLMULQDQZ128rr:
  case X86::VPCLMULQDQZ256rr: {
    // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
    // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
    // Commuting exchanges the roles of Imm[0] and Imm[4].
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned Src1Hi = Imm & 0x01;
    unsigned Src2Hi = Imm & 0x10;
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
  case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
  case X86::VPCMPBZrri:    case X86::VPCMPUBZrri:
  case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
  case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
  case X86::VPCMPDZrri:    case X86::VPCMPUDZrri:
  case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
  case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
  case X86::VPCMPQZrri:    case X86::VPCMPUQZrri:
  case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
  case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
  case X86::VPCMPWZrri:    case X86::VPCMPUWZrri:
  case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
  case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
  case X86::VPCMPBZrrik:    case X86::VPCMPUBZrrik:
  case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
  case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
  case X86::VPCMPDZrrik:    case X86::VPCMPUDZrrik:
  case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
  case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
  case X86::VPCMPQZrrik:    case X86::VPCMPUQZrrik:
  case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
  case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
  case X86::VPCMPWZrrik:    case X86::VPCMPUWZrrik: {
    // Flip comparison mode immediate (if necessary).
    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
    Imm = X86::getSwappedVPCMPImm(Imm);
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VPCOMBri: case X86::VPCOMUBri:
  case X86::VPCOMDri: case X86::VPCOMUDri:
  case X86::VPCOMQri: case X86::VPCOMUQri:
  case X86::VPCOMWri: case X86::VPCOMUWri: {
    // Flip comparison mode immediate (if necessary).
    unsigned Imm = MI.getOperand(3).getImm() & 0x7;
    Imm = X86::getSwappedVPCOMImm(Imm);
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm(Imm);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VPERM2F128rr:
  case X86::VPERM2I128rr: {
    // Flip permute source immediate.
    // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
    // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
    int8_t Imm = MI.getOperand(3).getImm() & 0xFF;
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::MOVHLPSrr:
  case X86::UNPCKHPDrr:
  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
  case X86::VMOVHLPSZrr:
  case X86::VUNPCKHPDZ128rr: {
    assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");

    // MOVHLPS and UNPCKHPD with swapped operands compute the same result,
    // so commuting just toggles between the two opcodes.
    unsigned Opc = MI.getOpcode();
    switch (Opc) {
    default: llvm_unreachable("Unreachable!");
    case X86::MOVHLPSrr:       Opc = X86::UNPCKHPDrr;      break;
    case X86::UNPCKHPDrr:      Opc = X86::MOVHLPSrr;       break;
    case X86::VMOVHLPSrr:      Opc = X86::VUNPCKHPDrr;     break;
    case X86::VUNPCKHPDrr:     Opc = X86::VMOVHLPSrr;      break;
    case X86::VMOVHLPSZrr:     Opc = X86::VUNPCKHPDZ128rr; break;
    case X86::VUNPCKHPDZ128rr: Opc = X86::VMOVHLPSZrr;     break;
    }
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opc));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: {
    // Swapping the CMOV sources is compensated by inverting the condition,
    // which is the last operand of the instruction.
    auto &WorkingMI = cloneIfNew(MI);
    unsigned OpNo = MI.getDesc().getNumOperands() - 1;
    X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
    WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VPTERNLOGDZrri:      case X86::VPTERNLOGDZrmi:
  case X86::VPTERNLOGDZ128rri:   case X86::VPTERNLOGDZ128rmi:
  case X86::VPTERNLOGDZ256rri:   case X86::VPTERNLOGDZ256rmi:
  case X86::VPTERNLOGQZrri:      case X86::VPTERNLOGQZrmi:
  case X86::VPTERNLOGQZ128rri:   case X86::VPTERNLOGQZ128rmi:
  case X86::VPTERNLOGQZ256rri:   case X86::VPTERNLOGQZ256rmi:
  case X86::VPTERNLOGDZrrik:
  case X86::VPTERNLOGDZ128rrik:
  case X86::VPTERNLOGDZ256rrik:
  case X86::VPTERNLOGQZrrik:
  case X86::VPTERNLOGQZ128rrik:
  case X86::VPTERNLOGQZ256rrik:
  case X86::VPTERNLOGDZrrikz:    case X86::VPTERNLOGDZrmikz:
  case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
  case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
  case X86::VPTERNLOGQZrrikz:    case X86::VPTERNLOGQZrmikz:
  case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
  case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
  case X86::VPTERNLOGDZ128rmbi:
  case X86::VPTERNLOGDZ256rmbi:
  case X86::VPTERNLOGDZrmbi:
  case X86::VPTERNLOGQZ128rmbi:
  case X86::VPTERNLOGQZ256rmbi:
  case X86::VPTERNLOGQZrmbi:
  case X86::VPTERNLOGDZ128rmbikz:
  case X86::VPTERNLOGDZ256rmbikz:
  case X86::VPTERNLOGDZrmbikz:
  case X86::VPTERNLOGQZ128rmbikz:
  case X86::VPTERNLOGQZ256rmbikz:
  case X86::VPTERNLOGQZrmbikz: {
    // Adjust the truth-table immediate for the swapped sources.
    auto &WorkingMI = cloneIfNew(MI);
    commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  default: {
    // VPERMI2/VPERMT2: commuting switches between the I and T forms.
    if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
      unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }

    // FMA3: pick the 132/213/231 form matching the swapped operand order.
    const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
                                                      MI.getDesc().TSFlags);
    if (FMA3Group) {
      unsigned Opc =
        getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }

    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  }
  }
}

/// Check and possibly choose a pair of commutable source operands for a
/// three-source instruction (FMA3, VPTERNLOG, VPERMI2/VPERMT2), skipping
/// over a k-mask operand when present. On success the chosen indices are
/// returned in \p SrcOpIdx1 / \p SrcOpIdx2 (either may come in as
/// CommuteAnyOperandIndex, meaning "pick one for me").
bool
X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
                                            unsigned &SrcOpIdx1,
                                            unsigned &SrcOpIdx2,
                                            bool IsIntrinsic) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  unsigned FirstCommutableVecOp = 1;
  unsigned LastCommutableVecOp = 3;
  unsigned KMaskOp = -1U;
  if (X86II::isKMasked(TSFlags)) {
    // For k-zero-masked operations it is Ok to commute the first vector
    // operand.
    // For regular k-masked operations a conservative choice is done as the
    // elements of the first vector operand, for which the corresponding bit
    // in the k-mask operand is set to 0, are copied to the result of the
    // instruction.
    // TODO/FIXME: The commute still may be legal if it is known that the
    // k-mask operand is set to either all ones or all zeroes.
    // It is also Ok to commute the 1st operand if all users of MI use only
    // the elements enabled by the k-mask operand. For example,
    //   v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
    //                                      //     : v1[i];
    //   VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
    //                                  // Ok, to commute v1 in FMADD213PSZrk.

    // The k-mask operand has index = 2 for masked and zero-masked operations.
    KMaskOp = 2;

    // The operand with index = 1 is used as a source for those elements for
    // which the corresponding bit in the k-mask is set to 0.
    if (X86II::isKMergeMasked(TSFlags))
      FirstCommutableVecOp = 3;

    LastCommutableVecOp++;
  } else if (IsIntrinsic) {
    // Commuting the first operand of an intrinsic instruction isn't possible
    // unless we can prove that only the lowest element of the result is used.
    FirstCommutableVecOp = 2;
  }

  // A memory operand can never be commuted; shrink the range if the last
  // vector operand is a memory reference.
  if (isMem(MI, LastCommutableVecOp))
    LastCommutableVecOp--;

  // Only the first RegOpsNum operands are commutable.
  // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
  // that the operand is not specified/fixed.
  if (SrcOpIdx1 != CommuteAnyOperandIndex &&
      (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
       SrcOpIdx1 == KMaskOp))
    return false;
  if (SrcOpIdx2 != CommuteAnyOperandIndex &&
      (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
       SrcOpIdx2 == KMaskOp))
    return false;

  // Look for two different register operands assumed to be commutable
  // regardless of the FMA opcode. The FMA opcode is adjusted later.
  if (SrcOpIdx1 == CommuteAnyOperandIndex ||
      SrcOpIdx2 == CommuteAnyOperandIndex) {
    unsigned CommutableOpIdx2 = SrcOpIdx2;

    // At least one of operands to be commuted is not specified and
    // this method is free to choose appropriate commutable operands.
    if (SrcOpIdx1 == SrcOpIdx2)
      // Both of operands are not fixed. By default set one of commutable
      // operands to the last register operand of the instruction.
      CommutableOpIdx2 = LastCommutableVecOp;
    else if (SrcOpIdx2 == CommuteAnyOperandIndex)
      // Only one of operands is not fixed.
      CommutableOpIdx2 = SrcOpIdx1;

    // CommutableOpIdx2 is well defined now. Let's choose another commutable
    // operand and assign its index to CommutableOpIdx1.
    unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();

    unsigned CommutableOpIdx1;
    for (CommutableOpIdx1 = LastCommutableVecOp;
         CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
      // Just ignore and skip the k-mask operand.
      if (CommutableOpIdx1 == KMaskOp)
        continue;

      // The commuted operands must have different registers.
      // Otherwise, the commute transformation does not change anything and
      // is useless then.
      if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
        break;
    }

    // No appropriate commutable operands were found.
    if (CommutableOpIdx1 < FirstCommutableVecOp)
      return false;

    // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2
    // to return those values.
    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
                              CommutableOpIdx1, CommutableOpIdx2))
      return false;
  }

  return true;
}

/// Find (or validate) a pair of operand indices that may be exchanged by
/// commuteInstructionImpl for instruction \p MI.
bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
                                         unsigned &SrcOpIdx2) const {
  const MCInstrDesc &Desc = MI.getDesc();
  if (!Desc.isCommutable())
    return false;

  switch (MI.getOpcode()) {
  case X86::CMPSDrr:
  case X86::CMPSSrr:
  case X86::CMPPDrri:
  case X86::CMPPSrri:
  case X86::VCMPSDrr:
  case X86::VCMPSSrr:
  case X86::VCMPPDrri:
  case X86::VCMPPSrri:
  case X86::VCMPPDYrri:
  case X86::VCMPPSYrri:
  case X86::VCMPSDZrr:
  case X86::VCMPSSZrr:
  case X86::VCMPPDZrri:
  case X86::VCMPPSZrri:
  case X86::VCMPPDZ128rri:
  case X86::VCMPPSZ128rri:
  case X86::VCMPPDZ256rri:
  case X86::VCMPPSZ256rri:
  case X86::VCMPPDZrrik:
  case X86::VCMPPSZrrik:
  case X86::VCMPPDZ128rrik:
  case X86::VCMPPSZ128rrik:
  case X86::VCMPPDZ256rrik:
  case X86::VCMPPSZ256rrik: {
    // Masked forms carry the k-register before the vector sources.
    unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;

    // Float comparison can be safely commuted for
    // Ordered/Unordered/Equal/NotEqual tests
    unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
    switch (Imm) {
    case 0x00: // EQUAL
    case 0x03: // UNORDERED
    case 0x04: // NOT EQUAL
    case 0x07: // ORDERED
      // The indices of the commutable operands are 1 and 2 (or 2 and 3
      // when masked).
      // Assign them to the returned operand indices here.
      return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
                                  2 + OpOffset);
    }
    return false;
  }
  case X86::MOVSSrr:
    // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
    // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
    // AVX implies sse4.1.
    if (Subtarget.hasSSE41())
      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
    return false;
  case X86::SHUFPDrri:
    // We can commute this to MOVSD.
    if (MI.getOperand(3).getImm() == 0x02)
      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
    return false;
  case X86::MOVHLPSrr:
  case X86::UNPCKHPDrr:
  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
  case X86::VMOVHLPSZrr:
  case X86::VUNPCKHPDZ128rr:
    // Commutable via the MOVHLPS<->UNPCKHPD opcode switch, which needs SSE2.
    if (Subtarget.hasSSE2())
      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
    return false;
  case X86::VPTERNLOGDZrri:      case X86::VPTERNLOGDZrmi:
  case X86::VPTERNLOGDZ128rri:   case X86::VPTERNLOGDZ128rmi:
  case X86::VPTERNLOGDZ256rri:   case X86::VPTERNLOGDZ256rmi:
  case X86::VPTERNLOGQZrri:      case X86::VPTERNLOGQZrmi:
  case X86::VPTERNLOGQZ128rri:   case X86::VPTERNLOGQZ128rmi:
  case X86::VPTERNLOGQZ256rri:   case X86::VPTERNLOGQZ256rmi:
  case X86::VPTERNLOGDZrrik:
  case X86::VPTERNLOGDZ128rrik:
  case X86::VPTERNLOGDZ256rrik:
  case X86::VPTERNLOGQZrrik:
  case X86::VPTERNLOGQZ128rrik:
  case X86::VPTERNLOGQZ256rrik:
  case X86::VPTERNLOGDZrrikz:    case X86::VPTERNLOGDZrmikz:
  case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
  case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
  case X86::VPTERNLOGQZrrikz:    case X86::VPTERNLOGQZrmikz:
  case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
  case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
  case X86::VPTERNLOGDZ128rmbi:
  case X86::VPTERNLOGDZ256rmbi:
  case X86::VPTERNLOGDZrmbi:
  case X86::VPTERNLOGQZ128rmbi:
  case X86::VPTERNLOGQZ256rmbi:
  case X86::VPTERNLOGQZrmbi:
  case X86::VPTERNLOGDZ128rmbikz:
  case X86::VPTERNLOGDZ256rmbikz:
  case X86::VPTERNLOGDZrmbikz:
  case X86::VPTERNLOGQZ128rmbikz:
  case X86::VPTERNLOGQZ256rmbikz:
  case X86::VPTERNLOGQZrmbikz:
    return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
  case X86::VPMADD52HUQZ128r:
  case X86::VPMADD52HUQZ128rk:
  case X86::VPMADD52HUQZ128rkz:
  case X86::VPMADD52HUQZ256r:
  case X86::VPMADD52HUQZ256rk:
  case X86::VPMADD52HUQZ256rkz:
  case X86::VPMADD52HUQZr:
  case X86::VPMADD52HUQZrk:
  case X86::VPMADD52HUQZrkz:
  case X86::VPMADD52LUQZ128r:
  case X86::VPMADD52LUQZ128rk:
  case X86::VPMADD52LUQZ128rkz:
  case X86::VPMADD52LUQZ256r:
  case X86::VPMADD52LUQZ256rk:
  case X86::VPMADD52LUQZ256rkz:
  case X86::VPMADD52LUQZr:
  case X86::VPMADD52LUQZrk:
  case X86::VPMADD52LUQZrkz: {
    // Only the two multiplicand operands (2 and 3, after the tied
    // accumulator) are commutable.
    unsigned CommutableOpIdx1 = 2;
    unsigned CommutableOpIdx2 = 3;
    if (X86II::isKMasked(Desc.TSFlags)) {
      // Skip the mask register.
      ++CommutableOpIdx1;
      ++CommutableOpIdx2;
    }
    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
                              CommutableOpIdx1, CommutableOpIdx2))
      return false;
    if (!MI.getOperand(SrcOpIdx1).isReg() ||
        !MI.getOperand(SrcOpIdx2).isReg())
      // No idea.
      return false;
    return true;
  }

  default:
    const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
                                                      MI.getDesc().TSFlags);
    if (FMA3Group)
      return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
                                           FMA3Group->isIntrinsic());

    // Handled masked instructions since we need to skip over the mask input
    // and the preserved input.
    if (X86II::isKMasked(Desc.TSFlags)) {
      // First assume that the first input is the mask operand and skip past it.
      unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
      unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
      // Check if the first input is tied. If there isn't one then we only
      // need to skip the mask operand which we did above.
      if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
                                             MCOI::TIED_TO) != -1)) {
        // If this is zero masking instruction with a tied operand, we need to
        // move the first index back to the first input since this must
        // be a 3 input instruction and we want the first two non-mask inputs.
        // Otherwise this is a 2 input instruction with a preserved input and
        // mask, so we need to move the indices to skip one more input.
2054341825Sdim if (X86II::isKMergeMasked(Desc.TSFlags)) { 2055314564Sdim ++CommutableOpIdx1; 2056314564Sdim ++CommutableOpIdx2; 2057341825Sdim } else { 2058341825Sdim --CommutableOpIdx1; 2059314564Sdim } 2060314564Sdim } 2061314564Sdim 2062314564Sdim if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2063314564Sdim CommutableOpIdx1, CommutableOpIdx2)) 2064314564Sdim return false; 2065314564Sdim 2066314564Sdim if (!MI.getOperand(SrcOpIdx1).isReg() || 2067314564Sdim !MI.getOperand(SrcOpIdx2).isReg()) 2068314564Sdim // No idea. 2069314564Sdim return false; 2070314564Sdim return true; 2071314564Sdim } 2072314564Sdim 2073309124Sdim return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); 2074309124Sdim } 2075296417Sdim return false; 2076276479Sdim} 2077276479Sdim 2078353358SdimX86::CondCode X86::getCondFromBranch(const MachineInstr &MI) { 2079353358Sdim switch (MI.getOpcode()) { 2080193323Sed default: return X86::COND_INVALID; 2081353358Sdim case X86::JCC_1: 2082353358Sdim return static_cast<X86::CondCode>( 2083353358Sdim MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); 2084193323Sed } 2085193323Sed} 2086193323Sed 2087353358Sdim/// Return condition code of a SETCC opcode. 2088353358SdimX86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) { 2089353358Sdim switch (MI.getOpcode()) { 2090239462Sdim default: return X86::COND_INVALID; 2091353358Sdim case X86::SETCCr: case X86::SETCCm: 2092353358Sdim return static_cast<X86::CondCode>( 2093353358Sdim MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); 2094239462Sdim } 2095239462Sdim} 2096239462Sdim 2097288943Sdim/// Return condition code of a CMov opcode. 
2098353358SdimX86::CondCode X86::getCondFromCMov(const MachineInstr &MI) { 2099353358Sdim switch (MI.getOpcode()) { 2100239462Sdim default: return X86::COND_INVALID; 2101353358Sdim case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: 2102353358Sdim case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm: 2103353358Sdim return static_cast<X86::CondCode>( 2104353358Sdim MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); 2105239462Sdim } 2106239462Sdim} 2107239462Sdim 2108288943Sdim/// Return the inverse of the specified condition, 2109193323Sed/// e.g. turning COND_E to COND_NE. 2110193323SedX86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { 2111193323Sed switch (CC) { 2112198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 2113193323Sed case X86::COND_E: return X86::COND_NE; 2114193323Sed case X86::COND_NE: return X86::COND_E; 2115193323Sed case X86::COND_L: return X86::COND_GE; 2116193323Sed case X86::COND_LE: return X86::COND_G; 2117193323Sed case X86::COND_G: return X86::COND_LE; 2118193323Sed case X86::COND_GE: return X86::COND_L; 2119193323Sed case X86::COND_B: return X86::COND_AE; 2120193323Sed case X86::COND_BE: return X86::COND_A; 2121193323Sed case X86::COND_A: return X86::COND_BE; 2122193323Sed case X86::COND_AE: return X86::COND_B; 2123193323Sed case X86::COND_S: return X86::COND_NS; 2124193323Sed case X86::COND_NS: return X86::COND_S; 2125193323Sed case X86::COND_P: return X86::COND_NP; 2126193323Sed case X86::COND_NP: return X86::COND_P; 2127193323Sed case X86::COND_O: return X86::COND_NO; 2128193323Sed case X86::COND_NO: return X86::COND_O; 2129309124Sdim case X86::COND_NE_OR_P: return X86::COND_E_AND_NP; 2130309124Sdim case X86::COND_E_AND_NP: return X86::COND_NE_OR_P; 2131193323Sed } 2132193323Sed} 2133193323Sed 2134288943Sdim/// Assuming the flags are set by MI(a,b), return the condition code if we 2135288943Sdim/// modify the instructions such that flags are set by MI(b,a). 
2136239462Sdimstatic X86::CondCode getSwappedCondition(X86::CondCode CC) { 2137239462Sdim switch (CC) { 2138239462Sdim default: return X86::COND_INVALID; 2139239462Sdim case X86::COND_E: return X86::COND_E; 2140239462Sdim case X86::COND_NE: return X86::COND_NE; 2141239462Sdim case X86::COND_L: return X86::COND_G; 2142239462Sdim case X86::COND_LE: return X86::COND_GE; 2143239462Sdim case X86::COND_G: return X86::COND_L; 2144239462Sdim case X86::COND_GE: return X86::COND_LE; 2145239462Sdim case X86::COND_B: return X86::COND_A; 2146239462Sdim case X86::COND_BE: return X86::COND_AE; 2147239462Sdim case X86::COND_A: return X86::COND_B; 2148239462Sdim case X86::COND_AE: return X86::COND_BE; 2149239462Sdim } 2150239462Sdim} 2151239462Sdim 2152321369Sdimstd::pair<X86::CondCode, bool> 2153321369SdimX86::getX86ConditionCode(CmpInst::Predicate Predicate) { 2154321369Sdim X86::CondCode CC = X86::COND_INVALID; 2155321369Sdim bool NeedSwap = false; 2156321369Sdim switch (Predicate) { 2157321369Sdim default: break; 2158321369Sdim // Floating-point Predicates 2159321369Sdim case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; 2160321369Sdim case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH; 2161321369Sdim case CmpInst::FCMP_OGT: CC = X86::COND_A; break; 2162321369Sdim case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH; 2163321369Sdim case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; 2164321369Sdim case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH; 2165321369Sdim case CmpInst::FCMP_ULT: CC = X86::COND_B; break; 2166321369Sdim case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH; 2167321369Sdim case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; 2168321369Sdim case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; 2169321369Sdim case CmpInst::FCMP_UNO: CC = X86::COND_P; break; 2170321369Sdim case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; 2171321369Sdim case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH; 2172321369Sdim case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; 
2173321369Sdim 2174321369Sdim // Integer Predicates 2175321369Sdim case CmpInst::ICMP_EQ: CC = X86::COND_E; break; 2176321369Sdim case CmpInst::ICMP_NE: CC = X86::COND_NE; break; 2177321369Sdim case CmpInst::ICMP_UGT: CC = X86::COND_A; break; 2178321369Sdim case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; 2179321369Sdim case CmpInst::ICMP_ULT: CC = X86::COND_B; break; 2180321369Sdim case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; 2181321369Sdim case CmpInst::ICMP_SGT: CC = X86::COND_G; break; 2182321369Sdim case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; 2183321369Sdim case CmpInst::ICMP_SLT: CC = X86::COND_L; break; 2184321369Sdim case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; 2185321369Sdim } 2186321369Sdim 2187321369Sdim return std::make_pair(CC, NeedSwap); 2188321369Sdim} 2189321369Sdim 2190353358Sdim/// Return a setcc opcode based on whether it has memory operand. 2191353358Sdimunsigned X86::getSETOpc(bool HasMemoryOperand) { 2192353358Sdim return HasMemoryOperand ? X86::SETCCr : X86::SETCCm; 2193239462Sdim} 2194239462Sdim 2195353358Sdim/// Return a cmov opcode for the given register size in bytes, and operand type. 2196353358Sdimunsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) { 2197239462Sdim switch(RegBytes) { 2198239462Sdim default: llvm_unreachable("Illegal register size!"); 2199353358Sdim case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr; 2200353358Sdim case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr; 2201360661Sdim case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr; 2202239462Sdim } 2203239462Sdim} 2204239462Sdim 2205341825Sdim/// Get the VPCMP immediate for the given condition. 
2206341825Sdimunsigned X86::getVPCMPImmForCond(ISD::CondCode CC) { 2207341825Sdim switch (CC) { 2208341825Sdim default: llvm_unreachable("Unexpected SETCC condition"); 2209341825Sdim case ISD::SETNE: return 4; 2210341825Sdim case ISD::SETEQ: return 0; 2211341825Sdim case ISD::SETULT: 2212341825Sdim case ISD::SETLT: return 1; 2213341825Sdim case ISD::SETUGT: 2214341825Sdim case ISD::SETGT: return 6; 2215341825Sdim case ISD::SETUGE: 2216341825Sdim case ISD::SETGE: return 5; 2217341825Sdim case ISD::SETULE: 2218341825Sdim case ISD::SETLE: return 2; 2219341825Sdim } 2220341825Sdim} 2221341825Sdim 2222341825Sdim/// Get the VPCMP immediate if the opcodes are swapped. 2223341825Sdimunsigned X86::getSwappedVPCMPImm(unsigned Imm) { 2224341825Sdim switch (Imm) { 2225341825Sdim default: llvm_unreachable("Unreachable!"); 2226341825Sdim case 0x01: Imm = 0x06; break; // LT -> NLE 2227341825Sdim case 0x02: Imm = 0x05; break; // LE -> NLT 2228341825Sdim case 0x05: Imm = 0x02; break; // NLT -> LE 2229341825Sdim case 0x06: Imm = 0x01; break; // NLE -> LT 2230341825Sdim case 0x00: // EQ 2231341825Sdim case 0x03: // FALSE 2232341825Sdim case 0x04: // NE 2233341825Sdim case 0x07: // TRUE 2234341825Sdim break; 2235341825Sdim } 2236341825Sdim 2237341825Sdim return Imm; 2238341825Sdim} 2239341825Sdim 2240341825Sdim/// Get the VPCOM immediate if the opcodes are swapped. 
2241341825Sdimunsigned X86::getSwappedVPCOMImm(unsigned Imm) { 2242341825Sdim switch (Imm) { 2243341825Sdim default: llvm_unreachable("Unreachable!"); 2244341825Sdim case 0x00: Imm = 0x02; break; // LT -> GT 2245341825Sdim case 0x01: Imm = 0x03; break; // LE -> GE 2246341825Sdim case 0x02: Imm = 0x00; break; // GT -> LT 2247341825Sdim case 0x03: Imm = 0x01; break; // GE -> LE 2248341825Sdim case 0x04: // EQ 2249341825Sdim case 0x05: // NE 2250341825Sdim case 0x06: // FALSE 2251341825Sdim case 0x07: // TRUE 2252341825Sdim break; 2253341825Sdim } 2254341825Sdim 2255341825Sdim return Imm; 2256341825Sdim} 2257341825Sdim 2258309124Sdimbool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { 2259309124Sdim if (!MI.isTerminator()) return false; 2260218893Sdim 2261193323Sed // Conditional branch is a special case. 2262309124Sdim if (MI.isBranch() && !MI.isBarrier()) 2263193323Sed return true; 2264309124Sdim if (!MI.isPredicable()) 2265193323Sed return true; 2266193323Sed return !isPredicated(MI); 2267193323Sed} 2268193323Sed 2269321369Sdimbool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { 2270321369Sdim switch (MI.getOpcode()) { 2271321369Sdim case X86::TCRETURNdi: 2272321369Sdim case X86::TCRETURNri: 2273321369Sdim case X86::TCRETURNmi: 2274321369Sdim case X86::TCRETURNdi64: 2275321369Sdim case X86::TCRETURNri64: 2276321369Sdim case X86::TCRETURNmi64: 2277321369Sdim return true; 2278321369Sdim default: 2279321369Sdim return false; 2280321369Sdim } 2281321369Sdim} 2282321369Sdim 2283321369Sdimbool X86InstrInfo::canMakeTailCallConditional( 2284321369Sdim SmallVectorImpl<MachineOperand> &BranchCond, 2285321369Sdim const MachineInstr &TailCall) const { 2286321369Sdim if (TailCall.getOpcode() != X86::TCRETURNdi && 2287321369Sdim TailCall.getOpcode() != X86::TCRETURNdi64) { 2288321369Sdim // Only direct calls can be done with a conditional branch. 
2289321369Sdim return false; 2290321369Sdim } 2291321369Sdim 2292321369Sdim const MachineFunction *MF = TailCall.getParent()->getParent(); 2293321369Sdim if (Subtarget.isTargetWin64() && MF->hasWinCFI()) { 2294321369Sdim // Conditional tail calls confuse the Win64 unwinder. 2295321369Sdim return false; 2296321369Sdim } 2297321369Sdim 2298321369Sdim assert(BranchCond.size() == 1); 2299321369Sdim if (BranchCond[0].getImm() > X86::LAST_VALID_COND) { 2300321369Sdim // Can't make a conditional tail call with this condition. 2301321369Sdim return false; 2302321369Sdim } 2303321369Sdim 2304321369Sdim const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 2305321369Sdim if (X86FI->getTCReturnAddrDelta() != 0 || 2306321369Sdim TailCall.getOperand(1).getImm() != 0) { 2307321369Sdim // A conditional tail call cannot do any stack adjustment. 2308321369Sdim return false; 2309321369Sdim } 2310321369Sdim 2311321369Sdim return true; 2312321369Sdim} 2313321369Sdim 2314321369Sdimvoid X86InstrInfo::replaceBranchWithTailCall( 2315321369Sdim MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond, 2316321369Sdim const MachineInstr &TailCall) const { 2317321369Sdim assert(canMakeTailCallConditional(BranchCond, TailCall)); 2318321369Sdim 2319321369Sdim MachineBasicBlock::iterator I = MBB.end(); 2320321369Sdim while (I != MBB.begin()) { 2321321369Sdim --I; 2322341825Sdim if (I->isDebugInstr()) 2323321369Sdim continue; 2324321369Sdim if (!I->isBranch()) 2325321369Sdim assert(0 && "Can't find the branch to replace!"); 2326321369Sdim 2327353358Sdim X86::CondCode CC = X86::getCondFromBranch(*I); 2328321369Sdim assert(BranchCond.size() == 1); 2329321369Sdim if (CC != BranchCond[0].getImm()) 2330321369Sdim continue; 2331321369Sdim 2332321369Sdim break; 2333321369Sdim } 2334321369Sdim 2335321369Sdim unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? 
X86::TCRETURNdicc 2336321369Sdim : X86::TCRETURNdi64cc; 2337321369Sdim 2338321369Sdim auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc)); 2339321369Sdim MIB->addOperand(TailCall.getOperand(0)); // Destination. 2340321369Sdim MIB.addImm(0); // Stack offset (not used). 2341321369Sdim MIB->addOperand(BranchCond[0]); // Condition. 2342321369Sdim MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters. 2343321369Sdim 2344321369Sdim // Add implicit uses and defs of all live regs potentially clobbered by the 2345321369Sdim // call. This way they still appear live across the call. 2346321369Sdim LivePhysRegs LiveRegs(getRegisterInfo()); 2347321369Sdim LiveRegs.addLiveOuts(MBB); 2348344779Sdim SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers; 2349321369Sdim LiveRegs.stepForward(*MIB, Clobbers); 2350321369Sdim for (const auto &C : Clobbers) { 2351321369Sdim MIB.addReg(C.first, RegState::Implicit); 2352321369Sdim MIB.addReg(C.first, RegState::Implicit | RegState::Define); 2353321369Sdim } 2354321369Sdim 2355321369Sdim I->eraseFromParent(); 2356321369Sdim} 2357321369Sdim 2358309124Sdim// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may 2359309124Sdim// not be a fallthrough MBB now due to layout changes). Return nullptr if the 2360309124Sdim// fallthrough MBB cannot be identified. 2361309124Sdimstatic MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB, 2362309124Sdim MachineBasicBlock *TBB) { 2363309124Sdim // Look for non-EHPad successors other than TBB. If we find exactly one, it 2364309124Sdim // is the fallthrough MBB. If we find zero, then TBB is both the target MBB 2365309124Sdim // and fallthrough MBB. If we find more than one, we cannot identify the 2366309124Sdim // fallthrough MBB and should return nullptr. 
2367309124Sdim MachineBasicBlock *FallthroughBB = nullptr; 2368309124Sdim for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { 2369309124Sdim if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB)) 2370309124Sdim continue; 2371309124Sdim // Return a nullptr if we found more than one fallthrough successor. 2372309124Sdim if (FallthroughBB && FallthroughBB != TBB) 2373309124Sdim return nullptr; 2374309124Sdim FallthroughBB = *SI; 2375309124Sdim } 2376309124Sdim return FallthroughBB; 2377309124Sdim} 2378309124Sdim 2379288943Sdimbool X86InstrInfo::AnalyzeBranchImpl( 2380288943Sdim MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, 2381288943Sdim SmallVectorImpl<MachineOperand> &Cond, 2382288943Sdim SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const { 2383288943Sdim 2384193323Sed // Start from the bottom of the block and work up, examining the 2385193323Sed // terminator instructions. 2386193323Sed MachineBasicBlock::iterator I = MBB.end(); 2387207618Srdivacky MachineBasicBlock::iterator UnCondBrIter = MBB.end(); 2388193323Sed while (I != MBB.begin()) { 2389193323Sed --I; 2390341825Sdim if (I->isDebugInstr()) 2391206083Srdivacky continue; 2392200581Srdivacky 2393200581Srdivacky // Working from the bottom, when we see a non-terminator instruction, we're 2394200581Srdivacky // done. 2395309124Sdim if (!isUnpredicatedTerminator(*I)) 2396193323Sed break; 2397200581Srdivacky 2398200581Srdivacky // A terminator that isn't a branch can't easily be handled by this 2399200581Srdivacky // analysis. 2400234353Sdim if (!I->isBranch()) 2401193323Sed return true; 2402200581Srdivacky 2403193323Sed // Handle unconditional branches. 
2404280031Sdim if (I->getOpcode() == X86::JMP_1) { 2405207618Srdivacky UnCondBrIter = I; 2406207618Srdivacky 2407193323Sed if (!AllowModify) { 2408193323Sed TBB = I->getOperand(0).getMBB(); 2409193323Sed continue; 2410193323Sed } 2411193323Sed 2412193323Sed // If the block has any instructions after a JMP, delete them. 2413276479Sdim while (std::next(I) != MBB.end()) 2414276479Sdim std::next(I)->eraseFromParent(); 2415200581Srdivacky 2416193323Sed Cond.clear(); 2417276479Sdim FBB = nullptr; 2418200581Srdivacky 2419193323Sed // Delete the JMP if it's equivalent to a fall-through. 2420193323Sed if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { 2421276479Sdim TBB = nullptr; 2422193323Sed I->eraseFromParent(); 2423193323Sed I = MBB.end(); 2424207618Srdivacky UnCondBrIter = MBB.end(); 2425193323Sed continue; 2426193323Sed } 2427200581Srdivacky 2428207618Srdivacky // TBB is used to indicate the unconditional destination. 2429193323Sed TBB = I->getOperand(0).getMBB(); 2430193323Sed continue; 2431193323Sed } 2432200581Srdivacky 2433193323Sed // Handle conditional branches. 2434353358Sdim X86::CondCode BranchCode = X86::getCondFromBranch(*I); 2435193323Sed if (BranchCode == X86::COND_INVALID) 2436193323Sed return true; // Can't handle indirect branch. 2437200581Srdivacky 2438344779Sdim // In practice we should never have an undef eflags operand, if we do 2439344779Sdim // abort here as we are not prepared to preserve the flag. 2440353358Sdim if (I->findRegisterUseOperand(X86::EFLAGS)->isUndef()) 2441344779Sdim return true; 2442344779Sdim 2443193323Sed // Working from the bottom, handle the first conditional branch. 
2444193323Sed if (Cond.empty()) { 2445207618Srdivacky MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); 2446207618Srdivacky if (AllowModify && UnCondBrIter != MBB.end() && 2447207618Srdivacky MBB.isLayoutSuccessor(TargetBB)) { 2448207618Srdivacky // If we can modify the code and it ends in something like: 2449207618Srdivacky // 2450207618Srdivacky // jCC L1 2451207618Srdivacky // jmp L2 2452207618Srdivacky // L1: 2453207618Srdivacky // ... 2454207618Srdivacky // L2: 2455207618Srdivacky // 2456207618Srdivacky // Then we can change this to: 2457207618Srdivacky // 2458207618Srdivacky // jnCC L2 2459207618Srdivacky // L1: 2460207618Srdivacky // ... 2461207618Srdivacky // L2: 2462207618Srdivacky // 2463207618Srdivacky // Which is a bit more efficient. 2464207618Srdivacky // We conditionally jump to the fall-through block. 2465207618Srdivacky BranchCode = GetOppositeBranchCondition(BranchCode); 2466207618Srdivacky MachineBasicBlock::iterator OldInst = I; 2467207618Srdivacky 2468353358Sdim BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JCC_1)) 2469353358Sdim .addMBB(UnCondBrIter->getOperand(0).getMBB()) 2470353358Sdim .addImm(BranchCode); 2471280031Sdim BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1)) 2472207618Srdivacky .addMBB(TargetBB); 2473207618Srdivacky 2474207618Srdivacky OldInst->eraseFromParent(); 2475207618Srdivacky UnCondBrIter->eraseFromParent(); 2476207618Srdivacky 2477207618Srdivacky // Restart the analysis. 2478207618Srdivacky UnCondBrIter = MBB.end(); 2479207618Srdivacky I = MBB.end(); 2480207618Srdivacky continue; 2481207618Srdivacky } 2482207618Srdivacky 2483193323Sed FBB = TBB; 2484193323Sed TBB = I->getOperand(0).getMBB(); 2485193323Sed Cond.push_back(MachineOperand::CreateImm(BranchCode)); 2486309124Sdim CondBranches.push_back(&*I); 2487193323Sed continue; 2488193323Sed } 2489200581Srdivacky 2490200581Srdivacky // Handle subsequent conditional branches. 
Only handle the case where all 2491200581Srdivacky // conditional branches branch to the same destination and their condition 2492200581Srdivacky // opcodes fit one of the special multi-branch idioms. 2493193323Sed assert(Cond.size() == 1); 2494193323Sed assert(TBB); 2495200581Srdivacky 2496200581Srdivacky // If the conditions are the same, we can leave them alone. 2497193323Sed X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); 2498309124Sdim auto NewTBB = I->getOperand(0).getMBB(); 2499309124Sdim if (OldBranchCode == BranchCode && TBB == NewTBB) 2500193323Sed continue; 2501200581Srdivacky 2502200581Srdivacky // If they differ, see if they fit one of the known patterns. Theoretically, 2503200581Srdivacky // we could handle more patterns here, but we shouldn't expect to see them 2504200581Srdivacky // if instruction selection has done a reasonable job. 2505309124Sdim if (TBB == NewTBB && 2506309124Sdim ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) || 2507309124Sdim (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) { 2508193323Sed BranchCode = X86::COND_NE_OR_P; 2509309124Sdim } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) || 2510309124Sdim (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) { 2511309124Sdim if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB))) 2512309124Sdim return true; 2513309124Sdim 2514309124Sdim // X86::COND_E_AND_NP usually has two different branch destinations. 2515309124Sdim // 2516309124Sdim // JP B1 2517309124Sdim // JE B2 2518309124Sdim // JMP B1 2519309124Sdim // B1: 2520309124Sdim // B2: 2521309124Sdim // 2522309124Sdim // Here this condition branches to B2 only if NP && E. It has another 2523309124Sdim // equivalent form: 2524309124Sdim // 2525309124Sdim // JNE B1 2526309124Sdim // JNP B2 2527309124Sdim // JMP B1 2528309124Sdim // B1: 2529309124Sdim // B2: 2530309124Sdim // 2531309124Sdim // Similarly it branches to B2 only if E && NP. 
That is why this condition 2532309124Sdim // is named with COND_E_AND_NP. 2533309124Sdim BranchCode = X86::COND_E_AND_NP; 2534309124Sdim } else 2535193323Sed return true; 2536200581Srdivacky 2537193323Sed // Update the MachineOperand. 2538193323Sed Cond[0].setImm(BranchCode); 2539309124Sdim CondBranches.push_back(&*I); 2540193323Sed } 2541193323Sed 2542193323Sed return false; 2543193323Sed} 2544193323Sed 2545309124Sdimbool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB, 2546288943Sdim MachineBasicBlock *&TBB, 2547288943Sdim MachineBasicBlock *&FBB, 2548288943Sdim SmallVectorImpl<MachineOperand> &Cond, 2549288943Sdim bool AllowModify) const { 2550288943Sdim SmallVector<MachineInstr *, 4> CondBranches; 2551288943Sdim return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify); 2552288943Sdim} 2553288943Sdim 2554309124Sdimbool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, 2555288943Sdim MachineBranchPredicate &MBP, 2556288943Sdim bool AllowModify) const { 2557288943Sdim using namespace std::placeholders; 2558288943Sdim 2559288943Sdim SmallVector<MachineOperand, 4> Cond; 2560288943Sdim SmallVector<MachineInstr *, 4> CondBranches; 2561288943Sdim if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches, 2562288943Sdim AllowModify)) 2563288943Sdim return true; 2564288943Sdim 2565288943Sdim if (Cond.size() != 1) 2566288943Sdim return true; 2567288943Sdim 2568288943Sdim assert(MBP.TrueDest && "expected!"); 2569288943Sdim 2570288943Sdim if (!MBP.FalseDest) 2571288943Sdim MBP.FalseDest = MBB.getNextNode(); 2572288943Sdim 2573288943Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 2574288943Sdim 2575288943Sdim MachineInstr *ConditionDef = nullptr; 2576288943Sdim bool SingleUseCondition = true; 2577288943Sdim 2578288943Sdim for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) { 2579288943Sdim if (I->modifiesRegister(X86::EFLAGS, TRI)) { 2580288943Sdim ConditionDef = &*I; 2581288943Sdim break; 2582288943Sdim 
} 2583288943Sdim 2584288943Sdim if (I->readsRegister(X86::EFLAGS, TRI)) 2585288943Sdim SingleUseCondition = false; 2586288943Sdim } 2587288943Sdim 2588288943Sdim if (!ConditionDef) 2589288943Sdim return true; 2590288943Sdim 2591288943Sdim if (SingleUseCondition) { 2592288943Sdim for (auto *Succ : MBB.successors()) 2593288943Sdim if (Succ->isLiveIn(X86::EFLAGS)) 2594288943Sdim SingleUseCondition = false; 2595288943Sdim } 2596288943Sdim 2597288943Sdim MBP.ConditionDef = ConditionDef; 2598288943Sdim MBP.SingleUseCondition = SingleUseCondition; 2599288943Sdim 2600288943Sdim // Currently we only recognize the simple pattern: 2601288943Sdim // 2602288943Sdim // test %reg, %reg 2603288943Sdim // je %label 2604288943Sdim // 2605288943Sdim const unsigned TestOpcode = 2606288943Sdim Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr; 2607288943Sdim 2608288943Sdim if (ConditionDef->getOpcode() == TestOpcode && 2609288943Sdim ConditionDef->getNumOperands() == 3 && 2610288943Sdim ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) && 2611288943Sdim (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) { 2612288943Sdim MBP.LHS = ConditionDef->getOperand(0); 2613288943Sdim MBP.RHS = MachineOperand::CreateImm(0); 2614288943Sdim MBP.Predicate = Cond[0].getImm() == X86::COND_NE 2615288943Sdim ? 
MachineBranchPredicate::PRED_NE 2616288943Sdim : MachineBranchPredicate::PRED_EQ; 2617288943Sdim return false; 2618288943Sdim } 2619288943Sdim 2620288943Sdim return true; 2621288943Sdim} 2622288943Sdim 2623314564Sdimunsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB, 2624314564Sdim int *BytesRemoved) const { 2625314564Sdim assert(!BytesRemoved && "code size not handled"); 2626314564Sdim 2627193323Sed MachineBasicBlock::iterator I = MBB.end(); 2628193323Sed unsigned Count = 0; 2629193323Sed 2630193323Sed while (I != MBB.begin()) { 2631193323Sed --I; 2632341825Sdim if (I->isDebugInstr()) 2633206083Srdivacky continue; 2634280031Sdim if (I->getOpcode() != X86::JMP_1 && 2635353358Sdim X86::getCondFromBranch(*I) == X86::COND_INVALID) 2636193323Sed break; 2637193323Sed // Remove the branch. 2638193323Sed I->eraseFromParent(); 2639193323Sed I = MBB.end(); 2640193323Sed ++Count; 2641193323Sed } 2642218893Sdim 2643193323Sed return Count; 2644193323Sed} 2645193323Sed 2646314564Sdimunsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB, 2647309124Sdim MachineBasicBlock *TBB, 2648309124Sdim MachineBasicBlock *FBB, 2649309124Sdim ArrayRef<MachineOperand> Cond, 2650314564Sdim const DebugLoc &DL, 2651314564Sdim int *BytesAdded) const { 2652193323Sed // Shouldn't be a fall through. 2653314564Sdim assert(TBB && "insertBranch must not be told to insert a fallthrough"); 2654193323Sed assert((Cond.size() == 1 || Cond.size() == 0) && 2655193323Sed "X86 branch conditions have one component!"); 2656314564Sdim assert(!BytesAdded && "code size not handled"); 2657193323Sed 2658193323Sed if (Cond.empty()) { 2659193323Sed // Unconditional branch? 2660193323Sed assert(!FBB && "Unconditional branch with multiple successors!"); 2661280031Sdim BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB); 2662193323Sed return 1; 2663193323Sed } 2664193323Sed 2665309124Sdim // If FBB is null, it is implied to be a fall-through block. 
2666309124Sdim bool FallThru = FBB == nullptr; 2667309124Sdim 2668193323Sed // Conditional branch. 2669193323Sed unsigned Count = 0; 2670193323Sed X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); 2671193323Sed switch (CC) { 2672193323Sed case X86::COND_NE_OR_P: 2673193323Sed // Synthesize NE_OR_P with two branches. 2674353358Sdim BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NE); 2675193323Sed ++Count; 2676353358Sdim BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_P); 2677193323Sed ++Count; 2678193323Sed break; 2679309124Sdim case X86::COND_E_AND_NP: 2680309124Sdim // Use the next block of MBB as FBB if it is null. 2681309124Sdim if (FBB == nullptr) { 2682309124Sdim FBB = getFallThroughMBB(&MBB, TBB); 2683309124Sdim assert(FBB && "MBB cannot be the last block in function when the false " 2684309124Sdim "body is a fall-through."); 2685309124Sdim } 2686309124Sdim // Synthesize COND_E_AND_NP with two branches. 2687353358Sdim BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(FBB).addImm(X86::COND_NE); 2688309124Sdim ++Count; 2689353358Sdim BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NP); 2690309124Sdim ++Count; 2691309124Sdim break; 2692193323Sed default: { 2693353358Sdim BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(CC); 2694193323Sed ++Count; 2695193323Sed } 2696193323Sed } 2697309124Sdim if (!FallThru) { 2698193323Sed // Two-way Conditional branch. Insert the second branch. 2699280031Sdim BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB); 2700193323Sed ++Count; 2701193323Sed } 2702193323Sed return Count; 2703193323Sed} 2704193323Sed 2705239462Sdimbool X86InstrInfo:: 2706239462SdimcanInsertSelect(const MachineBasicBlock &MBB, 2707288943Sdim ArrayRef<MachineOperand> Cond, 2708239462Sdim unsigned TrueReg, unsigned FalseReg, 2709239462Sdim int &CondCycles, int &TrueCycles, int &FalseCycles) const { 2710239462Sdim // Not all subtargets have cmov instructions. 
2711276479Sdim if (!Subtarget.hasCMov()) 2712239462Sdim return false; 2713239462Sdim if (Cond.size() != 1) 2714239462Sdim return false; 2715239462Sdim // We cannot do the composite conditions, at least not in SSA form. 2716353358Sdim if ((X86::CondCode)Cond[0].getImm() > X86::LAST_VALID_COND) 2717239462Sdim return false; 2718239462Sdim 2719239462Sdim // Check register classes. 2720239462Sdim const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2721239462Sdim const TargetRegisterClass *RC = 2722239462Sdim RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); 2723239462Sdim if (!RC) 2724239462Sdim return false; 2725239462Sdim 2726239462Sdim // We have cmov instructions for 16, 32, and 64 bit general purpose registers. 2727239462Sdim if (X86::GR16RegClass.hasSubClassEq(RC) || 2728239462Sdim X86::GR32RegClass.hasSubClassEq(RC) || 2729239462Sdim X86::GR64RegClass.hasSubClassEq(RC)) { 2730239462Sdim // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy 2731239462Sdim // Bridge. Probably Ivy Bridge as well. 2732239462Sdim CondCycles = 2; 2733239462Sdim TrueCycles = 2; 2734239462Sdim FalseCycles = 2; 2735239462Sdim return true; 2736239462Sdim } 2737239462Sdim 2738239462Sdim // Can't do vectors. 
2739239462Sdim return false; 2740239462Sdim} 2741239462Sdim 2742239462Sdimvoid X86InstrInfo::insertSelect(MachineBasicBlock &MBB, 2743309124Sdim MachineBasicBlock::iterator I, 2744309124Sdim const DebugLoc &DL, unsigned DstReg, 2745309124Sdim ArrayRef<MachineOperand> Cond, unsigned TrueReg, 2746309124Sdim unsigned FalseReg) const { 2747309124Sdim MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2748321369Sdim const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); 2749321369Sdim const TargetRegisterClass &RC = *MRI.getRegClass(DstReg); 2750309124Sdim assert(Cond.size() == 1 && "Invalid Cond array"); 2751353358Sdim unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8, 2752353358Sdim false /*HasMemoryOperand*/); 2753353358Sdim BuildMI(MBB, I, DL, get(Opc), DstReg) 2754353358Sdim .addReg(FalseReg) 2755353358Sdim .addReg(TrueReg) 2756353358Sdim .addImm(Cond[0].getImm()); 2757239462Sdim} 2758239462Sdim 2759288943Sdim/// Test if the given register is a physical h register. 2760193323Sedstatic bool isHReg(unsigned Reg) { 2761193323Sed return X86::GR8_ABCD_HRegClass.contains(Reg); 2762193323Sed} 2763193323Sed 2764212904Sdim// Try and copy between VR128/VR64 and GR64 registers. 2765341825Sdimstatic unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, 2766276479Sdim const X86Subtarget &Subtarget) { 2767314564Sdim bool HasAVX = Subtarget.hasAVX(); 2768314564Sdim bool HasAVX512 = Subtarget.hasAVX512(); 2769261991Sdim 2770314564Sdim // SrcReg(MaskReg) -> DestReg(GR64) 2771314564Sdim // SrcReg(MaskReg) -> DestReg(GR32) 2772314564Sdim 2773314564Sdim // All KMASK RegClasses hold the same k registers, can be tested against anyone. 2774314564Sdim if (X86::VK16RegClass.contains(SrcReg)) { 2775314564Sdim if (X86::GR64RegClass.contains(DestReg)) { 2776314564Sdim assert(Subtarget.hasBWI()); 2777314564Sdim return X86::KMOVQrk; 2778314564Sdim } 2779314564Sdim if (X86::GR32RegClass.contains(DestReg)) 2780314564Sdim return Subtarget.hasBWI() ? 
X86::KMOVDrk : X86::KMOVWrk; 2781314564Sdim } 2782314564Sdim 2783314564Sdim // SrcReg(GR64) -> DestReg(MaskReg) 2784314564Sdim // SrcReg(GR32) -> DestReg(MaskReg) 2785314564Sdim 2786314564Sdim // All KMASK RegClasses hold the same k registers, can be tested against anyone. 2787314564Sdim if (X86::VK16RegClass.contains(DestReg)) { 2788314564Sdim if (X86::GR64RegClass.contains(SrcReg)) { 2789314564Sdim assert(Subtarget.hasBWI()); 2790314564Sdim return X86::KMOVQkr; 2791314564Sdim } 2792314564Sdim if (X86::GR32RegClass.contains(SrcReg)) 2793314564Sdim return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr; 2794314564Sdim } 2795314564Sdim 2796314564Sdim 2797212904Sdim // SrcReg(VR128) -> DestReg(GR64) 2798212904Sdim // SrcReg(VR64) -> DestReg(GR64) 2799212904Sdim // SrcReg(GR64) -> DestReg(VR128) 2800212904Sdim // SrcReg(GR64) -> DestReg(VR64) 2801212904Sdim 2802212904Sdim if (X86::GR64RegClass.contains(DestReg)) { 2803261991Sdim if (X86::VR128XRegClass.contains(SrcReg)) 2804212904Sdim // Copy from a VR128 register to a GR64 register. 2805309124Sdim return HasAVX512 ? X86::VMOVPQIto64Zrr : 2806309124Sdim HasAVX ? X86::VMOVPQIto64rr : 2807309124Sdim X86::MOVPQIto64rr; 2808243830Sdim if (X86::VR64RegClass.contains(SrcReg)) 2809212904Sdim // Copy from a VR64 register to a GR64 register. 2810288943Sdim return X86::MMX_MOVD64from64rr; 2811212904Sdim } else if (X86::GR64RegClass.contains(SrcReg)) { 2812212904Sdim // Copy from a GR64 register to a VR128 register. 2813261991Sdim if (X86::VR128XRegClass.contains(DestReg)) 2814309124Sdim return HasAVX512 ? X86::VMOV64toPQIZrr : 2815309124Sdim HasAVX ? X86::VMOV64toPQIrr : 2816309124Sdim X86::MOV64toPQIrr; 2817212904Sdim // Copy from a GR64 register to a VR64 register. 
2818243830Sdim if (X86::VR64RegClass.contains(DestReg)) 2819288943Sdim return X86::MMX_MOVD64to64rr; 2820212904Sdim } 2821212904Sdim 2822353358Sdim // SrcReg(VR128) -> DestReg(GR32) 2823353358Sdim // SrcReg(GR32) -> DestReg(VR128) 2824226633Sdim 2825309124Sdim if (X86::GR32RegClass.contains(DestReg) && 2826353358Sdim X86::VR128XRegClass.contains(SrcReg)) 2827353358Sdim // Copy from a VR128 register to a GR32 register. 2828353358Sdim return HasAVX512 ? X86::VMOVPDI2DIZrr : 2829353358Sdim HasAVX ? X86::VMOVPDI2DIrr : 2830353358Sdim X86::MOVPDI2DIrr; 2831226633Sdim 2832353358Sdim if (X86::VR128XRegClass.contains(DestReg) && 2833309124Sdim X86::GR32RegClass.contains(SrcReg)) 2834353358Sdim // Copy from a VR128 register to a VR128 register. 2835353358Sdim return HasAVX512 ? X86::VMOVDI2PDIZrr : 2836353358Sdim HasAVX ? X86::VMOVDI2PDIrr : 2837353358Sdim X86::MOVDI2PDIrr; 2838261991Sdim return 0; 2839261991Sdim} 2840226633Sdim 2841210299Sedvoid X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 2842309124Sdim MachineBasicBlock::iterator MI, 2843309124Sdim const DebugLoc &DL, unsigned DestReg, 2844309124Sdim unsigned SrcReg, bool KillSrc) const { 2845210299Sed // First deal with the normal symmetric copies. 2846276479Sdim bool HasAVX = Subtarget.hasAVX(); 2847314564Sdim bool HasVLX = Subtarget.hasVLX(); 2848261991Sdim unsigned Opc = 0; 2849210299Sed if (X86::GR64RegClass.contains(DestReg, SrcReg)) 2850210299Sed Opc = X86::MOV64rr; 2851210299Sed else if (X86::GR32RegClass.contains(DestReg, SrcReg)) 2852210299Sed Opc = X86::MOV32rr; 2853210299Sed else if (X86::GR16RegClass.contains(DestReg, SrcReg)) 2854210299Sed Opc = X86::MOV16rr; 2855210299Sed else if (X86::GR8RegClass.contains(DestReg, SrcReg)) { 2856210299Sed // Copying to or from a physical H register on x86-64 requires a NOREX 2857210299Sed // move. Otherwise use a normal move. 
2858210299Sed if ((isHReg(DestReg) || isHReg(SrcReg)) && 2859276479Sdim Subtarget.is64Bit()) { 2860210299Sed Opc = X86::MOV8rr_NOREX; 2861226633Sdim // Both operands must be encodable without an REX prefix. 2862226633Sdim assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) && 2863226633Sdim "8-bit H register can not be copied outside GR8_NOREX"); 2864226633Sdim } else 2865210299Sed Opc = X86::MOV8rr; 2866261991Sdim } 2867261991Sdim else if (X86::VR64RegClass.contains(DestReg, SrcReg)) 2868261991Sdim Opc = X86::MMX_MOVQ64rr; 2869314564Sdim else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) { 2870314564Sdim if (HasVLX) 2871314564Sdim Opc = X86::VMOVAPSZ128rr; 2872314564Sdim else if (X86::VR128RegClass.contains(DestReg, SrcReg)) 2873314564Sdim Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr; 2874314564Sdim else { 2875314564Sdim // If this an extended register and we don't have VLX we need to use a 2876314564Sdim // 512-bit move. 2877314564Sdim Opc = X86::VMOVAPSZrr; 2878314564Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 2879314564Sdim DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_xmm, 2880314564Sdim &X86::VR512RegClass); 2881314564Sdim SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, 2882314564Sdim &X86::VR512RegClass); 2883314564Sdim } 2884314564Sdim } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) { 2885314564Sdim if (HasVLX) 2886314564Sdim Opc = X86::VMOVAPSZ256rr; 2887314564Sdim else if (X86::VR256RegClass.contains(DestReg, SrcReg)) 2888314564Sdim Opc = X86::VMOVAPSYrr; 2889314564Sdim else { 2890314564Sdim // If this an extended register and we don't have VLX we need to use a 2891314564Sdim // 512-bit move. 
2892314564Sdim Opc = X86::VMOVAPSZrr; 2893314564Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 2894314564Sdim DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_ymm, 2895314564Sdim &X86::VR512RegClass); 2896314564Sdim SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, 2897314564Sdim &X86::VR512RegClass); 2898314564Sdim } 2899314564Sdim } else if (X86::VR512RegClass.contains(DestReg, SrcReg)) 2900314564Sdim Opc = X86::VMOVAPSZrr; 2901314564Sdim // All KMASK RegClasses hold the same k registers, can be tested against anyone. 2902314564Sdim else if (X86::VK16RegClass.contains(DestReg, SrcReg)) 2903314564Sdim Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk; 2904261991Sdim if (!Opc) 2905276479Sdim Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget); 2906193323Sed 2907210299Sed if (Opc) { 2908210299Sed BuildMI(MBB, MI, DL, get(Opc), DestReg) 2909210299Sed .addReg(SrcReg, getKillRegState(KillSrc)); 2910210299Sed return; 2911193323Sed } 2912198090Srdivacky 2913332833Sdim if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) { 2914332833Sdim // FIXME: We use a fatal error here because historically LLVM has tried 2915332833Sdim // lower some of these physreg copies and we want to ensure we get 2916332833Sdim // reasonable bug reports if someone encounters a case no other testing 2917332833Sdim // found. This path should be removed after the LLVM 7 release. 
2918332833Sdim report_fatal_error("Unable to copy EFLAGS physical register!"); 2919193323Sed } 2920193323Sed 2921341825Sdim LLVM_DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to " 2922341825Sdim << RI.getName(DestReg) << '\n'); 2923341825Sdim report_fatal_error("Cannot emit physreg copy instruction"); 2924193323Sed} 2925193323Sed 2926344779Sdimbool X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI, 2927344779Sdim const MachineOperand *&Src, 2928344779Sdim const MachineOperand *&Dest) const { 2929341825Sdim if (MI.isMoveReg()) { 2930341825Sdim Dest = &MI.getOperand(0); 2931341825Sdim Src = &MI.getOperand(1); 2932341825Sdim return true; 2933341825Sdim } 2934341825Sdim return false; 2935341825Sdim} 2936341825Sdim 2937210299Sedstatic unsigned getLoadStoreRegOpcode(unsigned Reg, 2938210299Sed const TargetRegisterClass *RC, 2939210299Sed bool isStackAligned, 2940276479Sdim const X86Subtarget &STI, 2941210299Sed bool load) { 2942314564Sdim bool HasAVX = STI.hasAVX(); 2943314564Sdim bool HasAVX512 = STI.hasAVX512(); 2944314564Sdim bool HasVLX = STI.hasVLX(); 2945261991Sdim 2946321369Sdim switch (STI.getRegisterInfo()->getSpillSize(*RC)) { 2947210299Sed default: 2948223017Sdim llvm_unreachable("Unknown spill size"); 2949223017Sdim case 1: 2950223017Sdim assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); 2951276479Sdim if (STI.is64Bit()) 2952223017Sdim // Copying to or from a physical H register on x86-64 requires a NOREX 2953223017Sdim // move. Otherwise use a normal move. 2954223017Sdim if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC)) 2955223017Sdim return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; 2956223017Sdim return load ? X86::MOV8rm : X86::MOV8mr; 2957223017Sdim case 2: 2958314564Sdim if (X86::VK16RegClass.hasSubClassEq(RC)) 2959314564Sdim return load ? X86::KMOVWkm : X86::KMOVWmk; 2960223017Sdim assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass"); 2961210299Sed return load ? 
X86::MOV16rm : X86::MOV16mr; 2962223017Sdim case 4: 2963223017Sdim if (X86::GR32RegClass.hasSubClassEq(RC)) 2964223017Sdim return load ? X86::MOV32rm : X86::MOV32mr; 2965314564Sdim if (X86::FR32XRegClass.hasSubClassEq(RC)) 2966226633Sdim return load ? 2967353358Sdim (HasAVX512 ? X86::VMOVSSZrm_alt : 2968353358Sdim HasAVX ? X86::VMOVSSrm_alt : 2969353358Sdim X86::MOVSSrm_alt) : 2970353358Sdim (HasAVX512 ? X86::VMOVSSZmr : 2971353358Sdim HasAVX ? X86::VMOVSSmr : 2972353358Sdim X86::MOVSSmr); 2973223017Sdim if (X86::RFP32RegClass.hasSubClassEq(RC)) 2974223017Sdim return load ? X86::LD_Fp32m : X86::ST_Fp32m; 2975341825Sdim if (X86::VK32RegClass.hasSubClassEq(RC)) { 2976341825Sdim assert(STI.hasBWI() && "KMOVD requires BWI"); 2977314564Sdim return load ? X86::KMOVDkm : X86::KMOVDmk; 2978341825Sdim } 2979353358Sdim // All of these mask pair classes have the same spill size, the same kind 2980353358Sdim // of kmov instructions can be used with all of them. 2981353358Sdim if (X86::VK1PAIRRegClass.hasSubClassEq(RC) || 2982353358Sdim X86::VK2PAIRRegClass.hasSubClassEq(RC) || 2983353358Sdim X86::VK4PAIRRegClass.hasSubClassEq(RC) || 2984353358Sdim X86::VK8PAIRRegClass.hasSubClassEq(RC) || 2985353358Sdim X86::VK16PAIRRegClass.hasSubClassEq(RC)) 2986353358Sdim return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE; 2987223017Sdim llvm_unreachable("Unknown 4-byte regclass"); 2988223017Sdim case 8: 2989223017Sdim if (X86::GR64RegClass.hasSubClassEq(RC)) 2990223017Sdim return load ? X86::MOV64rm : X86::MOV64mr; 2991314564Sdim if (X86::FR64XRegClass.hasSubClassEq(RC)) 2992226633Sdim return load ? 2993353358Sdim (HasAVX512 ? X86::VMOVSDZrm_alt : 2994353358Sdim HasAVX ? X86::VMOVSDrm_alt : 2995353358Sdim X86::MOVSDrm_alt) : 2996353358Sdim (HasAVX512 ? X86::VMOVSDZmr : 2997353358Sdim HasAVX ? X86::VMOVSDmr : 2998353358Sdim X86::MOVSDmr); 2999223017Sdim if (X86::VR64RegClass.hasSubClassEq(RC)) 3000223017Sdim return load ? 
X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; 3001223017Sdim if (X86::RFP64RegClass.hasSubClassEq(RC)) 3002223017Sdim return load ? X86::LD_Fp64m : X86::ST_Fp64m; 3003341825Sdim if (X86::VK64RegClass.hasSubClassEq(RC)) { 3004341825Sdim assert(STI.hasBWI() && "KMOVQ requires BWI"); 3005314564Sdim return load ? X86::KMOVQkm : X86::KMOVQmk; 3006341825Sdim } 3007223017Sdim llvm_unreachable("Unknown 8-byte regclass"); 3008223017Sdim case 10: 3009223017Sdim assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); 3010210299Sed return load ? X86::LD_Fp80m : X86::ST_FpP80m; 3011226633Sdim case 16: { 3012321369Sdim if (X86::VR128XRegClass.hasSubClassEq(RC)) { 3013321369Sdim // If stack is realigned we can use aligned stores. 3014321369Sdim if (isStackAligned) 3015321369Sdim return load ? 3016321369Sdim (HasVLX ? X86::VMOVAPSZ128rm : 3017321369Sdim HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX : 3018321369Sdim HasAVX ? X86::VMOVAPSrm : 3019321369Sdim X86::MOVAPSrm): 3020321369Sdim (HasVLX ? X86::VMOVAPSZ128mr : 3021321369Sdim HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX : 3022321369Sdim HasAVX ? X86::VMOVAPSmr : 3023321369Sdim X86::MOVAPSmr); 3024321369Sdim else 3025321369Sdim return load ? 3026321369Sdim (HasVLX ? X86::VMOVUPSZ128rm : 3027321369Sdim HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX : 3028321369Sdim HasAVX ? X86::VMOVUPSrm : 3029321369Sdim X86::MOVUPSrm): 3030321369Sdim (HasVLX ? X86::VMOVUPSZ128mr : 3031321369Sdim HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : 3032321369Sdim HasAVX ? X86::VMOVUPSmr : 3033321369Sdim X86::MOVUPSmr); 3034321369Sdim } 3035321369Sdim if (X86::BNDRRegClass.hasSubClassEq(RC)) { 3036321369Sdim if (STI.is64Bit()) 3037341825Sdim return load ? X86::BNDMOV64rm : X86::BNDMOV64mr; 3038321369Sdim else 3039341825Sdim return load ? 
X86::BNDMOV32rm : X86::BNDMOV32mr; 3040321369Sdim } 3041321369Sdim llvm_unreachable("Unknown 16-byte regclass"); 3042226633Sdim } 3043224145Sdim case 32: 3044314564Sdim assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); 3045224145Sdim // If stack is realigned we can use aligned stores. 3046224145Sdim if (isStackAligned) 3047314564Sdim return load ? 3048314564Sdim (HasVLX ? X86::VMOVAPSZ256rm : 3049314564Sdim HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX : 3050314564Sdim X86::VMOVAPSYrm) : 3051314564Sdim (HasVLX ? X86::VMOVAPSZ256mr : 3052314564Sdim HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX : 3053314564Sdim X86::VMOVAPSYmr); 3054224145Sdim else 3055314564Sdim return load ? 3056314564Sdim (HasVLX ? X86::VMOVUPSZ256rm : 3057314564Sdim HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX : 3058314564Sdim X86::VMOVUPSYrm) : 3059314564Sdim (HasVLX ? X86::VMOVUPSZ256mr : 3060314564Sdim HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX : 3061314564Sdim X86::VMOVUPSYmr); 3062261991Sdim case 64: 3063261991Sdim assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass"); 3064314564Sdim assert(STI.hasAVX512() && "Using 512-bit register requires AVX512"); 3065261991Sdim if (isStackAligned) 3066261991Sdim return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; 3067261991Sdim else 3068261991Sdim return load ? 
X86::VMOVUPSZrm : X86::VMOVUPSZmr; 3069193323Sed } 3070210299Sed} 3071193323Sed 3072344779Sdimbool X86InstrInfo::getMemOperandWithOffset( 3073353358Sdim const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset, 3074344779Sdim const TargetRegisterInfo *TRI) const { 3075309124Sdim const MCInstrDesc &Desc = MemOp.getDesc(); 3076309124Sdim int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); 3077288943Sdim if (MemRefBegin < 0) 3078288943Sdim return false; 3079288943Sdim 3080288943Sdim MemRefBegin += X86II::getOperandBias(Desc); 3081288943Sdim 3082344779Sdim BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); 3083344779Sdim if (!BaseOp->isReg()) // Can be an MO_FrameIndex 3084288943Sdim return false; 3085288943Sdim 3086309124Sdim if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1) 3087309124Sdim return false; 3088309124Sdim 3089309124Sdim if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() != 3090288943Sdim X86::NoRegister) 3091288943Sdim return false; 3092288943Sdim 3093309124Sdim const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp); 3094288943Sdim 3095288943Sdim // Displacement can be symbolic 3096288943Sdim if (!DispMO.isImm()) 3097288943Sdim return false; 3098288943Sdim 3099288943Sdim Offset = DispMO.getImm(); 3100288943Sdim 3101344779Sdim assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base " 3102344779Sdim "operands of type register."); 3103314564Sdim return true; 3104288943Sdim} 3105288943Sdim 3106210299Sedstatic unsigned getStoreRegOpcode(unsigned SrcReg, 3107210299Sed const TargetRegisterClass *RC, 3108210299Sed bool isStackAligned, 3109276479Sdim const X86Subtarget &STI) { 3110276479Sdim return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false); 3111193323Sed} 3112193323Sed 3113210299Sed 3114210299Sedstatic unsigned getLoadRegOpcode(unsigned DestReg, 3115210299Sed const TargetRegisterClass *RC, 3116210299Sed bool isStackAligned, 3117276479Sdim const 
X86Subtarget &STI) { 3118276479Sdim return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true); 3119210299Sed} 3120210299Sed 3121193323Sedvoid X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 3122193323Sed MachineBasicBlock::iterator MI, 3123193323Sed unsigned SrcReg, bool isKill, int FrameIdx, 3124208599Srdivacky const TargetRegisterClass *RC, 3125208599Srdivacky const TargetRegisterInfo *TRI) const { 3126193323Sed const MachineFunction &MF = *MBB.getParent(); 3127321369Sdim assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && 3128212904Sdim "Stack slot too small for store"); 3129321369Sdim unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); 3130288943Sdim bool isAligned = 3131288943Sdim (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || 3132288943Sdim RI.canRealignStack(MF); 3133276479Sdim unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); 3134344779Sdim addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx) 3135193323Sed .addReg(SrcReg, getKillRegState(isKill)); 3136193323Sed} 3137193323Sed 3138344779Sdimvoid X86InstrInfo::storeRegToAddr( 3139344779Sdim MachineFunction &MF, unsigned SrcReg, bool isKill, 3140344779Sdim SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, 3141344779Sdim ArrayRef<MachineMemOperand *> MMOs, 3142344779Sdim SmallVectorImpl<MachineInstr *> &NewMIs) const { 3143321369Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 3144321369Sdim unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); 3145344779Sdim bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; 3146276479Sdim unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); 3147206124Srdivacky DebugLoc DL; 3148193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 3149193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 3150321369Sdim MIB.add(Addr[i]); 3151193323Sed 
MIB.addReg(SrcReg, getKillRegState(isKill)); 3152344779Sdim MIB.setMemRefs(MMOs); 3153193323Sed NewMIs.push_back(MIB); 3154193323Sed} 3155193323Sed 3156193323Sed 3157193323Sedvoid X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 3158193323Sed MachineBasicBlock::iterator MI, 3159193323Sed unsigned DestReg, int FrameIdx, 3160208599Srdivacky const TargetRegisterClass *RC, 3161208599Srdivacky const TargetRegisterInfo *TRI) const { 3162193323Sed const MachineFunction &MF = *MBB.getParent(); 3163321369Sdim unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); 3164288943Sdim bool isAligned = 3165288943Sdim (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || 3166288943Sdim RI.canRealignStack(MF); 3167276479Sdim unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); 3168344779Sdim addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx); 3169193323Sed} 3170193323Sed 3171344779Sdimvoid X86InstrInfo::loadRegFromAddr( 3172344779Sdim MachineFunction &MF, unsigned DestReg, 3173344779Sdim SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, 3174344779Sdim ArrayRef<MachineMemOperand *> MMOs, 3175344779Sdim SmallVectorImpl<MachineInstr *> &NewMIs) const { 3176321369Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 3177321369Sdim unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); 3178344779Sdim bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; 3179276479Sdim unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); 3180206124Srdivacky DebugLoc DL; 3181193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); 3182193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 3183321369Sdim MIB.add(Addr[i]); 3184344779Sdim MIB.setMemRefs(MMOs); 3185193323Sed NewMIs.push_back(MIB); 3186193323Sed} 3187193323Sed 3188309124Sdimbool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, 
3189309124Sdim unsigned &SrcReg2, int &CmpMask, 3190309124Sdim int &CmpValue) const { 3191309124Sdim switch (MI.getOpcode()) { 3192239462Sdim default: break; 3193239462Sdim case X86::CMP64ri32: 3194239462Sdim case X86::CMP64ri8: 3195239462Sdim case X86::CMP32ri: 3196239462Sdim case X86::CMP32ri8: 3197239462Sdim case X86::CMP16ri: 3198239462Sdim case X86::CMP16ri8: 3199239462Sdim case X86::CMP8ri: 3200309124Sdim SrcReg = MI.getOperand(0).getReg(); 3201239462Sdim SrcReg2 = 0; 3202321369Sdim if (MI.getOperand(1).isImm()) { 3203321369Sdim CmpMask = ~0; 3204321369Sdim CmpValue = MI.getOperand(1).getImm(); 3205321369Sdim } else { 3206321369Sdim CmpMask = CmpValue = 0; 3207321369Sdim } 3208239462Sdim return true; 3209239462Sdim // A SUB can be used to perform comparison. 3210239462Sdim case X86::SUB64rm: 3211239462Sdim case X86::SUB32rm: 3212239462Sdim case X86::SUB16rm: 3213239462Sdim case X86::SUB8rm: 3214309124Sdim SrcReg = MI.getOperand(1).getReg(); 3215239462Sdim SrcReg2 = 0; 3216321369Sdim CmpMask = 0; 3217239462Sdim CmpValue = 0; 3218239462Sdim return true; 3219239462Sdim case X86::SUB64rr: 3220239462Sdim case X86::SUB32rr: 3221239462Sdim case X86::SUB16rr: 3222239462Sdim case X86::SUB8rr: 3223309124Sdim SrcReg = MI.getOperand(1).getReg(); 3224309124Sdim SrcReg2 = MI.getOperand(2).getReg(); 3225321369Sdim CmpMask = 0; 3226239462Sdim CmpValue = 0; 3227239462Sdim return true; 3228239462Sdim case X86::SUB64ri32: 3229239462Sdim case X86::SUB64ri8: 3230239462Sdim case X86::SUB32ri: 3231239462Sdim case X86::SUB32ri8: 3232239462Sdim case X86::SUB16ri: 3233239462Sdim case X86::SUB16ri8: 3234239462Sdim case X86::SUB8ri: 3235309124Sdim SrcReg = MI.getOperand(1).getReg(); 3236239462Sdim SrcReg2 = 0; 3237321369Sdim if (MI.getOperand(2).isImm()) { 3238321369Sdim CmpMask = ~0; 3239321369Sdim CmpValue = MI.getOperand(2).getImm(); 3240321369Sdim } else { 3241321369Sdim CmpMask = CmpValue = 0; 3242321369Sdim } 3243239462Sdim return true; 3244239462Sdim case X86::CMP64rr: 
3245239462Sdim case X86::CMP32rr: 3246239462Sdim case X86::CMP16rr: 3247239462Sdim case X86::CMP8rr: 3248309124Sdim SrcReg = MI.getOperand(0).getReg(); 3249309124Sdim SrcReg2 = MI.getOperand(1).getReg(); 3250321369Sdim CmpMask = 0; 3251239462Sdim CmpValue = 0; 3252239462Sdim return true; 3253239462Sdim case X86::TEST8rr: 3254239462Sdim case X86::TEST16rr: 3255239462Sdim case X86::TEST32rr: 3256239462Sdim case X86::TEST64rr: 3257309124Sdim SrcReg = MI.getOperand(0).getReg(); 3258309124Sdim if (MI.getOperand(1).getReg() != SrcReg) 3259309124Sdim return false; 3260239462Sdim // Compare against zero. 3261239462Sdim SrcReg2 = 0; 3262239462Sdim CmpMask = ~0; 3263239462Sdim CmpValue = 0; 3264239462Sdim return true; 3265239462Sdim } 3266239462Sdim return false; 3267239462Sdim} 3268239462Sdim 3269288943Sdim/// Check whether the first instruction, whose only 3270239462Sdim/// purpose is to update flags, can be made redundant. 3271239462Sdim/// CMPrr can be made redundant by SUBrr if the operands are the same. 3272239462Sdim/// This function can be extended later on. 3273239462Sdim/// SrcReg, SrcRegs: register operands for FlagI. 3274239462Sdim/// ImmValue: immediate for FlagI if it takes an immediate. 
3275344779Sdiminline static bool isRedundantFlagInstr(const MachineInstr &FlagI, 3276344779Sdim unsigned SrcReg, unsigned SrcReg2, 3277344779Sdim int ImmMask, int ImmValue, 3278344779Sdim const MachineInstr &OI) { 3279309124Sdim if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) || 3280309124Sdim (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) || 3281309124Sdim (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) || 3282309124Sdim (FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) && 3283309124Sdim ((OI.getOperand(1).getReg() == SrcReg && 3284309124Sdim OI.getOperand(2).getReg() == SrcReg2) || 3285309124Sdim (OI.getOperand(1).getReg() == SrcReg2 && 3286309124Sdim OI.getOperand(2).getReg() == SrcReg))) 3287239462Sdim return true; 3288239462Sdim 3289321369Sdim if (ImmMask != 0 && 3290321369Sdim ((FlagI.getOpcode() == X86::CMP64ri32 && 3291309124Sdim OI.getOpcode() == X86::SUB64ri32) || 3292309124Sdim (FlagI.getOpcode() == X86::CMP64ri8 && 3293309124Sdim OI.getOpcode() == X86::SUB64ri8) || 3294309124Sdim (FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) || 3295309124Sdim (FlagI.getOpcode() == X86::CMP32ri8 && 3296309124Sdim OI.getOpcode() == X86::SUB32ri8) || 3297309124Sdim (FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) || 3298309124Sdim (FlagI.getOpcode() == X86::CMP16ri8 && 3299309124Sdim OI.getOpcode() == X86::SUB16ri8) || 3300309124Sdim (FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) && 3301309124Sdim OI.getOperand(1).getReg() == SrcReg && 3302309124Sdim OI.getOperand(2).getImm() == ImmValue) 3303239462Sdim return true; 3304239462Sdim return false; 3305239462Sdim} 3306239462Sdim 3307288943Sdim/// Check whether the definition can be converted 3308239462Sdim/// to remove a comparison against zero. 
3309344779Sdiminline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) { 3310344779Sdim NoSignFlag = false; 3311344779Sdim 3312309124Sdim switch (MI.getOpcode()) { 3313239462Sdim default: return false; 3314261991Sdim 3315261991Sdim // The shift instructions only modify ZF if their shift count is non-zero. 3316261991Sdim // N.B.: The processor truncates the shift count depending on the encoding. 3317261991Sdim case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri: 3318261991Sdim case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri: 3319261991Sdim return getTruncatedShiftCount(MI, 2) != 0; 3320261991Sdim 3321261991Sdim // Some left shift instructions can be turned into LEA instructions but only 3322261991Sdim // if their flags aren't used. Avoid transforming such instructions. 3323261991Sdim case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{ 3324261991Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 3325261991Sdim if (isTruncatedShiftCountForLEA(ShAmt)) return false; 3326261991Sdim return ShAmt != 0; 3327261991Sdim } 3328261991Sdim 3329261991Sdim case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8: 3330261991Sdim case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8: 3331261991Sdim return getTruncatedShiftCount(MI, 3) != 0; 3332261991Sdim 3333239462Sdim case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri: 3334239462Sdim case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8: 3335239462Sdim case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr: 3336239462Sdim case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm: 3337239462Sdim case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm: 3338249423Sdim case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r: 3339239462Sdim case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri: 3340239462Sdim case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8: 3341239462Sdim case X86::ADD8ri: case 
X86::ADD64rr: case X86::ADD32rr: 3342239462Sdim case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: 3343239462Sdim case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: 3344249423Sdim case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: 3345239462Sdim case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: 3346239462Sdim case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: 3347239462Sdim case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: 3348239462Sdim case X86::AND16rr: case X86::AND8rr: case X86::AND64rm: 3349239462Sdim case X86::AND32rm: case X86::AND16rm: case X86::AND8rm: 3350239462Sdim case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri: 3351239462Sdim case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8: 3352239462Sdim case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr: 3353239462Sdim case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm: 3354239462Sdim case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm: 3355239462Sdim case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri: 3356239462Sdim case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8: 3357239462Sdim case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: 3358239462Sdim case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: 3359239462Sdim case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: 3360327952Sdim case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri: 3361327952Sdim case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8: 3362327952Sdim case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr: 3363327952Sdim case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm: 3364327952Sdim case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm: 3365327952Sdim case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri: 3366327952Sdim case X86::SBB32ri8: case X86::SBB16ri: case X86::SBB16ri8: 3367327952Sdim case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr: 3368327952Sdim case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm: 3369327952Sdim 
case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm: 3370261991Sdim case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r: 3371261991Sdim case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1: 3372261991Sdim case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1: 3373261991Sdim case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1: 3374249423Sdim case X86::ANDN32rr: case X86::ANDN32rm: 3375249423Sdim case X86::ANDN64rr: case X86::ANDN64rm: 3376261991Sdim case X86::BLSI32rr: case X86::BLSI32rm: 3377261991Sdim case X86::BLSI64rr: case X86::BLSI64rm: 3378261991Sdim case X86::BLSMSK32rr:case X86::BLSMSK32rm: 3379261991Sdim case X86::BLSMSK64rr:case X86::BLSMSK64rm: 3380261991Sdim case X86::BLSR32rr: case X86::BLSR32rm: 3381261991Sdim case X86::BLSR64rr: case X86::BLSR64rm: 3382261991Sdim case X86::BZHI32rr: case X86::BZHI32rm: 3383261991Sdim case X86::BZHI64rr: case X86::BZHI64rm: 3384261991Sdim case X86::LZCNT16rr: case X86::LZCNT16rm: 3385261991Sdim case X86::LZCNT32rr: case X86::LZCNT32rm: 3386261991Sdim case X86::LZCNT64rr: case X86::LZCNT64rm: 3387261991Sdim case X86::POPCNT16rr:case X86::POPCNT16rm: 3388261991Sdim case X86::POPCNT32rr:case X86::POPCNT32rm: 3389261991Sdim case X86::POPCNT64rr:case X86::POPCNT64rm: 3390261991Sdim case X86::TZCNT16rr: case X86::TZCNT16rm: 3391261991Sdim case X86::TZCNT32rr: case X86::TZCNT32rm: 3392261991Sdim case X86::TZCNT64rr: case X86::TZCNT64rm: 3393327952Sdim case X86::BLCFILL32rr: case X86::BLCFILL32rm: 3394327952Sdim case X86::BLCFILL64rr: case X86::BLCFILL64rm: 3395327952Sdim case X86::BLCI32rr: case X86::BLCI32rm: 3396327952Sdim case X86::BLCI64rr: case X86::BLCI64rm: 3397327952Sdim case X86::BLCIC32rr: case X86::BLCIC32rm: 3398327952Sdim case X86::BLCIC64rr: case X86::BLCIC64rm: 3399327952Sdim case X86::BLCMSK32rr: case X86::BLCMSK32rm: 3400327952Sdim case X86::BLCMSK64rr: case X86::BLCMSK64rm: 3401327952Sdim case X86::BLCS32rr: case X86::BLCS32rm: 
3402327952Sdim case X86::BLCS64rr: case X86::BLCS64rm: 3403327952Sdim case X86::BLSFILL32rr: case X86::BLSFILL32rm: 3404327952Sdim case X86::BLSFILL64rr: case X86::BLSFILL64rm: 3405327952Sdim case X86::BLSIC32rr: case X86::BLSIC32rm: 3406327952Sdim case X86::BLSIC64rr: case X86::BLSIC64rm: 3407344779Sdim case X86::T1MSKC32rr: case X86::T1MSKC32rm: 3408344779Sdim case X86::T1MSKC64rr: case X86::T1MSKC64rm: 3409344779Sdim case X86::TZMSK32rr: case X86::TZMSK32rm: 3410344779Sdim case X86::TZMSK64rr: case X86::TZMSK64rm: 3411239462Sdim return true; 3412344779Sdim case X86::BEXTR32rr: case X86::BEXTR64rr: 3413344779Sdim case X86::BEXTR32rm: case X86::BEXTR64rm: 3414344779Sdim case X86::BEXTRI32ri: case X86::BEXTRI32mi: 3415344779Sdim case X86::BEXTRI64ri: case X86::BEXTRI64mi: 3416344779Sdim // BEXTR doesn't update the sign flag so we can't use it. 3417344779Sdim NoSignFlag = true; 3418344779Sdim return true; 3419239462Sdim } 3420239462Sdim} 3421239462Sdim 3422288943Sdim/// Check whether the use can be converted to remove a comparison against zero. 
static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
  // For each instruction that both reads SrcReg and defines EFLAGS, return
  // the condition code that is equivalent to "result == 0" (COND_E) after a
  // removed compare-with-zero; optimizeCompareInstr substitutes it (or its
  // opposite for COND_NE) into the flag users.
  switch (MI.getOpcode()) {
  default: return X86::COND_INVALID;
  case X86::NEG8r:
  case X86::NEG16r:
  case X86::NEG32r:
  case X86::NEG64r:
    return X86::COND_AE;
  case X86::LZCNT16rr:
  case X86::LZCNT32rr:
  case X86::LZCNT64rr:
    return X86::COND_B;
  case X86::POPCNT16rr:
  case X86::POPCNT32rr:
  case X86::POPCNT64rr:
    return X86::COND_E;
  case X86::TZCNT16rr:
  case X86::TZCNT32rr:
  case X86::TZCNT64rr:
    return X86::COND_B;
  case X86::BSF16rr:
  case X86::BSF32rr:
  case X86::BSF64rr:
  case X86::BSR16rr:
  case X86::BSR32rr:
  case X86::BSR64rr:
    return X86::COND_E;
  case X86::BLSI32rr:
  case X86::BLSI64rr:
    return X86::COND_AE;
  case X86::BLSR32rr:
  case X86::BLSR64rr:
  case X86::BLSMSK32rr:
  case X86::BLSMSK64rr:
    return X86::COND_B;
    // TODO: TBM instructions.
  }
}

/// Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
///
/// Two transformations are attempted:
///  1. SUB -> CMP when the SUB's destination is otherwise unused.
///  2. Removal of CmpInstr entirely when an earlier instruction (the def of
///     SrcReg, a flag-setting use of it, or an equivalent SUB) already sets
///     EFLAGS appropriately; flag consumers are retargeted as needed.
bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
                                        unsigned SrcReg2, int CmpMask,
                                        int CmpValue,
                                        const MachineRegisterInfo *MRI) const {
  // Check whether we can replace SUB with CMP.
  switch (CmpInstr.getOpcode()) {
  default: break;
  case X86::SUB64ri32:
  case X86::SUB64ri8:
  case X86::SUB32ri:
  case X86::SUB32ri8:
  case X86::SUB16ri:
  case X86::SUB16ri8:
  case X86::SUB8ri:
  case X86::SUB64rm:
  case X86::SUB32rm:
  case X86::SUB16rm:
  case X86::SUB8rm:
  case X86::SUB64rr:
  case X86::SUB32rr:
  case X86::SUB16rr:
  case X86::SUB8rr: {
    if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
      return false;
    // There is no use of the destination register, we can replace SUB with CMP.
    unsigned NewOpcode = 0;
    switch (CmpInstr.getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SUB64rm:   NewOpcode = X86::CMP64rm;   break;
    case X86::SUB32rm:   NewOpcode = X86::CMP32rm;   break;
    case X86::SUB16rm:   NewOpcode = X86::CMP16rm;   break;
    case X86::SUB8rm:    NewOpcode = X86::CMP8rm;    break;
    case X86::SUB64rr:   NewOpcode = X86::CMP64rr;   break;
    case X86::SUB32rr:   NewOpcode = X86::CMP32rr;   break;
    case X86::SUB16rr:   NewOpcode = X86::CMP16rr;   break;
    case X86::SUB8rr:    NewOpcode = X86::CMP8rr;    break;
    case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
    case X86::SUB64ri8:  NewOpcode = X86::CMP64ri8;  break;
    case X86::SUB32ri:   NewOpcode = X86::CMP32ri;   break;
    case X86::SUB32ri8:  NewOpcode = X86::CMP32ri8;  break;
    case X86::SUB16ri:   NewOpcode = X86::CMP16ri;   break;
    case X86::SUB16ri8:  NewOpcode = X86::CMP16ri8;  break;
    case X86::SUB8ri:    NewOpcode = X86::CMP8ri;    break;
    }
    // Rewrite in place: CMP has no destination, so drop operand 0.
    CmpInstr.setDesc(get(NewOpcode));
    CmpInstr.RemoveOperand(0);
    // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
    if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
        NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
      return false;
  }
  }

  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  // CmpInstr is the first instruction of the BB.
  MachineBasicBlock::iterator I = CmpInstr, Def = MI;

  // If we are comparing against zero, check whether we can use MI to update
  // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
  bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
  if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
    return false;

  // If we have a use of the source register between the def and our compare
  // instruction we can eliminate the compare iff the use sets EFLAGS in the
  // right way.
  bool ShouldUpdateCC = false;
  bool NoSignFlag = false;
  X86::CondCode NewCC = X86::COND_INVALID;
  if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
    // Scan forward from the use until we hit the use we're looking for or the
    // compare instruction.
    for (MachineBasicBlock::iterator J = MI;; ++J) {
      // Do we have a convertible instruction?
      NewCC = isUseDefConvertible(*J);
      if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
          J->getOperand(1).getReg() == SrcReg) {
        assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
        ShouldUpdateCC = true; // Update CC later on.
        // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
        // with the new def.
        Def = J;
        MI = &*Def;
        break;
      }

      if (J == I)
        return false;
    }
  }

  // We are searching for an earlier instruction that can make CmpInstr
  // redundant and that instruction will be saved in Sub.
  MachineInstr *Sub = nullptr;
  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // We iterate backward, starting from the instruction before CmpInstr and
  // stop when reaching the definition of a source register or done with the BB.
  // RI points to the instruction before CmpInstr.
  // If the definition is in this basic block, RE points to the definition;
  // otherwise, RE is the rend of the basic block.
  MachineBasicBlock::reverse_iterator
      RI = ++I.getReverse(),
      RE = CmpInstr.getParent() == MI->getParent()
               ? Def.getReverse() /* points to MI */
               : CmpInstr.getParent()->rend();
  MachineInstr *Movr0Inst = nullptr;
  for (; RI != RE; ++RI) {
    MachineInstr &Instr = *RI;
    // Check whether CmpInstr can be made redundant by the current instruction.
    if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
                                           CmpValue, Instr)) {
      Sub = &Instr;
      break;
    }

    if (Instr.modifiesRegister(X86::EFLAGS, TRI) ||
        Instr.readsRegister(X86::EFLAGS, TRI)) {
      // This instruction modifies or uses EFLAGS.

      // MOV32r0 etc. are implemented with xor which clobbers condition code.
      // They are safe to move up, if the definition to EFLAGS is dead and
      // earlier instructions do not read or write EFLAGS.
      if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
          Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
        Movr0Inst = &Instr;
        continue;
      }

      // We can't remove CmpInstr.
      return false;
    }
  }

  // Return false if no candidates exist.
  if (!IsCmpZero && !Sub)
    return false;

  // Sub with swapped operand order, e.g. SUB(r1, r2) for CMP(r2, r1); flag
  // users then need their condition codes swapped.
  bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
                    Sub->getOperand(2).getReg() == SrcReg);

  // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
  // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
  // If we are done with the basic block, we need to check whether EFLAGS is
  // live-out.
  bool IsSafe = false;
  SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
  MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
  for (++I; I != E; ++I) {
    const MachineInstr &Instr = *I;
    bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
    bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
    // We should check the usage if this instruction uses and updates EFLAGS.
    if (!UseEFLAGS && ModifyEFLAGS) {
      // It is safe to remove CmpInstr if EFLAGS is updated again.
      IsSafe = true;
      break;
    }
    if (!UseEFLAGS && !ModifyEFLAGS)
      continue;

    // EFLAGS is used by this instruction.
    X86::CondCode OldCC = X86::COND_INVALID;
    if (IsCmpZero || IsSwapped) {
      // We decode the condition code from opcode.
      if (Instr.isBranch())
        OldCC = X86::getCondFromBranch(Instr);
      else {
        OldCC = X86::getCondFromSETCC(Instr);
        if (OldCC == X86::COND_INVALID)
          OldCC = X86::getCondFromCMov(Instr);
      }
      if (OldCC == X86::COND_INVALID) return false;
    }
    X86::CondCode ReplacementCC = X86::COND_INVALID;
    if (IsCmpZero) {
      switch (OldCC) {
      default: break;
      case X86::COND_A: case X86::COND_AE:
      case X86::COND_B: case X86::COND_BE:
      case X86::COND_G: case X86::COND_GE:
      case X86::COND_L: case X86::COND_LE:
      case X86::COND_O: case X86::COND_NO:
        // CF and OF are used, we can't perform this optimization.
        return false;
      case X86::COND_S: case X86::COND_NS:
        // If SF is used, but the instruction doesn't update the SF, then we
        // can't do the optimization.
        if (NoSignFlag)
          return false;
        break;
      }

      // If we're updating the condition code check if we have to reverse the
      // condition.
      if (ShouldUpdateCC)
        switch (OldCC) {
        default:
          return false;
        case X86::COND_E:
          ReplacementCC = NewCC;
          break;
        case X86::COND_NE:
          ReplacementCC = GetOppositeBranchCondition(NewCC);
          break;
        }
    } else if (IsSwapped) {
      // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
      // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
      // We swap the condition code and synthesize the new opcode.
      ReplacementCC = getSwappedCondition(OldCC);
      if (ReplacementCC == X86::COND_INVALID) return false;
    }

    if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) {
      // Push the MachineInstr to OpsToUpdate.
      // If it is safe to remove CmpInstr, the condition code of these
      // instructions will be modified.
      OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC));
    }
    if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
      // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
      IsSafe = true;
      break;
    }
  }

  // If EFLAGS is not killed nor re-defined, we should check whether it is
  // live-out. If it is live-out, do not optimize.
  if ((IsCmpZero || IsSwapped) && !IsSafe) {
    MachineBasicBlock *MBB = CmpInstr.getParent();
    for (MachineBasicBlock *Successor : MBB->successors())
      if (Successor->isLiveIn(X86::EFLAGS))
        return false;
  }

  // The instruction to be updated is either Sub or MI.
  Sub = IsCmpZero ? MI : Sub;
  // Move Movr0Inst to the appropriate place before Sub.
  if (Movr0Inst) {
    // Look backwards until we find a def that doesn't use the current EFLAGS.
    Def = Sub;
    MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(),
                                        InsertE = Sub->getParent()->rend();
    for (; InsertI != InsertE; ++InsertI) {
      MachineInstr *Instr = &*InsertI;
      if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
          Instr->modifiesRegister(X86::EFLAGS, TRI)) {
        Sub->getParent()->remove(Movr0Inst);
        Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
                                   Movr0Inst);
        break;
      }
    }
    if (InsertI == InsertE)
      return false;
  }

  // Make sure Sub instruction defines EFLAGS and mark the def live.
  MachineOperand *FlagDef = Sub->findRegisterDefOperand(X86::EFLAGS);
  assert(FlagDef && "Unable to locate a def EFLAGS operand");
  FlagDef->setIsDead(false);

  CmpInstr.eraseFromParent();

  // Modify the condition code of instructions in OpsToUpdate.
  // The condition-code immediate is always the last explicit operand of
  // branch/SETcc/CMOV instructions.
  for (auto &Op : OpsToUpdate) {
    Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
        .setImm(Op.second);
  }
  return true;
}

/// Try to remove the load by folding it to a register
/// operand at the use. We fold the load instructions if load defines a virtual
/// register, the virtual register is used once in the same BB, and the
/// instructions in-between do not load or store, and have no side effects.
MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
                                     const MachineRegisterInfo *MRI,
                                     unsigned &FoldAsLoadDefReg,
                                     MachineInstr *&DefMI) const {
  // Check whether we can move DefMI here.
  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
  assert(DefMI);
  bool SawStore = false;
  if (!DefMI->isSafeToMove(nullptr, SawStore))
    return nullptr;

  // Collect information about virtual register operands of MI.
  SmallVector<unsigned, 1> SrcOperandIds;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (Reg != FoldAsLoadDefReg)
      continue;
    // Do not fold if we have a subreg use or a def.
    if (MO.getSubReg() || MO.isDef())
      return nullptr;
    SrcOperandIds.push_back(i);
  }
  if (SrcOperandIds.empty())
    return nullptr;

  // Check whether we can fold the def into SrcOperandId.
  if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
    // Clear FoldAsLoadDefReg so the caller knows the fold happened.
    FoldAsLoadDefReg = 0;
    return FoldMI;
  }

  return nullptr;
}

/// Expand a single-def pseudo instruction to a two-addr
/// instruction with two undef reads of the register being defined.
/// This is used for mapping:
///   %xmm4 = V_SET0
/// to:
///   %xmm4 = PXORrr undef %xmm4, undef %xmm4
///
static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
                             const MCInstrDesc &Desc) {
  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
  unsigned Reg = MIB->getOperand(0).getReg();
  MIB->setDesc(Desc);

  // MachineInstr::addOperand() will insert explicit operands before any
  // implicit operands.
  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
  // But we don't trust that.
  assert(MIB->getOperand(1).getReg() == Reg &&
         MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
  return true;
}

/// Expand a single-def pseudo instruction to a two-addr
/// instruction with two %k0 reads.
/// This is used for mapping:
///   %k4 = K_SET1
/// to:
///   %k4 = KXNORrr %k0, %k0
static bool Expand2AddrKreg(MachineInstrBuilder &MIB,
                            const MCInstrDesc &Desc, unsigned Reg) {
  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
  MIB->setDesc(Desc);
  // Reg is read twice as an undef source; the pseudo's single def remains
  // operand 0.
  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
  return true;
}

/// Expand MOV32r1 / MOV32r_1 (materialize +1 / -1) into
///   xor %reg, %reg  ;  inc/dec %reg
/// \p MinusOne selects DEC (produce -1) instead of INC (produce +1).
static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
                          bool MinusOne) {
  MachineBasicBlock &MBB = *MIB->getParent();
  DebugLoc DL = MIB->getDebugLoc();
  unsigned Reg = MIB->getOperand(0).getReg();

  // Insert the XOR.
  BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
      .addReg(Reg, RegState::Undef)
      .addReg(Reg, RegState::Undef);

  // Turn the pseudo into an INC or DEC.
  MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
  MIB.addReg(Reg);

  return true;
}

/// Expand MOV32ImmSExti8/MOV64ImmSExti8 (materialize a sign-extended 8-bit
/// immediate) into a push-immediate/pop-register pair, which is smaller than
/// a full MOVri. Falls back to plain MOVri when the red zone may be in use.
/// Emits DWARF CFI adjustments around the transient stack pointer change when
/// the function has no frame pointer.
static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
                               const TargetInstrInfo &TII,
                               const X86Subtarget &Subtarget) {
  MachineBasicBlock &MBB = *MIB->getParent();
  DebugLoc DL = MIB->getDebugLoc();
  int64_t Imm = MIB->getOperand(1).getImm();
  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
  MachineBasicBlock::iterator I = MIB.getInstr();

  int StackAdjustment;

  if (Subtarget.is64Bit()) {
    assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
           MIB->getOpcode() == X86::MOV32ImmSExti8);

    // Can't use push/pop lowering if the function might write to the red zone.
    X86MachineFunctionInfo *X86FI =
        MBB.getParent()->getInfo<X86MachineFunctionInfo>();
    if (X86FI->getUsesRedZone()) {
      MIB->setDesc(TII.get(MIB->getOpcode() ==
                           X86::MOV32ImmSExti8 ? X86::MOV32ri : X86::MOV64ri));
      return true;
    }

    // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
    // widen the register if necessary.
    StackAdjustment = 8;
    BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
    MIB->setDesc(TII.get(X86::POP64r));
    MIB->getOperand(0)
        .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
  } else {
    assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
    StackAdjustment = 4;
    BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
    MIB->setDesc(TII.get(X86::POP32r));
  }

  // Build CFI if necessary.
  MachineFunction &MF = *MBB.getParent();
  const X86FrameLowering *TFL = Subtarget.getFrameLowering();
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsDwarfCFI =
      !IsWin64Prologue &&
      (MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry());
  bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
  if (EmitCFI) {
    // The push/pop bracket temporarily moves the CFA; record both edges.
    TFL->BuildCFI(MBB, I, DL,
        MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    TFL->BuildCFI(MBB, std::next(I), DL,
        MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
  }

  return true;
}

// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,
                                 const TargetInstrInfo &TII) {
  MachineBasicBlock &MBB = *MIB->getParent();
  DebugLoc DL = MIB->getDebugLoc();
  unsigned Reg = MIB->getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
  auto Flags = MachineMemOperand::MOLoad |
               MachineMemOperand::MODereferenceable |
               MachineMemOperand::MOInvariant;
  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
      MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, 8);
  MachineBasicBlock::iterator I = MIB.getInstr();

  // First load the address of the guard variable from the GOT
  // (RIP-relative), then rewrite the pseudo itself into a load through that
  // address.
  BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
      .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
      .addMemOperand(MMO);
  MIB->setDebugLoc(DL);
  MIB->setDesc(TII.get(X86::MOV64rm));
  MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
}

/// Expand the XOR32_FP/XOR64_FP pseudos into an XOR of the destination with
/// the frame register (used as an arbitrary, always-available source).
static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
  MachineBasicBlock &MBB = *MIB->getParent();
  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  unsigned XorOp =
      MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
  MIB->setDesc(TII.get(XorOp));
  MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef);
  return true;
}

// This is used to handle spills for 128/256-bit registers when we have AVX512,
// but not VLX. If it uses an extended register we need to use an instruction
// that loads the lower 128/256-bit, but is available with only AVX512F.
static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
                            const TargetRegisterInfo *TRI,
                            const MCInstrDesc &LoadDesc,
                            const MCInstrDesc &BroadcastDesc,
                            unsigned SubIdx) {
  unsigned DestReg = MIB->getOperand(0).getReg();
  // Check if DestReg is XMM16-31 or YMM16-31.
  if (TRI->getEncodingValue(DestReg) < 16) {
    // We can use a normal VEX encoded load.
    MIB->setDesc(LoadDesc);
  } else {
    // Use a 128/256-bit VBROADCAST instruction.
    MIB->setDesc(BroadcastDesc);
    // Change the destination to a 512-bit register.
    DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
    MIB->getOperand(0).setReg(DestReg);
  }
  return true;
}

// This is used to handle spills for 128/256-bit registers when we have AVX512,
// but not VLX. If it uses an extended register we need to use an instruction
// that stores the lower 128/256-bit, but is available with only AVX512F.
static bool expandNOVLXStore(MachineInstrBuilder &MIB,
                             const TargetRegisterInfo *TRI,
                             const MCInstrDesc &StoreDesc,
                             const MCInstrDesc &ExtractDesc,
                             unsigned SubIdx) {
  unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
  // Check if DestReg is XMM16-31 or YMM16-31.
  if (TRI->getEncodingValue(SrcReg) < 16) {
    // We can use a normal VEX encoded store.
    MIB->setDesc(StoreDesc);
  } else {
    // Use a VEXTRACTF instruction.
    MIB->setDesc(ExtractDesc);
    // Change the destination to a 512-bit register.
    SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
    MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
    MIB.addImm(0x0); // Append immediate to extract from the lower bits.
  }

  return true;
}

/// Expand a SHLDROT/SHRDROT pseudo into a real SHLD/SHRD whose two source
/// registers are the same operand (a double-shift of a register with itself
/// is a rotate).
static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
  MIB->setDesc(Desc);
  int64_t ShiftAmt = MIB->getOperand(2).getImm();
  // Temporarily remove the immediate so we can add another source register.
  MIB->RemoveOperand(2);
  // Add the register. Don't copy the kill flag if there is one.
  MIB.addReg(MIB->getOperand(1).getReg(),
             getUndefRegState(MIB->getOperand(1).isUndef()));
  // Add back the immediate.
  MIB.addImm(ShiftAmt);
  return true;
}

bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  bool HasAVX = Subtarget.hasAVX();
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
  switch (MI.getOpcode()) {
  case X86::MOV32r0:
    return Expand2AddrUndef(MIB, get(X86::XOR32rr));
  case X86::MOV32r1:
    return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
  case X86::MOV32r_1:
    return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
  case X86::MOV32ImmSExti8:
  case X86::MOV64ImmSExti8:
    return ExpandMOVImmSExti8(MIB, *this, Subtarget);
  case X86::SETB_C8r:
    return Expand2AddrUndef(MIB, get(X86::SBB8rr));
  case X86::SETB_C16r:
    return Expand2AddrUndef(MIB, get(X86::SBB16rr));
  case X86::SETB_C32r:
    return Expand2AddrUndef(MIB, get(X86::SBB32rr));
  case X86::SETB_C64r:
    return Expand2AddrUndef(MIB, get(X86::SBB64rr));
  case X86::MMX_SET0:
    return Expand2AddrUndef(MIB, get(X86::MMX_PXORirr));
  case X86::V_SET0:
  case X86::FsFLD0SS:
  case X86::FsFLD0SD:
    return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
  case X86::AVX_SET0: {
    assert(HasAVX && "AVX not supported");
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    unsigned SrcReg = MIB->getOperand(0).getReg();
    // XOR only the low 128 bits; the implicit def below covers the full YMM.
    unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
    MIB->getOperand(0).setReg(XReg);
    Expand2AddrUndef(MIB, get(X86::VXORPSrr));
    MIB.addReg(SrcReg, RegState::ImplicitDefine);
    return true;
  }
  case X86::AVX512_128_SET0:
  case X86::AVX512_FsFLD0SS:
  case X86::AVX512_FsFLD0SD: {
    bool HasVLX = Subtarget.hasVLX();
    unsigned SrcReg = MIB->getOperand(0).getReg();
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
      return Expand2AddrUndef(MIB,
                              get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
    // Extended register without VLX. Use a larger XOR.
    SrcReg =
        TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
    MIB->getOperand(0).setReg(SrcReg);
    return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
  }
  case X86::AVX512_256_SET0:
  case X86::AVX512_512_SET0: {
    bool HasVLX = Subtarget.hasVLX();
    unsigned SrcReg = MIB->getOperand(0).getReg();
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
      // XOR the low 128 bits and implicitly define the full register.
      unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
      MIB->getOperand(0).setReg(XReg);
      Expand2AddrUndef(MIB,
                       get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
      MIB.addReg(SrcReg, RegState::ImplicitDefine);
      return true;
    }
    if (MI.getOpcode() == X86::AVX512_256_SET0) {
      // No VLX so we must reference a zmm.
      unsigned ZReg =
          TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
      MIB->getOperand(0).setReg(ZReg);
    }
    return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
  }
  case X86::V_SETALLONES:
    return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
  case X86::AVX2_SETALLONES:
    return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
  case X86::AVX1_SETALLONES: {
    unsigned Reg = MIB->getOperand(0).getReg();
    // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
    MIB->setDesc(get(X86::VCMPPSYrri));
    MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
    return true;
  }
  case X86::AVX512_512_SETALLONES: {
    unsigned Reg = MIB->getOperand(0).getReg();
    MIB->setDesc(get(X86::VPTERNLOGDZrri));
    // VPTERNLOGD needs 3 register inputs and an immediate.
    // 0xff will return 1s for any input.
    MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
       .addReg(Reg, RegState::Undef).addImm(0xff);
    return true;
  }
  case X86::AVX512_512_SEXT_MASK_32:
  case X86::AVX512_512_SEXT_MASK_64: {
    unsigned Reg = MIB->getOperand(0).getReg();
    unsigned MaskReg = MIB->getOperand(1).getReg();
    unsigned MaskState = getRegState(MIB->getOperand(1));
    unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
                   X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
    MI.RemoveOperand(1);
    MIB->setDesc(get(Opc));
    // VPTERNLOG needs 3 register inputs and an immediate.
    // 0xff will return 1s for any input.
    MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
       .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
    return true;
  }
  case X86::VMOVAPSZ128rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
                           get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
  case X86::VMOVUPSZ128rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
                           get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
  case X86::VMOVAPSZ256rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
                           get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
  case X86::VMOVUPSZ256rm_NOVLX:
    return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
                           get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
  case X86::VMOVAPSZ128mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
                            get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
  case X86::VMOVUPSZ128mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
                            get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
  case X86::VMOVAPSZ256mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
                            get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
  case X86::VMOVUPSZ256mr_NOVLX:
    return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
                            get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
  case X86::MOV32ri64: {
    // Materialize via a 32-bit MOV into the sub-register; the full 64-bit
    // register is implicitly defined (upper bits are zeroed by hardware).
    unsigned Reg = MIB->getOperand(0).getReg();
    unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
    MI.setDesc(get(X86::MOV32ri));
    MIB->getOperand(0).setReg(Reg32);
    MIB.addReg(Reg, RegState::ImplicitDefine);
    return true;
  }

  // KNL does not recognize dependency-breaking idioms for mask registers,
  // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
  // Using %k0 as the undef input register is a performance heuristic based
  // on the assumption that %k0 is used less frequently than the other mask
  // registers, since it is not usable as a write mask.
  // FIXME: A more advanced approach would be to choose the best input mask
  // register based on context.
  case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
  case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
  case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
  case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
  case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
  case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
  case TargetOpcode::LOAD_STACK_GUARD:
    expandLoadStackGuard(MIB, *this);
    return true;
  case X86::XOR64_FP:
  case X86::XOR32_FP:
    return expandXorFP(MIB, *this);
  case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8));
  case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8));
  case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8));
  case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8));
  // ADD-with-known-disjoint-bits pseudos lower to OR (same result when no
  // bits overlap); they break out of the switch and fall through to the
  // final return below.
  case X86::ADD8rr_DB:    MIB->setDesc(get(X86::OR8rr));    break;
  case X86::ADD16rr_DB:   MIB->setDesc(get(X86::OR16rr));   break;
  case X86::ADD32rr_DB:   MIB->setDesc(get(X86::OR32rr));   break;
  case X86::ADD64rr_DB:   MIB->setDesc(get(X86::OR64rr));   break;
  case X86::ADD8ri_DB:    MIB->setDesc(get(X86::OR8ri));    break;
  case X86::ADD16ri_DB:   MIB->setDesc(get(X86::OR16ri));   break;
  case X86::ADD32ri_DB:   MIB->setDesc(get(X86::OR32ri));   break;
  case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break;
  case X86::ADD16ri8_DB:  MIB->setDesc(get(X86::OR16ri8));  break;
  case X86::ADD32ri8_DB:  MIB->setDesc(get(X86::OR32ri8));  break;
  case X86::ADD64ri8_DB:  MIB->setDesc(get(X86::OR64ri8));  break;
  }
  return false;
}

/// Return true for all instructions that only update
/// the first 32 or 64-bits of the destination register and leave the rest
/// unmodified. This can be used to avoid folding loads if the instructions
/// only update part of the destination register, and the non-updated part is
/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
/// instructions breaks the partial register dependency and it can improve
/// performance. e.g.:
///
///   movss (%rdi), %xmm0
///   cvtss2sd %xmm0, %xmm0
///
/// Instead of
///   cvtss2sd (%rdi), %xmm0
///
/// FIXME: This should be turned into a TSFlags.
///
static bool hasPartialRegUpdate(unsigned Opcode,
                                const X86Subtarget &Subtarget,
                                bool ForLoadFold = false) {
  switch (Opcode) {
  // SSE scalar conversions from a GPR source.
  case X86::CVTSI2SSrr:
  case X86::CVTSI2SSrm:
  case X86::CVTSI642SSrr:
  case X86::CVTSI642SSrm:
  case X86::CVTSI2SDrr:
  case X86::CVTSI2SDrm:
  case X86::CVTSI642SDrr:
  case X86::CVTSI642SDrm:
    // Load folding won't affect the undef register update since the input is
    // a GPR.
    return !ForLoadFold;
  case X86::CVTSD2SSrr:
  case X86::CVTSD2SSrm:
  case X86::CVTSS2SDrr:
  case X86::CVTSS2SDrm:
  case X86::MOVHPDrm:
  case X86::MOVHPSrm:
  case X86::MOVLPDrm:
  case X86::MOVLPSrm:
  case X86::RCPSSr:
  case X86::RCPSSm:
  case X86::RCPSSr_Int:
  case X86::RCPSSm_Int:
  case X86::ROUNDSDr:
  case X86::ROUNDSDm:
  case X86::ROUNDSSr:
  case X86::ROUNDSSm:
  case X86::RSQRTSSr:
  case X86::RSQRTSSm:
  case X86::RSQRTSSr_Int:
  case X86::RSQRTSSm_Int:
  case X86::SQRTSSr:
  case X86::SQRTSSm:
  case X86::SQRTSSr_Int:
  case X86::SQRTSSm_Int:
  case X86::SQRTSDr:
  case X86::SQRTSDm:
  case X86::SQRTSDr_Int:
  case X86::SQRTSDm_Int:
    return true;
  // GPR instructions: these only count as partial updates on subtargets that
  // are known to have the corresponding false dependency.
  case X86::POPCNT32rm:
  case X86::POPCNT32rr:
  case X86::POPCNT64rm:
  case X86::POPCNT64rr:
    return Subtarget.hasPOPCNTFalseDeps();
  case X86::LZCNT32rm:
  case X86::LZCNT32rr:
  case X86::LZCNT64rm:
  case X86::LZCNT64rr:
  case X86::TZCNT32rm:
  case X86::TZCNT32rr:
  case X86::TZCNT64rm:
  case X86::TZCNT64rr:
    return Subtarget.hasLZCNTFalseDeps();
  }

  return false;
}

/// Inform the BreakFalseDeps pass how many idle
/// instructions we would like before a partial register update.
/// Returns 0 when no dependency-breaking clearance is wanted for operand
/// \p OpNum of \p MI (only the def operand 0 is considered).
unsigned X86InstrInfo::getPartialRegUpdateClearance(
    const MachineInstr &MI, unsigned OpNum,
    const TargetRegisterInfo *TRI) const {
  if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode(), Subtarget))
    return 0;

  // If MI is marked as reading Reg, the partial register update is wanted.
  const MachineOperand &MO = MI.getOperand(0);
  unsigned Reg = MO.getReg();
  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    if (MO.readsReg() || MI.readsVirtualRegister(Reg))
      return 0;
  } else {
    if (MI.readsRegister(Reg, TRI))
      return 0;
  }

  // If any instructions in the clearance range are reading Reg, insert a
  // dependency breaking instruction, which is inexpensive and is likely to
  // be hidden in other instruction's cycles.
  return PartialRegUpdateClearance;
}

// Return true for any instruction that copies the high bits of the first
// source operand into the unused high bits of the destination operand.
// With \p ForLoadFold set, only report the cases that matter when deciding
// whether a load may be folded into the instruction.
4275353358Sdimstatic bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { 4276327952Sdim switch (Opcode) { 4277327952Sdim case X86::VCVTSI2SSrr: 4278327952Sdim case X86::VCVTSI2SSrm: 4279327952Sdim case X86::VCVTSI2SSrr_Int: 4280327952Sdim case X86::VCVTSI2SSrm_Int: 4281327952Sdim case X86::VCVTSI642SSrr: 4282327952Sdim case X86::VCVTSI642SSrm: 4283327952Sdim case X86::VCVTSI642SSrr_Int: 4284327952Sdim case X86::VCVTSI642SSrm_Int: 4285327952Sdim case X86::VCVTSI2SDrr: 4286327952Sdim case X86::VCVTSI2SDrm: 4287327952Sdim case X86::VCVTSI2SDrr_Int: 4288327952Sdim case X86::VCVTSI2SDrm_Int: 4289327952Sdim case X86::VCVTSI642SDrr: 4290327952Sdim case X86::VCVTSI642SDrm: 4291327952Sdim case X86::VCVTSI642SDrr_Int: 4292327952Sdim case X86::VCVTSI642SDrm_Int: 4293327952Sdim // AVX-512 4294327952Sdim case X86::VCVTSI2SSZrr: 4295327952Sdim case X86::VCVTSI2SSZrm: 4296327952Sdim case X86::VCVTSI2SSZrr_Int: 4297327952Sdim case X86::VCVTSI2SSZrrb_Int: 4298327952Sdim case X86::VCVTSI2SSZrm_Int: 4299327952Sdim case X86::VCVTSI642SSZrr: 4300327952Sdim case X86::VCVTSI642SSZrm: 4301327952Sdim case X86::VCVTSI642SSZrr_Int: 4302327952Sdim case X86::VCVTSI642SSZrrb_Int: 4303327952Sdim case X86::VCVTSI642SSZrm_Int: 4304327952Sdim case X86::VCVTSI2SDZrr: 4305327952Sdim case X86::VCVTSI2SDZrm: 4306327952Sdim case X86::VCVTSI2SDZrr_Int: 4307327952Sdim case X86::VCVTSI2SDZrm_Int: 4308327952Sdim case X86::VCVTSI642SDZrr: 4309327952Sdim case X86::VCVTSI642SDZrm: 4310327952Sdim case X86::VCVTSI642SDZrr_Int: 4311327952Sdim case X86::VCVTSI642SDZrrb_Int: 4312327952Sdim case X86::VCVTSI642SDZrm_Int: 4313327952Sdim case X86::VCVTUSI2SSZrr: 4314327952Sdim case X86::VCVTUSI2SSZrm: 4315327952Sdim case X86::VCVTUSI2SSZrr_Int: 4316327952Sdim case X86::VCVTUSI2SSZrrb_Int: 4317327952Sdim case X86::VCVTUSI2SSZrm_Int: 4318327952Sdim case X86::VCVTUSI642SSZrr: 4319327952Sdim case X86::VCVTUSI642SSZrm: 4320327952Sdim case X86::VCVTUSI642SSZrr_Int: 4321327952Sdim case 
X86::VCVTUSI642SSZrrb_Int: 4322327952Sdim case X86::VCVTUSI642SSZrm_Int: 4323327952Sdim case X86::VCVTUSI2SDZrr: 4324327952Sdim case X86::VCVTUSI2SDZrm: 4325327952Sdim case X86::VCVTUSI2SDZrr_Int: 4326327952Sdim case X86::VCVTUSI2SDZrm_Int: 4327327952Sdim case X86::VCVTUSI642SDZrr: 4328327952Sdim case X86::VCVTUSI642SDZrm: 4329327952Sdim case X86::VCVTUSI642SDZrr_Int: 4330327952Sdim case X86::VCVTUSI642SDZrrb_Int: 4331327952Sdim case X86::VCVTUSI642SDZrm_Int: 4332353358Sdim // Load folding won't effect the undef register update since the input is 4333353358Sdim // a GPR. 4334353358Sdim return !ForLoadFold; 4335353358Sdim case X86::VCVTSD2SSrr: 4336353358Sdim case X86::VCVTSD2SSrm: 4337353358Sdim case X86::VCVTSD2SSrr_Int: 4338353358Sdim case X86::VCVTSD2SSrm_Int: 4339353358Sdim case X86::VCVTSS2SDrr: 4340353358Sdim case X86::VCVTSS2SDrm: 4341353358Sdim case X86::VCVTSS2SDrr_Int: 4342353358Sdim case X86::VCVTSS2SDrm_Int: 4343353358Sdim case X86::VRCPSSr: 4344353358Sdim case X86::VRCPSSr_Int: 4345353358Sdim case X86::VRCPSSm: 4346353358Sdim case X86::VRCPSSm_Int: 4347353358Sdim case X86::VROUNDSDr: 4348353358Sdim case X86::VROUNDSDm: 4349353358Sdim case X86::VROUNDSDr_Int: 4350353358Sdim case X86::VROUNDSDm_Int: 4351353358Sdim case X86::VROUNDSSr: 4352353358Sdim case X86::VROUNDSSm: 4353353358Sdim case X86::VROUNDSSr_Int: 4354353358Sdim case X86::VROUNDSSm_Int: 4355353358Sdim case X86::VRSQRTSSr: 4356353358Sdim case X86::VRSQRTSSr_Int: 4357353358Sdim case X86::VRSQRTSSm: 4358353358Sdim case X86::VRSQRTSSm_Int: 4359353358Sdim case X86::VSQRTSSr: 4360353358Sdim case X86::VSQRTSSr_Int: 4361353358Sdim case X86::VSQRTSSm: 4362353358Sdim case X86::VSQRTSSm_Int: 4363353358Sdim case X86::VSQRTSDr: 4364353358Sdim case X86::VSQRTSDr_Int: 4365353358Sdim case X86::VSQRTSDm: 4366353358Sdim case X86::VSQRTSDm_Int: 4367353358Sdim // AVX-512 4368327952Sdim case X86::VCVTSD2SSZrr: 4369327952Sdim case X86::VCVTSD2SSZrr_Int: 4370327952Sdim case X86::VCVTSD2SSZrrb_Int: 4371327952Sdim 
case X86::VCVTSD2SSZrm: 4372327952Sdim case X86::VCVTSD2SSZrm_Int: 4373327952Sdim case X86::VCVTSS2SDZrr: 4374327952Sdim case X86::VCVTSS2SDZrr_Int: 4375327952Sdim case X86::VCVTSS2SDZrrb_Int: 4376327952Sdim case X86::VCVTSS2SDZrm: 4377327952Sdim case X86::VCVTSS2SDZrm_Int: 4378341825Sdim case X86::VGETEXPSDZr: 4379341825Sdim case X86::VGETEXPSDZrb: 4380341825Sdim case X86::VGETEXPSDZm: 4381341825Sdim case X86::VGETEXPSSZr: 4382341825Sdim case X86::VGETEXPSSZrb: 4383341825Sdim case X86::VGETEXPSSZm: 4384341825Sdim case X86::VGETMANTSDZrri: 4385341825Sdim case X86::VGETMANTSDZrrib: 4386341825Sdim case X86::VGETMANTSDZrmi: 4387341825Sdim case X86::VGETMANTSSZrri: 4388341825Sdim case X86::VGETMANTSSZrrib: 4389341825Sdim case X86::VGETMANTSSZrmi: 4390341825Sdim case X86::VRNDSCALESDZr: 4391341825Sdim case X86::VRNDSCALESDZr_Int: 4392341825Sdim case X86::VRNDSCALESDZrb_Int: 4393341825Sdim case X86::VRNDSCALESDZm: 4394341825Sdim case X86::VRNDSCALESDZm_Int: 4395341825Sdim case X86::VRNDSCALESSZr: 4396341825Sdim case X86::VRNDSCALESSZr_Int: 4397341825Sdim case X86::VRNDSCALESSZrb_Int: 4398341825Sdim case X86::VRNDSCALESSZm: 4399341825Sdim case X86::VRNDSCALESSZm_Int: 4400341825Sdim case X86::VRCP14SDZrr: 4401341825Sdim case X86::VRCP14SDZrm: 4402341825Sdim case X86::VRCP14SSZrr: 4403341825Sdim case X86::VRCP14SSZrm: 4404341825Sdim case X86::VRCP28SDZr: 4405341825Sdim case X86::VRCP28SDZrb: 4406341825Sdim case X86::VRCP28SDZm: 4407341825Sdim case X86::VRCP28SSZr: 4408341825Sdim case X86::VRCP28SSZrb: 4409341825Sdim case X86::VRCP28SSZm: 4410341825Sdim case X86::VREDUCESSZrmi: 4411341825Sdim case X86::VREDUCESSZrri: 4412341825Sdim case X86::VREDUCESSZrrib: 4413341825Sdim case X86::VRSQRT14SDZrr: 4414341825Sdim case X86::VRSQRT14SDZrm: 4415341825Sdim case X86::VRSQRT14SSZrr: 4416341825Sdim case X86::VRSQRT14SSZrm: 4417341825Sdim case X86::VRSQRT28SDZr: 4418341825Sdim case X86::VRSQRT28SDZrb: 4419341825Sdim case X86::VRSQRT28SDZm: 4420341825Sdim case X86::VRSQRT28SSZr: 
4421341825Sdim case X86::VRSQRT28SSZrb: 4422341825Sdim case X86::VRSQRT28SSZm: 4423327952Sdim case X86::VSQRTSSZr: 4424327952Sdim case X86::VSQRTSSZr_Int: 4425327952Sdim case X86::VSQRTSSZrb_Int: 4426327952Sdim case X86::VSQRTSSZm: 4427327952Sdim case X86::VSQRTSSZm_Int: 4428327952Sdim case X86::VSQRTSDZr: 4429327952Sdim case X86::VSQRTSDZr_Int: 4430327952Sdim case X86::VSQRTSDZrb_Int: 4431327952Sdim case X86::VSQRTSDZm: 4432327952Sdim case X86::VSQRTSDZm_Int: 4433327952Sdim return true; 4434327952Sdim } 4435327952Sdim 4436327952Sdim return false; 4437327952Sdim} 4438327952Sdim 4439341825Sdim/// Inform the BreakFalseDeps pass how many idle instructions we would like 4440327952Sdim/// before certain undef register reads. 4441327952Sdim/// 4442327952Sdim/// This catches the VCVTSI2SD family of instructions: 4443327952Sdim/// 4444327952Sdim/// vcvtsi2sdq %rax, undef %xmm0, %xmm14 4445327952Sdim/// 4446327952Sdim/// We should to be careful *not* to catch VXOR idioms which are presumably 4447327952Sdim/// handled specially in the pipeline: 4448327952Sdim/// 4449327952Sdim/// vxorps undef %xmm1, undef %xmm1, %xmm1 4450327952Sdim/// 4451327952Sdim/// Like getPartialRegUpdateClearance, this makes a strong assumption that the 4452327952Sdim/// high bits that are passed-through are not live. 4453327952Sdimunsigned 4454327952SdimX86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, 4455327952Sdim const TargetRegisterInfo *TRI) const { 4456327952Sdim if (!hasUndefRegUpdate(MI.getOpcode())) 4457327952Sdim return 0; 4458327952Sdim 4459327952Sdim // Set the OpNum parameter to the first source operand. 
4460327952Sdim OpNum = 1; 4461327952Sdim 4462327952Sdim const MachineOperand &MO = MI.getOperand(OpNum); 4463327952Sdim if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 4464327952Sdim return UndefRegClearance; 4465327952Sdim } 4466327952Sdim return 0; 4467327952Sdim} 4468327952Sdim 4469327952Sdimvoid X86InstrInfo::breakPartialRegDependency( 4470327952Sdim MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { 4471327952Sdim unsigned Reg = MI.getOperand(OpNum).getReg(); 4472327952Sdim // If MI kills this register, the false dependence is already broken. 4473327952Sdim if (MI.killsRegister(Reg, TRI)) 4474327952Sdim return; 4475327952Sdim 4476327952Sdim if (X86::VR128RegClass.contains(Reg)) { 4477327952Sdim // These instructions are all floating point domain, so xorps is the best 4478327952Sdim // choice. 4479327952Sdim unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr; 4480327952Sdim BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg) 4481327952Sdim .addReg(Reg, RegState::Undef) 4482327952Sdim .addReg(Reg, RegState::Undef); 4483327952Sdim MI.addRegisterKilled(Reg, TRI, true); 4484327952Sdim } else if (X86::VR256RegClass.contains(Reg)) { 4485327952Sdim // Use vxorps to clear the full ymm register. 4486327952Sdim // It wants to read and write the xmm sub-register. 4487327952Sdim unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); 4488327952Sdim BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg) 4489327952Sdim .addReg(XReg, RegState::Undef) 4490327952Sdim .addReg(XReg, RegState::Undef) 4491327952Sdim .addReg(Reg, RegState::ImplicitDefine); 4492327952Sdim MI.addRegisterKilled(Reg, TRI, true); 4493341825Sdim } else if (X86::GR64RegClass.contains(Reg)) { 4494341825Sdim // Using XOR32rr because it has shorter encoding and zeros up the upper bits 4495341825Sdim // as well. 
4496341825Sdim unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit); 4497341825Sdim BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg) 4498341825Sdim .addReg(XReg, RegState::Undef) 4499341825Sdim .addReg(XReg, RegState::Undef) 4500341825Sdim .addReg(Reg, RegState::ImplicitDefine); 4501341825Sdim MI.addRegisterKilled(Reg, TRI, true); 4502341825Sdim } else if (X86::GR32RegClass.contains(Reg)) { 4503341825Sdim BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg) 4504341825Sdim .addReg(Reg, RegState::Undef) 4505341825Sdim .addReg(Reg, RegState::Undef); 4506341825Sdim MI.addRegisterKilled(Reg, TRI, true); 4507327952Sdim } 4508327952Sdim} 4509327952Sdim 4510296417Sdimstatic void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs, 4511296417Sdim int PtrOffset = 0) { 4512288943Sdim unsigned NumAddrOps = MOs.size(); 4513296417Sdim 4514296417Sdim if (NumAddrOps < 4) { 4515296417Sdim // FrameIndex only - add an immediate offset (whether its zero or not). 4516296417Sdim for (unsigned i = 0; i != NumAddrOps; ++i) 4517321369Sdim MIB.add(MOs[i]); 4518296417Sdim addOffset(MIB, PtrOffset); 4519296417Sdim } else { 4520296417Sdim // General Memory Addressing - we need to add any offset to an existing 4521296417Sdim // offset. 
4522296417Sdim assert(MOs.size() == 5 && "Unexpected memory operand list length"); 4523296417Sdim for (unsigned i = 0; i != NumAddrOps; ++i) { 4524296417Sdim const MachineOperand &MO = MOs[i]; 4525296417Sdim if (i == 3 && PtrOffset != 0) { 4526296417Sdim MIB.addDisp(MO, PtrOffset); 4527296417Sdim } else { 4528321369Sdim MIB.add(MO); 4529296417Sdim } 4530296417Sdim } 4531296417Sdim } 4532288943Sdim} 4533288943Sdim 4534341825Sdimstatic void updateOperandRegConstraints(MachineFunction &MF, 4535341825Sdim MachineInstr &NewMI, 4536341825Sdim const TargetInstrInfo &TII) { 4537341825Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 4538341825Sdim const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); 4539341825Sdim 4540341825Sdim for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) { 4541341825Sdim MachineOperand &MO = NewMI.getOperand(Idx); 4542341825Sdim // We only need to update constraints on virtual register operands. 4543341825Sdim if (!MO.isReg()) 4544341825Sdim continue; 4545341825Sdim unsigned Reg = MO.getReg(); 4546341825Sdim if (!TRI.isVirtualRegister(Reg)) 4547341825Sdim continue; 4548341825Sdim 4549341825Sdim auto *NewRC = MRI.constrainRegClass( 4550341825Sdim Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF)); 4551341825Sdim if (!NewRC) { 4552341825Sdim LLVM_DEBUG( 4553341825Sdim dbgs() << "WARNING: Unable to update register constraint for operand " 4554341825Sdim << Idx << " of instruction:\n"; 4555341825Sdim NewMI.dump(); dbgs() << "\n"); 4556341825Sdim } 4557341825Sdim } 4558341825Sdim} 4559341825Sdim 4560193323Sedstatic MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, 4561288943Sdim ArrayRef<MachineOperand> MOs, 4562288943Sdim MachineBasicBlock::iterator InsertPt, 4563309124Sdim MachineInstr &MI, 4564193323Sed const TargetInstrInfo &TII) { 4565193323Sed // Create the base instruction with the memory operand as the first part. 4566249423Sdim // Omit the implicit operands, something BuildMI can't do. 
4567309124Sdim MachineInstr *NewMI = 4568309124Sdim MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true); 4569249423Sdim MachineInstrBuilder MIB(MF, NewMI); 4570288943Sdim addOperands(MIB, MOs); 4571218893Sdim 4572193323Sed // Loop over the rest of the ri operands, converting them over. 4573309124Sdim unsigned NumOps = MI.getDesc().getNumOperands() - 2; 4574193323Sed for (unsigned i = 0; i != NumOps; ++i) { 4575309124Sdim MachineOperand &MO = MI.getOperand(i + 2); 4576321369Sdim MIB.add(MO); 4577193323Sed } 4578309124Sdim for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) { 4579309124Sdim MachineOperand &MO = MI.getOperand(i); 4580321369Sdim MIB.add(MO); 4581193323Sed } 4582288943Sdim 4583341825Sdim updateOperandRegConstraints(MF, *NewMI, TII); 4584341825Sdim 4585288943Sdim MachineBasicBlock *MBB = InsertPt->getParent(); 4586288943Sdim MBB->insert(InsertPt, NewMI); 4587288943Sdim 4588193323Sed return MIB; 4589193323Sed} 4590193323Sed 4591288943Sdimstatic MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, 4592288943Sdim unsigned OpNo, ArrayRef<MachineOperand> MOs, 4593288943Sdim MachineBasicBlock::iterator InsertPt, 4594309124Sdim MachineInstr &MI, const TargetInstrInfo &TII, 4595296417Sdim int PtrOffset = 0) { 4596249423Sdim // Omit the implicit operands, something BuildMI can't do. 
4597309124Sdim MachineInstr *NewMI = 4598309124Sdim MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true); 4599249423Sdim MachineInstrBuilder MIB(MF, NewMI); 4600218893Sdim 4601309124Sdim for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 4602309124Sdim MachineOperand &MO = MI.getOperand(i); 4603193323Sed if (i == OpNo) { 4604193323Sed assert(MO.isReg() && "Expected to fold into reg operand!"); 4605296417Sdim addOperands(MIB, MOs, PtrOffset); 4606193323Sed } else { 4607321369Sdim MIB.add(MO); 4608193323Sed } 4609193323Sed } 4610288943Sdim 4611341825Sdim updateOperandRegConstraints(MF, *NewMI, TII); 4612341825Sdim 4613288943Sdim MachineBasicBlock *MBB = InsertPt->getParent(); 4614288943Sdim MBB->insert(InsertPt, NewMI); 4615288943Sdim 4616193323Sed return MIB; 4617193323Sed} 4618193323Sed 4619193323Sedstatic MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 4620288943Sdim ArrayRef<MachineOperand> MOs, 4621288943Sdim MachineBasicBlock::iterator InsertPt, 4622309124Sdim MachineInstr &MI) { 4623288943Sdim MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, 4624309124Sdim MI.getDebugLoc(), TII.get(Opcode)); 4625288943Sdim addOperands(MIB, MOs); 4626193323Sed return MIB.addImm(0); 4627193323Sed} 4628193323Sed 4629296417SdimMachineInstr *X86InstrInfo::foldMemoryOperandCustom( 4630309124Sdim MachineFunction &MF, MachineInstr &MI, unsigned OpNum, 4631296417Sdim ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt, 4632296417Sdim unsigned Size, unsigned Align) const { 4633309124Sdim switch (MI.getOpcode()) { 4634296417Sdim case X86::INSERTPSrr: 4635296417Sdim case X86::VINSERTPSrr: 4636314564Sdim case X86::VINSERTPSZrr: 4637296417Sdim // Attempt to convert the load of inserted vector into a fold load 4638296417Sdim // of a single float. 
4639296417Sdim if (OpNum == 2) { 4640309124Sdim unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm(); 4641296417Sdim unsigned ZMask = Imm & 15; 4642296417Sdim unsigned DstIdx = (Imm >> 4) & 3; 4643296417Sdim unsigned SrcIdx = (Imm >> 6) & 3; 4644296417Sdim 4645321369Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4646321369Sdim const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); 4647321369Sdim unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; 4648353358Sdim if ((Size == 0 || Size >= 16) && RCSize >= 16 && 4 <= Align) { 4649296417Sdim int PtrOffset = SrcIdx * 4; 4650296417Sdim unsigned NewImm = (DstIdx << 4) | ZMask; 4651296417Sdim unsigned NewOpCode = 4652314564Sdim (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm : 4653314564Sdim (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm : 4654314564Sdim X86::INSERTPSrm; 4655296417Sdim MachineInstr *NewMI = 4656296417Sdim FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset); 4657296417Sdim NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm); 4658296417Sdim return NewMI; 4659296417Sdim } 4660296417Sdim } 4661296417Sdim break; 4662309124Sdim case X86::MOVHLPSrr: 4663309124Sdim case X86::VMOVHLPSrr: 4664314564Sdim case X86::VMOVHLPSZrr: 4665309124Sdim // Move the upper 64-bits of the second operand to the lower 64-bits. 4666309124Sdim // To fold the load, adjust the pointer to the upper and use (V)MOVLPS. 4667309124Sdim // TODO: In most cases AVX doesn't have a 8-byte alignment requirement. 4668309124Sdim if (OpNum == 2) { 4669321369Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4670321369Sdim const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); 4671321369Sdim unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; 4672353358Sdim if ((Size == 0 || Size >= 16) && RCSize >= 16 && 8 <= Align) { 4673309124Sdim unsigned NewOpCode = 4674314564Sdim (MI.getOpcode() == X86::VMOVHLPSZrr) ? 
X86::VMOVLPSZ128rm : 4675314564Sdim (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm : 4676314564Sdim X86::MOVLPSrm; 4677309124Sdim MachineInstr *NewMI = 4678309124Sdim FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8); 4679309124Sdim return NewMI; 4680309124Sdim } 4681309124Sdim } 4682309124Sdim break; 4683353358Sdim case X86::UNPCKLPDrr: 4684353358Sdim // If we won't be able to fold this to the memory form of UNPCKL, use 4685353358Sdim // MOVHPD instead. Done as custom because we can't have this in the load 4686353358Sdim // table twice. 4687353358Sdim if (OpNum == 2) { 4688353358Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4689353358Sdim const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); 4690353358Sdim unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; 4691353358Sdim if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) { 4692353358Sdim MachineInstr *NewMI = 4693353358Sdim FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this); 4694353358Sdim return NewMI; 4695353358Sdim } 4696353358Sdim } 4697353358Sdim break; 4698353358Sdim } 4699296417Sdim 4700296417Sdim return nullptr; 4701296417Sdim} 4702296417Sdim 4703353358Sdimstatic bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, 4704353358Sdim MachineInstr &MI) { 4705353358Sdim if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) || 4706341825Sdim !MI.getOperand(1).isReg()) 4707341825Sdim return false; 4708341825Sdim 4709341825Sdim // The are two cases we need to handle depending on where in the pipeline 4710341825Sdim // the folding attempt is being made. 4711341825Sdim // -Register has the undef flag set. 4712341825Sdim // -Register is produced by the IMPLICIT_DEF instruction. 
4713341825Sdim 4714341825Sdim if (MI.getOperand(1).isUndef()) 4715341825Sdim return true; 4716341825Sdim 4717341825Sdim MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4718341825Sdim MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg()); 4719341825Sdim return VRegDef && VRegDef->isImplicitDef(); 4720341825Sdim} 4721341825Sdim 4722341825Sdim 4723288943SdimMachineInstr *X86InstrInfo::foldMemoryOperandImpl( 4724309124Sdim MachineFunction &MF, MachineInstr &MI, unsigned OpNum, 4725288943Sdim ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt, 4726288943Sdim unsigned Size, unsigned Align, bool AllowCommute) const { 4727327952Sdim bool isSlowTwoMemOps = Subtarget.slowTwoMemOps(); 4728193323Sed bool isTwoAddrFold = false; 4729249423Sdim 4730296417Sdim // For CPUs that favor the register form of a call or push, 4731296417Sdim // do not fold loads into calls or pushes, unless optimizing for size 4732296417Sdim // aggressively. 4733353358Sdim if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() && 4734309124Sdim (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r || 4735309124Sdim MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r || 4736309124Sdim MI.getOpcode() == X86::PUSH64r)) 4737276479Sdim return nullptr; 4738249423Sdim 4739341825Sdim // Avoid partial and undef register update stalls unless optimizing for size. 4740353358Sdim if (!MF.getFunction().hasOptSize() && 4741353358Sdim (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) || 4742341825Sdim shouldPreventUndefRegUpdateMemFold(MF, MI))) 4743327952Sdim return nullptr; 4744327952Sdim 4745309124Sdim unsigned NumOps = MI.getDesc().getNumOperands(); 4746309124Sdim bool isTwoAddr = 4747309124Sdim NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; 4748193323Sed 4749221345Sdim // FIXME: AsmPrinter doesn't know how to handle 4750221345Sdim // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. 
4751309124Sdim if (MI.getOpcode() == X86::ADD32ri && 4752309124Sdim MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) 4753276479Sdim return nullptr; 4754221345Sdim 4755341825Sdim // GOTTPOFF relocation loads can only be folded into add instructions. 4756341825Sdim // FIXME: Need to exclude other relocations that only support specific 4757341825Sdim // instructions. 4758341825Sdim if (MOs.size() == X86::AddrNumOperands && 4759341825Sdim MOs[X86::AddrDisp].getTargetFlags() == X86II::MO_GOTTPOFF && 4760341825Sdim MI.getOpcode() != X86::ADD64rr) 4761341825Sdim return nullptr; 4762341825Sdim 4763276479Sdim MachineInstr *NewMI = nullptr; 4764296417Sdim 4765296417Sdim // Attempt to fold any custom cases we have. 4766296417Sdim if (MachineInstr *CustomMI = 4767296417Sdim foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align)) 4768296417Sdim return CustomMI; 4769296417Sdim 4770341825Sdim const X86MemoryFoldTableEntry *I = nullptr; 4771341825Sdim 4772193323Sed // Folding a memory location into the two-address part of a two-address 4773193323Sed // instruction is different than folding it other places. It requires 4774193323Sed // replacing the *two* registers with the memory location. 
4775309124Sdim if (isTwoAddr && NumOps >= 2 && OpNum < 2 && MI.getOperand(0).isReg() && 4776309124Sdim MI.getOperand(1).isReg() && 4777309124Sdim MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { 4778341825Sdim I = lookupTwoAddrFoldTable(MI.getOpcode()); 4779193323Sed isTwoAddrFold = true; 4780341825Sdim } else { 4781341825Sdim if (OpNum == 0) { 4782341825Sdim if (MI.getOpcode() == X86::MOV32r0) { 4783341825Sdim NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI); 4784341825Sdim if (NewMI) 4785341825Sdim return NewMI; 4786341825Sdim } 4787243830Sdim } 4788218893Sdim 4789341825Sdim I = lookupFoldTable(MI.getOpcode(), OpNum); 4790193323Sed } 4791218893Sdim 4792341825Sdim if (I != nullptr) { 4793341825Sdim unsigned Opcode = I->DstOp; 4794341825Sdim unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; 4795341825Sdim if (Align < MinAlign) 4796341825Sdim return nullptr; 4797341825Sdim bool NarrowToMOV32rm = false; 4798341825Sdim if (Size) { 4799341825Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4800341825Sdim const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, 4801341825Sdim &RI, MF); 4802341825Sdim unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; 4803341825Sdim if (Size < RCSize) { 4804353358Sdim // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int. 4805341825Sdim // Check if it's safe to fold the load. If the size of the object is 4806341825Sdim // narrower than the load width, then it's not. 4807341825Sdim if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) 4808341825Sdim return nullptr; 4809341825Sdim // If this is a 64-bit load, but the spill slot is 32, then we can do 4810341825Sdim // a 32-bit load which is implicitly zero-extended. This likely is 4811341825Sdim // due to live interval analysis remat'ing a load from stack slot. 
4812341825Sdim if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) 4813341825Sdim return nullptr; 4814341825Sdim Opcode = X86::MOV32rm; 4815341825Sdim NarrowToMOV32rm = true; 4816198090Srdivacky } 4817341825Sdim } 4818198090Srdivacky 4819341825Sdim if (isTwoAddrFold) 4820341825Sdim NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this); 4821341825Sdim else 4822341825Sdim NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this); 4823341825Sdim 4824341825Sdim if (NarrowToMOV32rm) { 4825341825Sdim // If this is the special case where we use a MOV32rm to load a 32-bit 4826341825Sdim // value and zero-extend the top bits. Change the destination register 4827341825Sdim // to a 32-bit one. 4828341825Sdim unsigned DstReg = NewMI->getOperand(0).getReg(); 4829341825Sdim if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 4830341825Sdim NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit)); 4831193323Sed else 4832341825Sdim NewMI->getOperand(0).setSubReg(X86::sub_32bit); 4833193323Sed } 4834341825Sdim return NewMI; 4835193323Sed } 4836218893Sdim 4837280031Sdim // If the instruction and target operand are commutable, commute the 4838280031Sdim // instruction and try again. 4839280031Sdim if (AllowCommute) { 4840296417Sdim unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex; 4841280031Sdim if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) { 4842309124Sdim bool HasDef = MI.getDesc().getNumDefs(); 4843353358Sdim Register Reg0 = HasDef ? 
MI.getOperand(0).getReg() : Register(); 4844353358Sdim Register Reg1 = MI.getOperand(CommuteOpIdx1).getReg(); 4845353358Sdim Register Reg2 = MI.getOperand(CommuteOpIdx2).getReg(); 4846296417Sdim bool Tied1 = 4847309124Sdim 0 == MI.getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO); 4848296417Sdim bool Tied2 = 4849309124Sdim 0 == MI.getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO); 4850280031Sdim 4851280031Sdim // If either of the commutable operands are tied to the destination 4852280031Sdim // then we can not commute + fold. 4853296417Sdim if ((HasDef && Reg0 == Reg1 && Tied1) || 4854296417Sdim (HasDef && Reg0 == Reg2 && Tied2)) 4855280031Sdim return nullptr; 4856280031Sdim 4857296417Sdim MachineInstr *CommutedMI = 4858296417Sdim commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); 4859296417Sdim if (!CommutedMI) { 4860296417Sdim // Unable to commute. 4861296417Sdim return nullptr; 4862296417Sdim } 4863309124Sdim if (CommutedMI != &MI) { 4864296417Sdim // New instruction. We can't fold from this. 4865296417Sdim CommutedMI->eraseFromParent(); 4866296417Sdim return nullptr; 4867296417Sdim } 4868280031Sdim 4869296417Sdim // Attempt to fold with the commuted version of the instruction. 4870296417Sdim NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, 4871296417Sdim Size, Align, /*AllowCommute=*/false); 4872296417Sdim if (NewMI) 4873296417Sdim return NewMI; 4874280031Sdim 4875296417Sdim // Folding failed again - undo the commute before returning. 4876296417Sdim MachineInstr *UncommutedMI = 4877296417Sdim commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); 4878296417Sdim if (!UncommutedMI) { 4879296417Sdim // Unable to commute. 4880280031Sdim return nullptr; 4881280031Sdim } 4882309124Sdim if (UncommutedMI != &MI) { 4883296417Sdim // New instruction. It doesn't need to be kept. 
        // The undo produced a brand-new instruction rather than restoring
        // MI in place; it is not needed, so delete it and give up.
        UncommutedMI->eraseFromParent();
        return nullptr;
      }

      // Return here to prevent duplicate fuse failure report.
      return nullptr;
    }
  }

  // No fusion
  if (PrintFailedFusing && !MI.isCopy())
    dbgs() << "We failed to fuse operand " << OpNum << " in " << MI;
  return nullptr;
}

/// Fold a load/store of the stack slot \p FrameIndex into \p MI at the
/// operand positions listed in \p Ops, returning the new memory-form
/// instruction or nullptr if the fold is not possible.
///
/// \param MF         function containing \p MI.
/// \param MI         instruction to fold the frame access into.
/// \param Ops        operand indices of \p MI to replace with the memory ref.
/// \param InsertPt   where the folded instruction is inserted.
/// \param FrameIndex stack slot being loaded from / stored to.
/// \param LIS        live intervals, forwarded to the worker overload.
/// \param VRM        virtual register map (unused here; part of the
///                   TargetInstrInfo override signature).
MachineInstr *
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                    ArrayRef<unsigned> Ops,
                                    MachineBasicBlock::iterator InsertPt,
                                    int FrameIndex, LiveIntervals *LIS,
                                    VirtRegMap *VRM) const {
  // Check switch flag
  if (NoFusing)
    return nullptr;

  // Avoid partial and undef register update stalls unless optimizing for size.
  if (!MF.getFunction().hasOptSize() &&
      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
       shouldPreventUndefRegUpdateMemFold(MF, MI)))
    return nullptr;

  // Don't fold subreg spills, or reloads that use a high subreg.
  for (auto Op : Ops) {
    MachineOperand &MO = MI.getOperand(Op);
    auto SubReg = MO.getSubReg();
    if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
      return nullptr;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Size = MFI.getObjectSize(FrameIndex);
  unsigned Alignment = MFI.getObjectAlignment(FrameIndex);
  // If the function stack isn't realigned we don't want to fold instructions
  // that need increased alignment.
  if (!RI.needsStackRealignment(MF))
    Alignment =
        std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment());
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    // Special case: folding both operands of TEST{8,16,32,64}rr r, r.
    // Rewrite it as CMPxxri r, 0 so only one memory operand is needed.
    unsigned NewOpc = 0;
    unsigned RCSize = 0;
    switch (MI.getOpcode()) {
    default: return nullptr;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
    }
    // Check if it's safe to fold the load. If the size of the object is
    // narrower than the load width, then it's not.
    if (Size < RCSize)
      return nullptr;
    // Change to CMPXXri r, 0 first.
    MI.setDesc(get(NewOpc));
    MI.getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return nullptr;

  // Delegate to the single-operand worker with a frame-index memory operand.
  return foldMemoryOperandImpl(MF, MI, Ops[0],
                               MachineOperand::CreateFI(FrameIndex), InsertPt,
                               Size, Alignment, /*AllowCommute=*/true);
}

/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
/// because the latter uses contents that wouldn't be defined in the folded
/// version. For instance, this transformation isn't legal:
///   movss (%rdi), %xmm0
///   addps %xmm0, %xmm0
/// ->
///   addps (%rdi), %xmm0
///
/// But this one is:
///   movss (%rdi), %xmm0
///   addss %xmm0, %xmm0
/// ->
///   addss (%rdi), %xmm0
///
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
                                             const MachineInstr &UserMI,
                                             const MachineFunction &MF) {
  unsigned Opc = LoadMI.getOpcode();
  unsigned UserOpc = UserMI.getOpcode();
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC =
      MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
  unsigned RegSize = TRI.getRegSizeInBits(*RC);

  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm ||
       Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt ||
       Opc == X86::VMOVSSZrm_alt) &&
      RegSize > 32) {
    // These instructions only load 32 bits, we can't fold them if the
    // destination register is wider than 32 bits (4 bytes), and its user
    // instruction isn't scalar (SS).
    // The cases below are the allow-list of scalar-SS users for which the
    // fold is still legal; anything else falls to `default: return true`.
    switch (UserOpc) {
    case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
    case X86::CMPSSrr_Int: case X86::VCMPSSrr_Int: case X86::VCMPSSZrr_Int:
    case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
    case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
    case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
    case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
    case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
    case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
    case X86::VCMPSSZrr_Intk:
    case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
    case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
    case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
    case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
    case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
    case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
    case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
    case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
    case X86::VFMADD213SSr_Int: case X86::VFNMADD213SSr_Int:
    case X86::VFMADD231SSr_Int: case X86::VFNMADD231SSr_Int:
    case X86::VFMSUB132SSr_Int: case X86::VFNMSUB132SSr_Int:
    case X86::VFMSUB213SSr_Int: case X86::VFNMSUB213SSr_Int:
    case X86::VFMSUB231SSr_Int: case X86::VFNMSUB231SSr_Int:
    case X86::VFMADD132SSZr_Int: case X86::VFNMADD132SSZr_Int:
    case X86::VFMADD213SSZr_Int: case X86::VFNMADD213SSZr_Int:
    case X86::VFMADD231SSZr_Int: case X86::VFNMADD231SSZr_Int:
    case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
    case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
    case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
    case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
    case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
    case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
    case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
    case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
    case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
    case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
    case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
    case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
    case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
    case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
    case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
      return false;
    default:
      return true;
    }
  }

  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm ||
       Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt ||
       Opc == X86::VMOVSDZrm_alt) &&
      RegSize > 64) {
    // These instructions only load 64 bits, we can't fold them if the
    // destination register is wider than 64 bits (8 bytes), and its user
    // instruction isn't scalar (SD).
    // Mirror of the SS allow-list above, for the double-precision forms.
    switch (UserOpc) {
    case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
    case X86::CMPSDrr_Int: case X86::VCMPSDrr_Int: case X86::VCMPSDZrr_Int:
    case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
    case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
    case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
    case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
    case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
    case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
    case X86::VCMPSDZrr_Intk:
    case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
    case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
    case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
    case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
    case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
    case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
    case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
    case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
    case X86::VFMADD213SDr_Int: case X86::VFNMADD213SDr_Int:
    case X86::VFMADD231SDr_Int: case X86::VFNMADD231SDr_Int:
    case X86::VFMSUB132SDr_Int: case X86::VFNMSUB132SDr_Int:
    case X86::VFMSUB213SDr_Int: case X86::VFNMSUB213SDr_Int:
    case X86::VFMSUB231SDr_Int: case X86::VFNMSUB231SDr_Int:
    case X86::VFMADD132SDZr_Int: case X86::VFNMADD132SDZr_Int:
    case X86::VFMADD213SDZr_Int: case X86::VFNMADD213SDZr_Int:
    case X86::VFMADD231SDZr_Int: case X86::VFNMADD231SDZr_Int:
    case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
    case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
    case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
    case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
    case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
    case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
    case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
    case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
    case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
    case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
    case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
    case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
    case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
    case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
    case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
      return false;
    default:
      return true;
    }
  }

  // Not a partial SS/SD load, or the destination is not wider than the load.
  return false;
}

/// Fold the load performed by \p LoadMI directly into \p MI at the operand
/// positions \p Ops, returning the folded instruction or nullptr.
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
    LiveIntervals *LIS) const {

  // TODO: Support the case where LoadMI loads a wide register, but MI
  // only uses a subreg.
  for (auto Op : Ops) {
    if (MI.getOperand(Op).getSubReg())
      return nullptr;
  }

  // If loading from a FrameIndex, fold directly from the FrameIndex.
  unsigned NumOps = LoadMI.getDesc().getNumOperands();
  int FrameIndex;
  if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
    if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
      return nullptr;
    // Stack-slot reload: reuse the frame-index folding path.
    return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS);
  }

  // Check switch flag
  if (NoFusing) return nullptr;

  // Avoid partial and undef register update stalls unless optimizing for size.
  if (!MF.getFunction().hasOptSize() &&
      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
       shouldPreventUndefRegUpdateMemFold(MF, MI)))
    return nullptr;

  // Determine the alignment of the load.
  // For the all-zero/all-ones pseudos (which carry no memoperand) the
  // alignment is taken from the width of the constant they materialize.
  unsigned Alignment = 0;
  if (LoadMI.hasOneMemOperand())
    Alignment = (*LoadMI.memoperands_begin())->getAlignment();
  else
    switch (LoadMI.getOpcode()) {
    case X86::AVX512_512_SET0:
    case X86::AVX512_512_SETALLONES:
      Alignment = 64;
      break;
    case X86::AVX2_SETALLONES:
    case X86::AVX1_SETALLONES:
    case X86::AVX_SET0:
    case X86::AVX512_256_SET0:
      Alignment = 32;
      break;
    case X86::V_SET0:
    case X86::V_SETALLONES:
    case X86::AVX512_128_SET0:
      Alignment = 16;
      break;
    case X86::MMX_SET0:
    case X86::FsFLD0SD:
    case X86::AVX512_FsFLD0SD:
      Alignment = 8;
      break;
    case X86::FsFLD0SS:
    case X86::AVX512_FsFLD0SS:
      Alignment = 4;
      break;
    default:
      return nullptr;
    }
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    // TEST r,r folds of both operands become CMP r,0 (see the frame-index
    // overload for the same transformation with size checking).
    unsigned NewOpc = 0;
    switch (MI.getOpcode()) {
    default: return nullptr;
    case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
    }
    // Change to CMPXXri r, 0 first.
    MI.setDesc(get(NewOpc));
    MI.getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return nullptr;

  // Make sure the subregisters match.
  // Otherwise we risk changing the size of the load.
  if (LoadMI.getOperand(0).getSubReg() != MI.getOperand(Ops[0]).getSubReg())
    return nullptr;

  SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
  switch (LoadMI.getOpcode()) {
  case X86::MMX_SET0:
  case X86::V_SET0:
  case X86::V_SETALLONES:
  case X86::AVX2_SETALLONES:
  case X86::AVX1_SETALLONES:
  case X86::AVX_SET0:
  case X86::AVX512_128_SET0:
  case X86::AVX512_256_SET0:
  case X86::AVX512_512_SET0:
  case X86::AVX512_512_SETALLONES:
  case X86::FsFLD0SD:
  case X86::AVX512_FsFLD0SD:
  case X86::FsFLD0SS:
  case X86::AVX512_FsFLD0SS: {
    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
    // Create a constant-pool entry and operands to load from it.

    // Medium and large mode can't fold loads this way.
    if (MF.getTarget().getCodeModel() != CodeModel::Small &&
        MF.getTarget().getCodeModel() != CodeModel::Kernel)
      return nullptr;

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (MF.getTarget().isPositionIndependent()) {
      if (Subtarget.is64Bit())
        PICBase = X86::RIP;
      else
        // FIXME: PICBase = getGlobalBaseReg(&MF);
        // This doesn't work for several reasons.
        // 1. GlobalBaseReg may have been spilled.
        // 2. It may not be live at MI.
        return nullptr;
    }

    // Create a constant-pool entry.
    // Pick an IR type matching the width of the value the pseudo defines.
    MachineConstantPool &MCP = *MF.getConstantPool();
    Type *Ty;
    unsigned Opc = LoadMI.getOpcode();
    if (Opc == X86::FsFLD0SS || Opc == X86::AVX512_FsFLD0SS)
      Ty = Type::getFloatTy(MF.getFunction().getContext());
    else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
      Ty = Type::getDoubleTy(MF.getFunction().getContext());
    else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
    else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
             Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8);
    else if (Opc == X86::MMX_SET0)
      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2);
    else
      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4);

    bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
                      Opc == X86::AVX512_512_SETALLONES ||
                      Opc == X86::AVX1_SETALLONES);
    const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
                                    Constant::getNullValue(Ty);
    unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);

    // Create operands to load from the constant pool entry.
    // (base, scale, index, displacement, segment) addressing form.
    MOs.push_back(MachineOperand::CreateReg(PICBase, false));
    MOs.push_back(MachineOperand::CreateImm(1));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    break;
  }
  default: {
    if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
      return nullptr;

    // Folding a normal load. Just copy the load's address operands.
    MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands,
               LoadMI.operands_begin() + NumOps);
    break;
  }
  }
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
                               /*Size=*/0, Alignment, /*AllowCommute=*/true);
}

/// Collect the memory operands of \p MMOs that describe loads, reusing each
/// MMO when it is load-only and cloning it without the store flag otherwise.
static SmallVector<MachineMemOperand *, 2>
extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
  SmallVector<MachineMemOperand *, 2> LoadMMOs;

  for (MachineMemOperand *MMO : MMOs) {
    if (!MMO->isLoad())
      continue;

    if (!MMO->isStore()) {
      // Reuse the MMO.
      LoadMMOs.push_back(MMO);
    } else {
      // Clone the MMO and unset the store flag.
      LoadMMOs.push_back(MF.getMachineMemOperand(
          MMO, MMO->getFlags() & ~MachineMemOperand::MOStore));
    }
  }

  return LoadMMOs;
}

/// Collect the memory operands of \p MMOs that describe stores, reusing each
/// MMO when it is store-only and cloning it without the load flag otherwise.
/// (Mirror of extractLoadMMOs.)
static SmallVector<MachineMemOperand *, 2>
extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
  SmallVector<MachineMemOperand *, 2> StoreMMOs;

  for (MachineMemOperand *MMO : MMOs) {
    if (!MMO->isStore())
      continue;

    if (!MMO->isLoad()) {
      // Reuse the MMO.
      StoreMMOs.push_back(MMO);
    } else {
      // Clone the MMO and unset the load flag.
      StoreMMOs.push_back(MF.getMachineMemOperand(
          MMO, MMO->getFlags() & ~MachineMemOperand::MOLoad));
    }
  }

  return StoreMMOs;
}

/// Split a memory-form instruction \p MI back into a register-form
/// instruction plus, as requested, an explicit load into \p Reg before it
/// and/or a store of \p Reg after it. New instructions are appended to
/// \p NewMIs. Returns false if \p MI has no unfold-table entry or the
/// requested unfold is not possible.
bool X86InstrInfo::unfoldMemoryOperand(
    MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
    bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
  const X86MemoryFoldTableEntry *I = lookupUnfoldTable(MI.getOpcode());
  if (I == nullptr)
    return false;
  unsigned Opc = I->DstOp;
  unsigned Index = I->Flags & TB_INDEX_MASK;
  bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
  bool FoldedStore = I->Flags & TB_FOLDED_STORE;
  if (UnfoldLoad && !FoldedLoad)
    return false;
  UnfoldLoad &= FoldedLoad;
  if (UnfoldStore && !FoldedStore)
    return false;
  UnfoldStore &= FoldedStore;

  const MCInstrDesc &MCID = get(Opc);
  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
  // TODO: Check if 32-byte or greater accesses are slow too?
  if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
      Subtarget.isUnalignedMem16Slow())
    // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
    // conservatively assume the address is unaligned. That's bad for
    // performance.
    return false;
  // Partition MI's operands: the address operands being unfolded, implicit
  // register operands, and the explicit operands before/after the address.
  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
  SmallVector<MachineOperand,2> BeforeOps;
  SmallVector<MachineOperand,2> AfterOps;
  SmallVector<MachineOperand,4> ImpOps;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI.getOperand(i);
    if (i >= Index && i < Index + X86::AddrNumOperands)
      AddrOps.push_back(Op);
    else if (Op.isReg() && Op.isImplicit())
      ImpOps.push_back(Op);
    else if (i < Index)
      BeforeOps.push_back(Op);
    else if (i > Index)
      AfterOps.push_back(Op);
  }

  // Emit the load instruction.
  if (UnfoldLoad) {
    auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
    loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs);
    if (UnfoldStore) {
      // Address operands cannot be marked isKill.
      for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
        MachineOperand &MO = NewMIs[0]->getOperand(i);
        if (MO.isReg())
          MO.setIsKill(false);
      }
    }
  }

  // Emit the data processing instruction.
  MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI.getDebugLoc(), true);
  MachineInstrBuilder MIB(MF, DataMI);

  if (FoldedStore)
    MIB.addReg(Reg, RegState::Define);
  for (MachineOperand &BeforeOp : BeforeOps)
    MIB.add(BeforeOp);
  if (FoldedLoad)
    MIB.addReg(Reg);
  for (MachineOperand &AfterOp : AfterOps)
    MIB.add(AfterOp);
  for (MachineOperand &ImpOp : ImpOps) {
    // Preserve the full register state of each implicit operand.
    MIB.addReg(ImpOp.getReg(),
               getDefRegState(ImpOp.isDef()) |
               RegState::Implicit |
               getKillRegState(ImpOp.isKill()) |
               getDeadRegState(ImpOp.isDead()) |
               getUndefRegState(ImpOp.isUndef()));
  }
  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
  switch (DataMI->getOpcode()) {
  default: break;
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP8ri: {
    MachineOperand &MO0 = DataMI->getOperand(0);
    MachineOperand &MO1 = DataMI->getOperand(1);
    if (MO1.getImm() == 0) {
      unsigned NewOpc;
      switch (DataMI->getOpcode()) {
      default: llvm_unreachable("Unreachable!");
      case X86::CMP64ri8:
      case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
      case X86::CMP32ri8:
      case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
      case X86::CMP16ri8:
      case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
      case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
      }
      DataMI->setDesc(get(NewOpc));
      MO1.ChangeToRegister(MO0.getReg(), false);
    }
  }
  }
  NewMIs.push_back(DataMI);

  // Emit the store instruction.
  if (UnfoldStore) {
    const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
    auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs);
  }

  return true;
}

/// SelectionDAG variant of unfoldMemoryOperand: split machine node \p N into
/// load / data-processing / store nodes as dictated by the unfold table,
/// appending the new nodes to \p NewNodes.
bool
X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                                  SmallVectorImpl<SDNode*> &NewNodes) const {
  if (!N->isMachineOpcode())
    return false;

  const X86MemoryFoldTableEntry *I = lookupUnfoldTable(N->getMachineOpcode());
  if (I == nullptr)
    return false;
  unsigned Opc = I->DstOp;
  unsigned Index = I->Flags & TB_INDEX_MASK;
  bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
  bool FoldedStore = I->Flags & TB_FOLDED_STORE;
  const MCInstrDesc &MCID = get(Opc);
  MachineFunction &MF = DAG.getMachineFunction();
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
  unsigned NumDefs = MCID.NumDefs;
  std::vector<SDValue> AddrOps;
  std::vector<SDValue> BeforeOps;
  std::vector<SDValue> AfterOps;
  SDLoc dl(N);
  unsigned NumOps = N->getNumOperands();
  // Partition N's operands around the address (Index is biased by NumDefs
  // because SDNode operands, unlike MachineInstr operands, exclude defs).
  for (unsigned i = 0; i != NumOps-1; ++i) {
    SDValue Op = N->getOperand(i);
    if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
      AddrOps.push_back(Op);
    else if (i < Index-NumDefs)
      BeforeOps.push_back(Op);
    else if (i > Index-NumDefs)
      AfterOps.push_back(Op);
  }
  // The final operand is the chain; keep it with the address operands.
  SDValue Chain = N->getOperand(NumOps-1);
  AddrOps.push_back(Chain);

  // Emit the load instruction.
  SDNode *Load = nullptr;
  if (FoldedLoad) {
    EVT VT = *TRI.legalclasstypes_begin(*RC);
    auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
    if (MMOs.empty() && RC == &X86::VR128RegClass &&
        Subtarget.isUnalignedMem16Slow())
      // Do not introduce a slow unaligned load.
      return false;
    // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
    // memory access is slow above.
    unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
    bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
    Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
                              VT, MVT::Other, AddrOps);
    NewNodes.push_back(Load);

    // Preserve memory reference information.
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs);
  }

  // Emit the data processing instruction.
  // Result types: the def's register type first, then N's remaining
  // non-chain value types.
  std::vector<EVT> VTs;
  const TargetRegisterClass *DstRC = nullptr;
  if (MCID.getNumDefs() > 0) {
    DstRC = getRegClass(MCID, 0, &RI, MF);
    VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
  }
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
      VTs.push_back(VT);
  }
  if (Load)
    BeforeOps.push_back(SDValue(Load, 0));
  BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
  switch (Opc) {
  default: break;
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP8ri:
    if (isNullConstant(BeforeOps[1])) {
      switch (Opc) {
      default: llvm_unreachable("Unreachable!");
      case X86::CMP64ri8:
      case X86::CMP64ri32: Opc = X86::TEST64rr; break;
      case X86::CMP32ri8:
      case X86::CMP32ri: Opc = X86::TEST32rr; break;
      case X86::CMP16ri8:
      case X86::CMP16ri: Opc = X86::TEST16rr; break;
      case X86::CMP8ri: Opc = X86::TEST8rr; break;
      }
      // TEST takes the register twice; replace the zero immediate.
      BeforeOps[1] = BeforeOps[0];
    }
  }
  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
  NewNodes.push_back(NewNode);

  // Emit the store instruction.
  if (FoldedStore) {
    // Re-chain: store after the new data-processing node.
    AddrOps.pop_back();
    AddrOps.push_back(SDValue(NewNode, 0));
    AddrOps.push_back(Chain);
    auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
    if (MMOs.empty() && RC == &X86::VR128RegClass &&
        Subtarget.isUnalignedMem16Slow())
      // Do not introduce a slow unaligned store.
      return false;
    // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
    // memory access is slow above.
    unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
    bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
    SDNode *Store =
        DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
                           dl, MVT::Other, AddrOps);
    NewNodes.push_back(Store);

    // Preserve memory reference information.
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs);
  }

  return true;
}

/// Query the unfold table: return the register-form opcode that \p Opc
/// unfolds to (0 if none, or if the requested load/store unfold is not
/// available). If \p LoadRegIndex is non-null, also report the operand
/// index of the folded memory reference.
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                      bool UnfoldLoad, bool UnfoldStore,
                                      unsigned *LoadRegIndex) const {
  const X86MemoryFoldTableEntry *I = lookupUnfoldTable(Opc);
  if (I == nullptr)
    return 0;
  bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
  bool FoldedStore = I->Flags & TB_FOLDED_STORE;
  if (UnfoldLoad && !FoldedLoad)
    return 0;
  if (UnfoldStore && !FoldedStore)
    return 0;
  if (LoadRegIndex)
    *LoadRegIndex = I->Flags & TB_INDEX_MASK;
  return I->DstOp;
}

/// Determine whether two load machine nodes read from the same base pointer,
/// reporting their displacements in \p Offset1 / \p Offset2.
/// (Continues past the end of this chunk.)
bool
X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                      int64_t &Offset1, int64_t &Offset2) const {
  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;
  unsigned Opc1 = Load1->getMachineOpcode();
  unsigned Opc2 = Load2->getMachineOpcode();
  switch (Opc1) {
  default: return false;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp32m:
  case X86::LD_Fp64m:
  case X86::LD_Fp80m:
  case X86::MOVSSrm:
  case X86::MOVSSrm_alt:
  case X86::MOVSDrm:
  case X86::MOVSDrm_alt:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
case X86::MOVAPDrm: 5583309124Sdim case X86::MOVUPDrm: 5584202878Srdivacky case X86::MOVDQArm: 5585202878Srdivacky case X86::MOVDQUrm: 5586226633Sdim // AVX load instructions 5587226633Sdim case X86::VMOVSSrm: 5588353358Sdim case X86::VMOVSSrm_alt: 5589226633Sdim case X86::VMOVSDrm: 5590353358Sdim case X86::VMOVSDrm_alt: 5591226633Sdim case X86::VMOVAPSrm: 5592226633Sdim case X86::VMOVUPSrm: 5593226633Sdim case X86::VMOVAPDrm: 5594309124Sdim case X86::VMOVUPDrm: 5595226633Sdim case X86::VMOVDQArm: 5596226633Sdim case X86::VMOVDQUrm: 5597224145Sdim case X86::VMOVAPSYrm: 5598224145Sdim case X86::VMOVUPSYrm: 5599224145Sdim case X86::VMOVAPDYrm: 5600309124Sdim case X86::VMOVUPDYrm: 5601224145Sdim case X86::VMOVDQAYrm: 5602224145Sdim case X86::VMOVDQUYrm: 5603309124Sdim // AVX512 load instructions 5604309124Sdim case X86::VMOVSSZrm: 5605353358Sdim case X86::VMOVSSZrm_alt: 5606309124Sdim case X86::VMOVSDZrm: 5607353358Sdim case X86::VMOVSDZrm_alt: 5608309124Sdim case X86::VMOVAPSZ128rm: 5609309124Sdim case X86::VMOVUPSZ128rm: 5610314564Sdim case X86::VMOVAPSZ128rm_NOVLX: 5611314564Sdim case X86::VMOVUPSZ128rm_NOVLX: 5612309124Sdim case X86::VMOVAPDZ128rm: 5613309124Sdim case X86::VMOVUPDZ128rm: 5614309124Sdim case X86::VMOVDQU8Z128rm: 5615309124Sdim case X86::VMOVDQU16Z128rm: 5616309124Sdim case X86::VMOVDQA32Z128rm: 5617309124Sdim case X86::VMOVDQU32Z128rm: 5618309124Sdim case X86::VMOVDQA64Z128rm: 5619309124Sdim case X86::VMOVDQU64Z128rm: 5620309124Sdim case X86::VMOVAPSZ256rm: 5621309124Sdim case X86::VMOVUPSZ256rm: 5622314564Sdim case X86::VMOVAPSZ256rm_NOVLX: 5623314564Sdim case X86::VMOVUPSZ256rm_NOVLX: 5624309124Sdim case X86::VMOVAPDZ256rm: 5625309124Sdim case X86::VMOVUPDZ256rm: 5626309124Sdim case X86::VMOVDQU8Z256rm: 5627309124Sdim case X86::VMOVDQU16Z256rm: 5628309124Sdim case X86::VMOVDQA32Z256rm: 5629309124Sdim case X86::VMOVDQU32Z256rm: 5630309124Sdim case X86::VMOVDQA64Z256rm: 5631309124Sdim case X86::VMOVDQU64Z256rm: 5632309124Sdim case X86::VMOVAPSZrm: 
5633309124Sdim case X86::VMOVUPSZrm: 5634309124Sdim case X86::VMOVAPDZrm: 5635309124Sdim case X86::VMOVUPDZrm: 5636309124Sdim case X86::VMOVDQU8Zrm: 5637309124Sdim case X86::VMOVDQU16Zrm: 5638309124Sdim case X86::VMOVDQA32Zrm: 5639309124Sdim case X86::VMOVDQU32Zrm: 5640309124Sdim case X86::VMOVDQA64Zrm: 5641309124Sdim case X86::VMOVDQU64Zrm: 5642309124Sdim case X86::KMOVBkm: 5643309124Sdim case X86::KMOVWkm: 5644309124Sdim case X86::KMOVDkm: 5645309124Sdim case X86::KMOVQkm: 5646202878Srdivacky break; 5647202878Srdivacky } 5648202878Srdivacky switch (Opc2) { 5649202878Srdivacky default: return false; 5650202878Srdivacky case X86::MOV8rm: 5651202878Srdivacky case X86::MOV16rm: 5652202878Srdivacky case X86::MOV32rm: 5653202878Srdivacky case X86::MOV64rm: 5654202878Srdivacky case X86::LD_Fp32m: 5655202878Srdivacky case X86::LD_Fp64m: 5656202878Srdivacky case X86::LD_Fp80m: 5657202878Srdivacky case X86::MOVSSrm: 5658353358Sdim case X86::MOVSSrm_alt: 5659202878Srdivacky case X86::MOVSDrm: 5660353358Sdim case X86::MOVSDrm_alt: 5661202878Srdivacky case X86::MMX_MOVD64rm: 5662202878Srdivacky case X86::MMX_MOVQ64rm: 5663202878Srdivacky case X86::MOVAPSrm: 5664202878Srdivacky case X86::MOVUPSrm: 5665202878Srdivacky case X86::MOVAPDrm: 5666309124Sdim case X86::MOVUPDrm: 5667202878Srdivacky case X86::MOVDQArm: 5668202878Srdivacky case X86::MOVDQUrm: 5669226633Sdim // AVX load instructions 5670226633Sdim case X86::VMOVSSrm: 5671353358Sdim case X86::VMOVSSrm_alt: 5672226633Sdim case X86::VMOVSDrm: 5673353358Sdim case X86::VMOVSDrm_alt: 5674226633Sdim case X86::VMOVAPSrm: 5675226633Sdim case X86::VMOVUPSrm: 5676226633Sdim case X86::VMOVAPDrm: 5677309124Sdim case X86::VMOVUPDrm: 5678226633Sdim case X86::VMOVDQArm: 5679226633Sdim case X86::VMOVDQUrm: 5680224145Sdim case X86::VMOVAPSYrm: 5681224145Sdim case X86::VMOVUPSYrm: 5682224145Sdim case X86::VMOVAPDYrm: 5683309124Sdim case X86::VMOVUPDYrm: 5684224145Sdim case X86::VMOVDQAYrm: 5685224145Sdim case X86::VMOVDQUYrm: 
5686309124Sdim // AVX512 load instructions 5687309124Sdim case X86::VMOVSSZrm: 5688353358Sdim case X86::VMOVSSZrm_alt: 5689309124Sdim case X86::VMOVSDZrm: 5690353358Sdim case X86::VMOVSDZrm_alt: 5691309124Sdim case X86::VMOVAPSZ128rm: 5692309124Sdim case X86::VMOVUPSZ128rm: 5693314564Sdim case X86::VMOVAPSZ128rm_NOVLX: 5694314564Sdim case X86::VMOVUPSZ128rm_NOVLX: 5695309124Sdim case X86::VMOVAPDZ128rm: 5696309124Sdim case X86::VMOVUPDZ128rm: 5697309124Sdim case X86::VMOVDQU8Z128rm: 5698309124Sdim case X86::VMOVDQU16Z128rm: 5699309124Sdim case X86::VMOVDQA32Z128rm: 5700309124Sdim case X86::VMOVDQU32Z128rm: 5701309124Sdim case X86::VMOVDQA64Z128rm: 5702309124Sdim case X86::VMOVDQU64Z128rm: 5703309124Sdim case X86::VMOVAPSZ256rm: 5704309124Sdim case X86::VMOVUPSZ256rm: 5705314564Sdim case X86::VMOVAPSZ256rm_NOVLX: 5706314564Sdim case X86::VMOVUPSZ256rm_NOVLX: 5707309124Sdim case X86::VMOVAPDZ256rm: 5708309124Sdim case X86::VMOVUPDZ256rm: 5709309124Sdim case X86::VMOVDQU8Z256rm: 5710309124Sdim case X86::VMOVDQU16Z256rm: 5711309124Sdim case X86::VMOVDQA32Z256rm: 5712309124Sdim case X86::VMOVDQU32Z256rm: 5713309124Sdim case X86::VMOVDQA64Z256rm: 5714309124Sdim case X86::VMOVDQU64Z256rm: 5715309124Sdim case X86::VMOVAPSZrm: 5716309124Sdim case X86::VMOVUPSZrm: 5717309124Sdim case X86::VMOVAPDZrm: 5718309124Sdim case X86::VMOVUPDZrm: 5719309124Sdim case X86::VMOVDQU8Zrm: 5720309124Sdim case X86::VMOVDQU16Zrm: 5721309124Sdim case X86::VMOVDQA32Zrm: 5722309124Sdim case X86::VMOVDQU32Zrm: 5723309124Sdim case X86::VMOVDQA64Zrm: 5724309124Sdim case X86::VMOVDQU64Zrm: 5725309124Sdim case X86::KMOVBkm: 5726309124Sdim case X86::KMOVWkm: 5727309124Sdim case X86::KMOVDkm: 5728309124Sdim case X86::KMOVQkm: 5729202878Srdivacky break; 5730202878Srdivacky } 5731202878Srdivacky 5732321369Sdim // Lambda to check if both the loads have the same value for an operand index. 
5733321369Sdim auto HasSameOp = [&](int I) { 5734321369Sdim return Load1->getOperand(I) == Load2->getOperand(I); 5735321369Sdim }; 5736321369Sdim 5737321369Sdim // All operands except the displacement should match. 5738321369Sdim if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) || 5739321369Sdim !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg)) 5740202878Srdivacky return false; 5741321369Sdim 5742321369Sdim // Chain Operand must be the same. 5743321369Sdim if (!HasSameOp(5)) 5744202878Srdivacky return false; 5745202878Srdivacky 5746321369Sdim // Now let's examine if the displacements are constants. 5747321369Sdim auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp)); 5748321369Sdim auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp)); 5749321369Sdim if (!Disp1 || !Disp2) 5750321369Sdim return false; 5751321369Sdim 5752321369Sdim Offset1 = Disp1->getSExtValue(); 5753321369Sdim Offset2 = Disp2->getSExtValue(); 5754321369Sdim return true; 5755202878Srdivacky} 5756202878Srdivacky 5757202878Srdivackybool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, 5758202878Srdivacky int64_t Offset1, int64_t Offset2, 5759202878Srdivacky unsigned NumLoads) const { 5760202878Srdivacky assert(Offset2 > Offset1); 5761202878Srdivacky if ((Offset2 - Offset1) / 8 > 64) 5762202878Srdivacky return false; 5763202878Srdivacky 5764202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 5765202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 5766202878Srdivacky if (Opc1 != Opc2) 5767202878Srdivacky return false; // FIXME: overly conservative? 
5768202878Srdivacky 5769202878Srdivacky switch (Opc1) { 5770202878Srdivacky default: break; 5771202878Srdivacky case X86::LD_Fp32m: 5772202878Srdivacky case X86::LD_Fp64m: 5773202878Srdivacky case X86::LD_Fp80m: 5774202878Srdivacky case X86::MMX_MOVD64rm: 5775202878Srdivacky case X86::MMX_MOVQ64rm: 5776202878Srdivacky return false; 5777202878Srdivacky } 5778202878Srdivacky 5779202878Srdivacky EVT VT = Load1->getValueType(0); 5780202878Srdivacky switch (VT.getSimpleVT().SimpleTy) { 5781210299Sed default: 5782202878Srdivacky // XMM registers. In 64-bit mode we can be a bit more aggressive since we 5783202878Srdivacky // have 16 of them to play with. 5784276479Sdim if (Subtarget.is64Bit()) { 5785202878Srdivacky if (NumLoads >= 3) 5786202878Srdivacky return false; 5787210299Sed } else if (NumLoads) { 5788202878Srdivacky return false; 5789210299Sed } 5790202878Srdivacky break; 5791202878Srdivacky case MVT::i8: 5792202878Srdivacky case MVT::i16: 5793202878Srdivacky case MVT::i32: 5794202878Srdivacky case MVT::i64: 5795202878Srdivacky case MVT::f32: 5796202878Srdivacky case MVT::f64: 5797202878Srdivacky if (NumLoads) 5798202878Srdivacky return false; 5799210299Sed break; 5800202878Srdivacky } 5801202878Srdivacky 5802202878Srdivacky return true; 5803202878Srdivacky} 5804202878Srdivacky 5805193323Sedbool X86InstrInfo:: 5806314564SdimreverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 5807193323Sed assert(Cond.size() == 1 && "Invalid X86 branch condition!"); 5808193323Sed X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); 5809193323Sed Cond[0].setImm(GetOppositeBranchCondition(CC)); 5810193323Sed return false; 5811193323Sed} 5812193323Sed 5813193323Sedbool X86InstrInfo:: 5814193323SedisSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 5815193323Sed // FIXME: Return false for x87 stack register classes for now. We can't 5816193323Sed // allow any loads of these registers before FpGet_ST0_80. 
5817332833Sdim return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass || 5818332833Sdim RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass || 5819332833Sdim RC == &X86::RFP80RegClass); 5820193323Sed} 5821193323Sed 5822288943Sdim/// Return a virtual register initialized with the 5823193323Sed/// the global base register value. Output instructions required to 5824193323Sed/// initialize the register in the function entry block, if necessary. 5825193323Sed/// 5826210299Sed/// TODO: Eliminate this and move the code to X86MachineFunctionInfo. 5827210299Sed/// 5828193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 5829341825Sdim assert((!Subtarget.is64Bit() || 5830341825Sdim MF->getTarget().getCodeModel() == CodeModel::Medium || 5831341825Sdim MF->getTarget().getCodeModel() == CodeModel::Large) && 5832193323Sed "X86-64 PIC uses RIP relative addressing"); 5833193323Sed 5834193323Sed X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 5835193323Sed unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 5836193323Sed if (GlobalBaseReg != 0) 5837193323Sed return GlobalBaseReg; 5838193323Sed 5839210299Sed // Create the register. The code to initialize it is inserted 5840210299Sed // later, by the CGBR pass (below). 5841193323Sed MachineRegisterInfo &RegInfo = MF->getRegInfo(); 5842341825Sdim GlobalBaseReg = RegInfo.createVirtualRegister( 5843341825Sdim Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass); 5844193323Sed X86FI->setGlobalBaseReg(GlobalBaseReg); 5845193323Sed return GlobalBaseReg; 5846193323Sed} 5847206083Srdivacky 5848206083Srdivacky// These are the replaceable SSE instructions. Some of these have Int variants 5849206083Srdivacky// that we don't include here. We don't want to replace instructions selected 5850206083Srdivacky// by intrinsics. 
// Each row maps one instruction across execution domains:
//   column 0 = PackedSingle, column 1 = PackedDouble, column 2 = PackedInt.
// A repeated opcode in a row means that instruction has no distinct
// encoding in that domain.
static const uint16_t ReplaceableInstrs[][3] = {
  //PackedSingle     PackedDouble    PackedInt
  { X86::MOVAPSmr,   X86::MOVAPDmr,  X86::MOVDQAmr  },
  { X86::MOVAPSrm,   X86::MOVAPDrm,  X86::MOVDQArm  },
  { X86::MOVAPSrr,   X86::MOVAPDrr,  X86::MOVDQArr  },
  { X86::MOVUPSmr,   X86::MOVUPDmr,  X86::MOVDQUmr  },
  { X86::MOVUPSrm,   X86::MOVUPDrm,  X86::MOVDQUrm  },
  { X86::MOVLPSmr,   X86::MOVLPDmr,  X86::MOVPQI2QImr },
  { X86::MOVSDmr,    X86::MOVSDmr,   X86::MOVPQI2QImr },
  { X86::MOVSSmr,    X86::MOVSSmr,   X86::MOVPDI2DImr },
  { X86::MOVSDrm,    X86::MOVSDrm,   X86::MOVQI2PQIrm },
  { X86::MOVSDrm_alt,X86::MOVSDrm_alt,X86::MOVQI2PQIrm },
  { X86::MOVSSrm,    X86::MOVSSrm,   X86::MOVDI2PDIrm },
  { X86::MOVSSrm_alt,X86::MOVSSrm_alt,X86::MOVDI2PDIrm },
  { X86::MOVNTPSmr,  X86::MOVNTPDmr, X86::MOVNTDQmr },
  { X86::ANDNPSrm,   X86::ANDNPDrm,  X86::PANDNrm   },
  { X86::ANDNPSrr,   X86::ANDNPDrr,  X86::PANDNrr   },
  { X86::ANDPSrm,    X86::ANDPDrm,   X86::PANDrm    },
  { X86::ANDPSrr,    X86::ANDPDrr,   X86::PANDrr    },
  { X86::ORPSrm,     X86::ORPDrm,    X86::PORrm     },
  { X86::ORPSrr,     X86::ORPDrr,    X86::PORrr     },
  { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },
  { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },
  { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
  { X86::MOVLHPSrr,  X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
  { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
  { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
  { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
  { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
  { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
  { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
  { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr },
  { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr },
  // AVX 128-bit support
  { X86::VMOVAPSmr,  X86::VMOVAPDmr,  X86::VMOVDQAmr  },
  { X86::VMOVAPSrm,  X86::VMOVAPDrm,  X86::VMOVDQArm  },
  { X86::VMOVAPSrr,  X86::VMOVAPDrr,  X86::VMOVDQArr  },
  { X86::VMOVUPSmr,  X86::VMOVUPDmr,  X86::VMOVDQUmr  },
  { X86::VMOVUPSrm,  X86::VMOVUPDrm,  X86::VMOVDQUrm  },
  { X86::VMOVLPSmr,  X86::VMOVLPDmr,  X86::VMOVPQI2QImr },
  { X86::VMOVSDmr,   X86::VMOVSDmr,   X86::VMOVPQI2QImr },
  { X86::VMOVSSmr,   X86::VMOVSSmr,   X86::VMOVPDI2DImr },
  { X86::VMOVSDrm,   X86::VMOVSDrm,   X86::VMOVQI2PQIrm },
  { X86::VMOVSDrm_alt,X86::VMOVSDrm_alt,X86::VMOVQI2PQIrm },
  { X86::VMOVSSrm,   X86::VMOVSSrm,   X86::VMOVDI2PDIrm },
  { X86::VMOVSSrm_alt,X86::VMOVSSrm_alt,X86::VMOVDI2PDIrm },
  { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
  { X86::VANDNPSrm,  X86::VANDNPDrm,  X86::VPANDNrm   },
  { X86::VANDNPSrr,  X86::VANDNPDrr,  X86::VPANDNrr   },
  { X86::VANDPSrm,   X86::VANDPDrm,   X86::VPANDrm    },
  { X86::VANDPSrr,   X86::VANDPDrr,   X86::VPANDrr    },
  { X86::VORPSrm,    X86::VORPDrm,    X86::VPORrm     },
  { X86::VORPSrr,    X86::VORPDrr,    X86::VPORrr     },
  { X86::VXORPSrm,   X86::VXORPDrm,   X86::VPXORrm    },
  { X86::VXORPSrr,   X86::VXORPDrr,   X86::VPXORrr    },
  { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
  { X86::VMOVLHPSrr,  X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
  { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
  { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
  { X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
  { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
  { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
  { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
  { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr },
  { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr },
  // AVX 256-bit support
  { X86::VMOVAPSYmr,   X86::VMOVAPDYmr,   X86::VMOVDQAYmr  },
  { X86::VMOVAPSYrm,   X86::VMOVAPDYrm,   X86::VMOVDQAYrm  },
  { X86::VMOVAPSYrr,   X86::VMOVAPDYrr,   X86::VMOVDQAYrr  },
  { X86::VMOVUPSYmr,   X86::VMOVUPDYmr,   X86::VMOVDQUYmr  },
  { X86::VMOVUPSYrm,   X86::VMOVUPDYrm,   X86::VMOVDQUYrm  },
  { X86::VMOVNTPSYmr,  X86::VMOVNTPDYmr,  X86::VMOVNTDQYmr },
  { X86::VPERMPSYrm,   X86::VPERMPSYrm,   X86::VPERMDYrm },
  { X86::VPERMPSYrr,   X86::VPERMPSYrr,   X86::VPERMDYrr },
  { X86::VPERMPDYmi,   X86::VPERMPDYmi,   X86::VPERMQYmi },
  { X86::VPERMPDYri,   X86::VPERMPDYri,   X86::VPERMQYri },
  // AVX512 support
  { X86::VMOVLPSZ128mr,  X86::VMOVLPDZ128mr,  X86::VMOVPQI2QIZmr  },
  { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
  { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr },
  { X86::VMOVNTPSZmr,    X86::VMOVNTPDZmr,    X86::VMOVNTDQZmr    },
  { X86::VMOVSDZmr,      X86::VMOVSDZmr,      X86::VMOVPQI2QIZmr  },
  { X86::VMOVSSZmr,      X86::VMOVSSZmr,      X86::VMOVPDI2DIZmr  },
  { X86::VMOVSDZrm,      X86::VMOVSDZrm,      X86::VMOVQI2PQIZrm  },
  { X86::VMOVSDZrm_alt,  X86::VMOVSDZrm_alt,  X86::VMOVQI2PQIZrm  },
  { X86::VMOVSSZrm,      X86::VMOVSSZrm,      X86::VMOVDI2PDIZrm  },
  { X86::VMOVSSZrm_alt,  X86::VMOVSSZrm_alt,  X86::VMOVDI2PDIZrm  },
  { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
  { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
  { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
  { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
  { X86::VBROADCASTSSZr,    X86::VBROADCASTSSZr,    X86::VPBROADCASTDZr },
  { X86::VBROADCASTSSZm,    X86::VBROADCASTSSZm,    X86::VPBROADCASTDZm },
  { X86::VMOVDDUPZ128rr,    X86::VMOVDDUPZ128rr,    X86::VPBROADCASTQZ128r },
  { X86::VMOVDDUPZ128rm,    X86::VMOVDDUPZ128rm,    X86::VPBROADCASTQZ128m },
  { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
  { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
  { X86::VBROADCASTSDZr,    X86::VBROADCASTSDZr,    X86::VPBROADCASTQZr },
  { X86::VBROADCASTSDZm,    X86::VBROADCASTSDZm,    X86::VPBROADCASTQZm },
  { X86::VINSERTF32x4Zrr,   X86::VINSERTF32x4Zrr,   X86::VINSERTI32x4Zrr },
  { X86::VINSERTF32x4Zrm,   X86::VINSERTF32x4Zrm,   X86::VINSERTI32x4Zrm },
  { X86::VINSERTF32x8Zrr,   X86::VINSERTF32x8Zrr,   X86::VINSERTI32x8Zrr },
  { X86::VINSERTF32x8Zrm,   X86::VINSERTF32x8Zrm,   X86::VINSERTI32x8Zrm },
  { X86::VINSERTF64x2Zrr,   X86::VINSERTF64x2Zrr,   X86::VINSERTI64x2Zrr },
  { X86::VINSERTF64x2Zrm,   X86::VINSERTF64x2Zrm,   X86::VINSERTI64x2Zrm },
  { X86::VINSERTF64x4Zrr,   X86::VINSERTF64x4Zrr,   X86::VINSERTI64x4Zrr },
  { X86::VINSERTF64x4Zrm,   X86::VINSERTF64x4Zrm,   X86::VINSERTI64x4Zrm },
  { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rr,X86::VINSERTI32x4Z256rr },
  { X86::VINSERTF32x4Z256rm,X86::VINSERTF32x4Z256rm,X86::VINSERTI32x4Z256rm },
  { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rr,X86::VINSERTI64x2Z256rr },
  { X86::VINSERTF64x2Z256rm,X86::VINSERTF64x2Z256rm,X86::VINSERTI64x2Z256rm },
  { X86::VEXTRACTF32x4Zrr,  X86::VEXTRACTF32x4Zrr,  X86::VEXTRACTI32x4Zrr },
  { X86::VEXTRACTF32x4Zmr,  X86::VEXTRACTF32x4Zmr,  X86::VEXTRACTI32x4Zmr },
  { X86::VEXTRACTF32x8Zrr,  X86::VEXTRACTF32x8Zrr,  X86::VEXTRACTI32x8Zrr },
  { X86::VEXTRACTF32x8Zmr,  X86::VEXTRACTF32x8Zmr,  X86::VEXTRACTI32x8Zmr },
  { X86::VEXTRACTF64x2Zrr,  X86::VEXTRACTF64x2Zrr,  X86::VEXTRACTI64x2Zrr },
  { X86::VEXTRACTF64x2Zmr,  X86::VEXTRACTF64x2Zmr,  X86::VEXTRACTI64x2Zmr },
  { X86::VEXTRACTF64x4Zrr,  X86::VEXTRACTF64x4Zrr,  X86::VEXTRACTI64x4Zrr },
  { X86::VEXTRACTF64x4Zmr,  X86::VEXTRACTF64x4Zmr,  X86::VEXTRACTI64x4Zmr },
  { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTI32x4Z256rr },
  { X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTI32x4Z256mr },
  { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTI64x2Z256rr },
  { X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTI64x2Z256mr },
  { X86::VPERMILPSmi,     X86::VPERMILPSmi,     X86::VPSHUFDmi },
  { X86::VPERMILPSri,     X86::VPERMILPSri,     X86::VPSHUFDri },
  { X86::VPERMILPSZ128mi, X86::VPERMILPSZ128mi, X86::VPSHUFDZ128mi },
  { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128ri, X86::VPSHUFDZ128ri },
  { X86::VPERMILPSZ256mi, X86::VPERMILPSZ256mi, X86::VPSHUFDZ256mi },
  { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri },
  { X86::VPERMILPSZmi,    X86::VPERMILPSZmi,    X86::VPSHUFDZmi },
  { X86::VPERMILPSZri,    X86::VPERMILPSZri,    X86::VPSHUFDZri },
  { X86::VPERMPSZ256rm,   X86::VPERMPSZ256rm,   X86::VPERMDZ256rm },
  { X86::VPERMPSZ256rr,   X86::VPERMPSZ256rr,   X86::VPERMDZ256rr },
  { X86::VPERMPDZ256mi,   X86::VPERMPDZ256mi,   X86::VPERMQZ256mi },
  { X86::VPERMPDZ256ri,   X86::VPERMPDZ256ri,   X86::VPERMQZ256ri },
  { X86::VPERMPDZ256rm,   X86::VPERMPDZ256rm,   X86::VPERMQZ256rm },
  { X86::VPERMPDZ256rr,   X86::VPERMPDZ256rr,   X86::VPERMQZ256rr },
  { X86::VPERMPSZrm,      X86::VPERMPSZrm,      X86::VPERMDZrm },
  { X86::VPERMPSZrr,      X86::VPERMPSZrr,      X86::VPERMDZrr },
  { X86::VPERMPDZmi,      X86::VPERMPDZmi,      X86::VPERMQZmi },
  { X86::VPERMPDZri,      X86::VPERMPDZri,      X86::VPERMQZri },
  { X86::VPERMPDZrm,      X86::VPERMPDZrm,      X86::VPERMQZrm },
  { X86::VPERMPDZrr,      X86::VPERMPDZrr,      X86::VPERMQZrr },
  { X86::VUNPCKLPDZ256rm, X86::VUNPCKLPDZ256rm, X86::VPUNPCKLQDQZ256rm },
  { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rr, X86::VPUNPCKLQDQZ256rr },
  { X86::VUNPCKHPDZ256rm, X86::VUNPCKHPDZ256rm, X86::VPUNPCKHQDQZ256rm },
  { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rr, X86::VPUNPCKHQDQZ256rr },
  { X86::VUNPCKLPSZ256rm, X86::VUNPCKLPSZ256rm, X86::VPUNPCKLDQZ256rm },
  { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr },
  { X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
  { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
  { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
  { X86::VMOVLHPSZrr,     X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
  { X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
  { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
  { X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
  { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rr, X86::VPUNPCKLDQZ128rr },
  { X86::VUNPCKHPSZ128rm, X86::VUNPCKHPSZ128rm, X86::VPUNPCKHDQZ128rm },
  { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rr, X86::VPUNPCKHDQZ128rr },
  { X86::VUNPCKLPDZrm,    X86::VUNPCKLPDZrm,    X86::VPUNPCKLQDQZrm },
  { X86::VUNPCKLPDZrr,    X86::VUNPCKLPDZrr,    X86::VPUNPCKLQDQZrr },
  { X86::VUNPCKHPDZrm,    X86::VUNPCKHPDZrm,    X86::VPUNPCKHQDQZrm },
  { X86::VUNPCKHPDZrr,    X86::VUNPCKHPDZrr,    X86::VPUNPCKHQDQZrr },
  { X86::VUNPCKLPSZrm,    X86::VUNPCKLPSZrm,    X86::VPUNPCKLDQZrm },
  { X86::VUNPCKLPSZrr,    X86::VUNPCKLPSZrr,    X86::VPUNPCKLDQZrr },
  { X86::VUNPCKHPSZrm,    X86::VUNPCKHPSZrm,    X86::VPUNPCKHDQZrm },
  { X86::VUNPCKHPSZrr,    X86::VUNPCKHPSZrr,    X86::VPUNPCKHDQZrr },
  { X86::VEXTRACTPSZmr,   X86::VEXTRACTPSZmr,   X86::VPEXTRDZmr },
  { X86::VEXTRACTPSZrr,   X86::VEXTRACTPSZrr,   X86::VPEXTRDZrr },
};

// Same column layout as ReplaceableInstrs, but only valid when AVX2 is
// available (the integer forms here are AVX2 instructions).
static const uint16_t ReplaceableInstrsAVX2[][3] = {
  //PackedSingle       PackedDouble       PackedInt
  { X86::VANDNPSYrm,   X86::VANDNPDYrm,   X86::VPANDNYrm },
  { X86::VANDNPSYrr,   X86::VANDNPDYrr,   X86::VPANDNYrr },
  { X86::VANDPSYrm,    X86::VANDPDYrm,    X86::VPANDYrm },
  { X86::VANDPSYrr,    X86::VANDPDYrr,    X86::VPANDYrr },
  { X86::VORPSYrm,     X86::VORPDYrm,     X86::VPORYrm },
  { X86::VORPSYrr,     X86::VORPDYrr,     X86::VPORYrr },
  { X86::VXORPSYrm,    X86::VXORPDYrm,    X86::VPXORYrm },
  { X86::VXORPSYrr,    X86::VXORPDYrr,    X86::VPXORYrr },
  { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
  { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
  { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
  { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
  { X86::VMOVDDUPrm,     X86::VMOVDDUPrm,     X86::VPBROADCASTQrm},
  { X86::VMOVDDUPrr,     X86::VMOVDDUPrr,     X86::VPBROADCASTQrr},
  { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
  { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
  { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
  { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm},
  { X86::VBROADCASTF128,  X86::VBROADCASTF128,  X86::VBROADCASTI128 },
  { X86::VBLENDPSYrri,    X86::VBLENDPSYrri,    X86::VPBLENDDYrri },
  { X86::VBLENDPSYrmi,    X86::VBLENDPSYrmi,    X86::VPBLENDDYrmi },
  { X86::VPERMILPSYmi,    X86::VPERMILPSYmi,    X86::VPSHUFDYmi },
  { X86::VPERMILPSYri,    X86::VPERMILPSYri,    X86::VPSHUFDYri },
  { X86::VUNPCKLPDYrm,    X86::VUNPCKLPDYrm,    X86::VPUNPCKLQDQYrm },
  { X86::VUNPCKLPDYrr,    X86::VUNPCKLPDYrr,    X86::VPUNPCKLQDQYrr },
  { X86::VUNPCKHPDYrm,    X86::VUNPCKHPDYrm,    X86::VPUNPCKHQDQYrm },
  { X86::VUNPCKHPDYrr,    X86::VUNPCKHPDYrr,    X86::VPUNPCKHQDQYrr },
  { X86::VUNPCKLPSYrm,    X86::VUNPCKLPSYrm,    X86::VPUNPCKLDQYrm },
  { X86::VUNPCKLPSYrr,    X86::VUNPCKLPSYrr,    X86::VPUNPCKLDQYrr },
  { X86::VUNPCKHPSYrm,    X86::VUNPCKHPSYrm,    X86::VPUNPCKHDQYrm },
  { X86::VUNPCKHPSYrr,    X86::VUNPCKHPSYrr,    X86::VPUNPCKHDQYrr },
};

// Instructions with single/double forms but no packed-int equivalent;
// X86::INSTRUCTION_LIST_END marks the missing integer column.
static const uint16_t ReplaceableInstrsFP[][3] = {
  //PackedSingle         PackedDouble
  { X86::MOVLPSrm,       X86::MOVLPDrm,      X86::INSTRUCTION_LIST_END },
  { X86::MOVHPSrm,       X86::MOVHPDrm,      X86::INSTRUCTION_LIST_END },
  { X86::MOVHPSmr,       X86::MOVHPDmr,      X86::INSTRUCTION_LIST_END },
  { X86::VMOVLPSrm,      X86::VMOVLPDrm,     X86::INSTRUCTION_LIST_END },
  { X86::VMOVHPSrm,      X86::VMOVHPDrm,     X86::INSTRUCTION_LIST_END },
  { X86::VMOVHPSmr,      X86::VMOVHPDmr,     X86::INSTRUCTION_LIST_END },
  { X86::VMOVLPSZ128rm,  X86::VMOVLPDZ128rm, X86::INSTRUCTION_LIST_END },
  { X86::VMOVHPSZ128rm,  X86::VMOVHPDZ128rm, X86::INSTRUCTION_LIST_END },
  { X86::VMOVHPSZ128mr,  X86::VMOVHPDZ128mr, X86::INSTRUCTION_LIST_END },
};

static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
  //PackedSingle          PackedDouble          PackedInt
  { X86::VEXTRACTF128mr,  X86::VEXTRACTF128mr,  X86::VEXTRACTI128mr },
  { X86::VEXTRACTF128rr,  X86::VEXTRACTF128rr,  X86::VEXTRACTI128rr },
  { X86::VINSERTF128rm,   X86::VINSERTF128rm,   X86::VINSERTI128rm },
  { X86::VINSERTF128rr,   X86::VINSERTF128rr,   X86::VINSERTI128rr },
};

static const uint16_t ReplaceableInstrsAVX512[][4] = {
  // Two integer columns for 64-bit and 32-bit elements.
  //PackedSingle        PackedDouble        PackedInt             PackedInt
  { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr },
  { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm },
  { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr },
  { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr },
  { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm },
  { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr },
  { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm },
  { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr },
  { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr },
  { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm },
  { X86::VMOVAPSZmr,    X86::VMOVAPDZmr,    X86::VMOVDQA64Zmr,    X86::VMOVDQA32Zmr },
  { X86::VMOVAPSZrm,    X86::VMOVAPDZrm,    X86::VMOVDQA64Zrm,    X86::VMOVDQA32Zrm },
  { X86::VMOVAPSZrr,    X86::VMOVAPDZrr,    X86::VMOVDQA64Zrr,    X86::VMOVDQA32Zrr },
  { X86::VMOVUPSZmr,    X86::VMOVUPDZmr,    X86::VMOVDQU64Zmr,    X86::VMOVDQU32Zmr },
  { X86::VMOVUPSZrm,    X86::VMOVUPDZrm,    X86::VMOVDQU64Zrm,    X86::VMOVDQU32Zrm },
};

static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
  // Two integer columns for 64-bit and 32-bit elements.
  //PackedSingle        PackedDouble        PackedInt           PackedInt
  { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
  { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
  { X86::VANDPSZ128rm,  X86::VANDPDZ128rm,  X86::VPANDQZ128rm,  X86::VPANDDZ128rm },
  { X86::VANDPSZ128rr,  X86::VANDPDZ128rr,  X86::VPANDQZ128rr,  X86::VPANDDZ128rr },
  { X86::VORPSZ128rm,   X86::VORPDZ128rm,   X86::VPORQZ128rm,   X86::VPORDZ128rm },
  { X86::VORPSZ128rr,   X86::VORPDZ128rr,   X86::VPORQZ128rr,   X86::VPORDZ128rr },
  { X86::VXORPSZ128rm,  X86::VXORPDZ128rm,  X86::VPXORQZ128rm,  X86::VPXORDZ128rm },
  { X86::VXORPSZ128rr,  X86::VXORPDZ128rr,  X86::VPXORQZ128rr,  X86::VPXORDZ128rr },
  { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
  { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
  { X86::VANDPSZ256rm,  X86::VANDPDZ256rm,  X86::VPANDQZ256rm,  X86::VPANDDZ256rm },
  { X86::VANDPSZ256rr,  X86::VANDPDZ256rr,  X86::VPANDQZ256rr,  X86::VPANDDZ256rr },
  { X86::VORPSZ256rm,   X86::VORPDZ256rm,   X86::VPORQZ256rm,   X86::VPORDZ256rm },
  { X86::VORPSZ256rr,   X86::VORPDZ256rr,   X86::VPORQZ256rr,   X86::VPORDZ256rr },
  { X86::VXORPSZ256rm,  X86::VXORPDZ256rm,  X86::VPXORQZ256rm,  X86::VPXORDZ256rm },
  { X86::VXORPSZ256rr,  X86::VXORPDZ256rr,  X86::VPXORQZ256rr,  X86::VPXORDZ256rr },
  { X86::VANDNPSZrm,    X86::VANDNPDZrm,    X86::VPANDNQZrm,    X86::VPANDNDZrm },
  { X86::VANDNPSZrr,    X86::VANDNPDZrr,    X86::VPANDNQZrr,    X86::VPANDNDZrr },
  { X86::VANDPSZrm,     X86::VANDPDZrm,     X86::VPANDQZrm,     X86::VPANDDZrm },
  { X86::VANDPSZrr,     X86::VANDPDZrr,     X86::VPANDQZrr,     X86::VPANDDZrr },
  { X86::VORPSZrm,      X86::VORPDZrm,      X86::VPORQZrm,      X86::VPORDZrm },
  { X86::VORPSZrr,      X86::VORPDZrr,      X86::VPORQZrr,      X86::VPORDZrr },
  { X86::VXORPSZrm,     X86::VXORPDZrm,     X86::VPXORQZrm,     X86::VPXORDZrm },
  { X86::VXORPSZrr,     X86::VXORPDZrr,     X86::VPXORQZrr,     X86::VPXORDZrr },
};

static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
  // Two integer columns for 64-bit and 32-bit elements.
6129314564Sdim //PackedSingle PackedDouble 6130314564Sdim //PackedInt PackedInt 6131314564Sdim { X86::VANDNPSZ128rmk, X86::VANDNPDZ128rmk, 6132314564Sdim X86::VPANDNQZ128rmk, X86::VPANDNDZ128rmk }, 6133314564Sdim { X86::VANDNPSZ128rmkz, X86::VANDNPDZ128rmkz, 6134314564Sdim X86::VPANDNQZ128rmkz, X86::VPANDNDZ128rmkz }, 6135314564Sdim { X86::VANDNPSZ128rrk, X86::VANDNPDZ128rrk, 6136314564Sdim X86::VPANDNQZ128rrk, X86::VPANDNDZ128rrk }, 6137314564Sdim { X86::VANDNPSZ128rrkz, X86::VANDNPDZ128rrkz, 6138314564Sdim X86::VPANDNQZ128rrkz, X86::VPANDNDZ128rrkz }, 6139314564Sdim { X86::VANDPSZ128rmk, X86::VANDPDZ128rmk, 6140314564Sdim X86::VPANDQZ128rmk, X86::VPANDDZ128rmk }, 6141314564Sdim { X86::VANDPSZ128rmkz, X86::VANDPDZ128rmkz, 6142314564Sdim X86::VPANDQZ128rmkz, X86::VPANDDZ128rmkz }, 6143314564Sdim { X86::VANDPSZ128rrk, X86::VANDPDZ128rrk, 6144314564Sdim X86::VPANDQZ128rrk, X86::VPANDDZ128rrk }, 6145314564Sdim { X86::VANDPSZ128rrkz, X86::VANDPDZ128rrkz, 6146314564Sdim X86::VPANDQZ128rrkz, X86::VPANDDZ128rrkz }, 6147314564Sdim { X86::VORPSZ128rmk, X86::VORPDZ128rmk, 6148314564Sdim X86::VPORQZ128rmk, X86::VPORDZ128rmk }, 6149314564Sdim { X86::VORPSZ128rmkz, X86::VORPDZ128rmkz, 6150314564Sdim X86::VPORQZ128rmkz, X86::VPORDZ128rmkz }, 6151314564Sdim { X86::VORPSZ128rrk, X86::VORPDZ128rrk, 6152314564Sdim X86::VPORQZ128rrk, X86::VPORDZ128rrk }, 6153314564Sdim { X86::VORPSZ128rrkz, X86::VORPDZ128rrkz, 6154314564Sdim X86::VPORQZ128rrkz, X86::VPORDZ128rrkz }, 6155314564Sdim { X86::VXORPSZ128rmk, X86::VXORPDZ128rmk, 6156314564Sdim X86::VPXORQZ128rmk, X86::VPXORDZ128rmk }, 6157314564Sdim { X86::VXORPSZ128rmkz, X86::VXORPDZ128rmkz, 6158314564Sdim X86::VPXORQZ128rmkz, X86::VPXORDZ128rmkz }, 6159314564Sdim { X86::VXORPSZ128rrk, X86::VXORPDZ128rrk, 6160314564Sdim X86::VPXORQZ128rrk, X86::VPXORDZ128rrk }, 6161314564Sdim { X86::VXORPSZ128rrkz, X86::VXORPDZ128rrkz, 6162314564Sdim X86::VPXORQZ128rrkz, X86::VPXORDZ128rrkz }, 6163314564Sdim { X86::VANDNPSZ256rmk, X86::VANDNPDZ256rmk, 
6164314564Sdim X86::VPANDNQZ256rmk, X86::VPANDNDZ256rmk }, 6165314564Sdim { X86::VANDNPSZ256rmkz, X86::VANDNPDZ256rmkz, 6166314564Sdim X86::VPANDNQZ256rmkz, X86::VPANDNDZ256rmkz }, 6167314564Sdim { X86::VANDNPSZ256rrk, X86::VANDNPDZ256rrk, 6168314564Sdim X86::VPANDNQZ256rrk, X86::VPANDNDZ256rrk }, 6169314564Sdim { X86::VANDNPSZ256rrkz, X86::VANDNPDZ256rrkz, 6170314564Sdim X86::VPANDNQZ256rrkz, X86::VPANDNDZ256rrkz }, 6171314564Sdim { X86::VANDPSZ256rmk, X86::VANDPDZ256rmk, 6172314564Sdim X86::VPANDQZ256rmk, X86::VPANDDZ256rmk }, 6173314564Sdim { X86::VANDPSZ256rmkz, X86::VANDPDZ256rmkz, 6174314564Sdim X86::VPANDQZ256rmkz, X86::VPANDDZ256rmkz }, 6175314564Sdim { X86::VANDPSZ256rrk, X86::VANDPDZ256rrk, 6176314564Sdim X86::VPANDQZ256rrk, X86::VPANDDZ256rrk }, 6177314564Sdim { X86::VANDPSZ256rrkz, X86::VANDPDZ256rrkz, 6178314564Sdim X86::VPANDQZ256rrkz, X86::VPANDDZ256rrkz }, 6179314564Sdim { X86::VORPSZ256rmk, X86::VORPDZ256rmk, 6180314564Sdim X86::VPORQZ256rmk, X86::VPORDZ256rmk }, 6181314564Sdim { X86::VORPSZ256rmkz, X86::VORPDZ256rmkz, 6182314564Sdim X86::VPORQZ256rmkz, X86::VPORDZ256rmkz }, 6183314564Sdim { X86::VORPSZ256rrk, X86::VORPDZ256rrk, 6184314564Sdim X86::VPORQZ256rrk, X86::VPORDZ256rrk }, 6185314564Sdim { X86::VORPSZ256rrkz, X86::VORPDZ256rrkz, 6186314564Sdim X86::VPORQZ256rrkz, X86::VPORDZ256rrkz }, 6187314564Sdim { X86::VXORPSZ256rmk, X86::VXORPDZ256rmk, 6188314564Sdim X86::VPXORQZ256rmk, X86::VPXORDZ256rmk }, 6189314564Sdim { X86::VXORPSZ256rmkz, X86::VXORPDZ256rmkz, 6190314564Sdim X86::VPXORQZ256rmkz, X86::VPXORDZ256rmkz }, 6191314564Sdim { X86::VXORPSZ256rrk, X86::VXORPDZ256rrk, 6192314564Sdim X86::VPXORQZ256rrk, X86::VPXORDZ256rrk }, 6193314564Sdim { X86::VXORPSZ256rrkz, X86::VXORPDZ256rrkz, 6194314564Sdim X86::VPXORQZ256rrkz, X86::VPXORDZ256rrkz }, 6195314564Sdim { X86::VANDNPSZrmk, X86::VANDNPDZrmk, 6196314564Sdim X86::VPANDNQZrmk, X86::VPANDNDZrmk }, 6197314564Sdim { X86::VANDNPSZrmkz, X86::VANDNPDZrmkz, 6198314564Sdim X86::VPANDNQZrmkz, 
X86::VPANDNDZrmkz }, 6199314564Sdim { X86::VANDNPSZrrk, X86::VANDNPDZrrk, 6200314564Sdim X86::VPANDNQZrrk, X86::VPANDNDZrrk }, 6201314564Sdim { X86::VANDNPSZrrkz, X86::VANDNPDZrrkz, 6202314564Sdim X86::VPANDNQZrrkz, X86::VPANDNDZrrkz }, 6203314564Sdim { X86::VANDPSZrmk, X86::VANDPDZrmk, 6204314564Sdim X86::VPANDQZrmk, X86::VPANDDZrmk }, 6205314564Sdim { X86::VANDPSZrmkz, X86::VANDPDZrmkz, 6206314564Sdim X86::VPANDQZrmkz, X86::VPANDDZrmkz }, 6207314564Sdim { X86::VANDPSZrrk, X86::VANDPDZrrk, 6208314564Sdim X86::VPANDQZrrk, X86::VPANDDZrrk }, 6209314564Sdim { X86::VANDPSZrrkz, X86::VANDPDZrrkz, 6210314564Sdim X86::VPANDQZrrkz, X86::VPANDDZrrkz }, 6211314564Sdim { X86::VORPSZrmk, X86::VORPDZrmk, 6212314564Sdim X86::VPORQZrmk, X86::VPORDZrmk }, 6213314564Sdim { X86::VORPSZrmkz, X86::VORPDZrmkz, 6214314564Sdim X86::VPORQZrmkz, X86::VPORDZrmkz }, 6215314564Sdim { X86::VORPSZrrk, X86::VORPDZrrk, 6216314564Sdim X86::VPORQZrrk, X86::VPORDZrrk }, 6217314564Sdim { X86::VORPSZrrkz, X86::VORPDZrrkz, 6218314564Sdim X86::VPORQZrrkz, X86::VPORDZrrkz }, 6219314564Sdim { X86::VXORPSZrmk, X86::VXORPDZrmk, 6220314564Sdim X86::VPXORQZrmk, X86::VPXORDZrmk }, 6221314564Sdim { X86::VXORPSZrmkz, X86::VXORPDZrmkz, 6222314564Sdim X86::VPXORQZrmkz, X86::VPXORDZrmkz }, 6223314564Sdim { X86::VXORPSZrrk, X86::VXORPDZrrk, 6224314564Sdim X86::VPXORQZrrk, X86::VPXORDZrrk }, 6225314564Sdim { X86::VXORPSZrrkz, X86::VXORPDZrrkz, 6226314564Sdim X86::VPXORQZrrkz, X86::VPXORDZrrkz }, 6227314564Sdim // Broadcast loads can be handled the same as masked operations to avoid 6228314564Sdim // changing element size. 
6229314564Sdim { X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb, 6230314564Sdim X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb }, 6231314564Sdim { X86::VANDPSZ128rmb, X86::VANDPDZ128rmb, 6232314564Sdim X86::VPANDQZ128rmb, X86::VPANDDZ128rmb }, 6233314564Sdim { X86::VORPSZ128rmb, X86::VORPDZ128rmb, 6234314564Sdim X86::VPORQZ128rmb, X86::VPORDZ128rmb }, 6235314564Sdim { X86::VXORPSZ128rmb, X86::VXORPDZ128rmb, 6236314564Sdim X86::VPXORQZ128rmb, X86::VPXORDZ128rmb }, 6237314564Sdim { X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb, 6238314564Sdim X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb }, 6239314564Sdim { X86::VANDPSZ256rmb, X86::VANDPDZ256rmb, 6240314564Sdim X86::VPANDQZ256rmb, X86::VPANDDZ256rmb }, 6241314564Sdim { X86::VORPSZ256rmb, X86::VORPDZ256rmb, 6242314564Sdim X86::VPORQZ256rmb, X86::VPORDZ256rmb }, 6243314564Sdim { X86::VXORPSZ256rmb, X86::VXORPDZ256rmb, 6244314564Sdim X86::VPXORQZ256rmb, X86::VPXORDZ256rmb }, 6245314564Sdim { X86::VANDNPSZrmb, X86::VANDNPDZrmb, 6246314564Sdim X86::VPANDNQZrmb, X86::VPANDNDZrmb }, 6247314564Sdim { X86::VANDPSZrmb, X86::VANDPDZrmb, 6248314564Sdim X86::VPANDQZrmb, X86::VPANDDZrmb }, 6249314564Sdim { X86::VANDPSZrmb, X86::VANDPDZrmb, 6250314564Sdim X86::VPANDQZrmb, X86::VPANDDZrmb }, 6251314564Sdim { X86::VORPSZrmb, X86::VORPDZrmb, 6252314564Sdim X86::VPORQZrmb, X86::VPORDZrmb }, 6253314564Sdim { X86::VXORPSZrmb, X86::VXORPDZrmb, 6254314564Sdim X86::VPXORQZrmb, X86::VPXORDZrmb }, 6255314564Sdim { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk, 6256314564Sdim X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk }, 6257314564Sdim { X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk, 6258314564Sdim X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk }, 6259314564Sdim { X86::VORPSZ128rmbk, X86::VORPDZ128rmbk, 6260314564Sdim X86::VPORQZ128rmbk, X86::VPORDZ128rmbk }, 6261314564Sdim { X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk, 6262314564Sdim X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk }, 6263314564Sdim { X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk, 6264314564Sdim X86::VPANDNQZ256rmbk, 
X86::VPANDNDZ256rmbk }, 6265314564Sdim { X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk, 6266314564Sdim X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk }, 6267314564Sdim { X86::VORPSZ256rmbk, X86::VORPDZ256rmbk, 6268314564Sdim X86::VPORQZ256rmbk, X86::VPORDZ256rmbk }, 6269314564Sdim { X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk, 6270314564Sdim X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk }, 6271314564Sdim { X86::VANDNPSZrmbk, X86::VANDNPDZrmbk, 6272314564Sdim X86::VPANDNQZrmbk, X86::VPANDNDZrmbk }, 6273314564Sdim { X86::VANDPSZrmbk, X86::VANDPDZrmbk, 6274314564Sdim X86::VPANDQZrmbk, X86::VPANDDZrmbk }, 6275314564Sdim { X86::VANDPSZrmbk, X86::VANDPDZrmbk, 6276314564Sdim X86::VPANDQZrmbk, X86::VPANDDZrmbk }, 6277314564Sdim { X86::VORPSZrmbk, X86::VORPDZrmbk, 6278314564Sdim X86::VPORQZrmbk, X86::VPORDZrmbk }, 6279314564Sdim { X86::VXORPSZrmbk, X86::VXORPDZrmbk, 6280314564Sdim X86::VPXORQZrmbk, X86::VPXORDZrmbk }, 6281314564Sdim { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz, 6282314564Sdim X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz}, 6283314564Sdim { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz, 6284314564Sdim X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz }, 6285314564Sdim { X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz, 6286314564Sdim X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz }, 6287314564Sdim { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz, 6288314564Sdim X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz }, 6289314564Sdim { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz, 6290314564Sdim X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz}, 6291314564Sdim { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz, 6292314564Sdim X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz }, 6293314564Sdim { X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz, 6294314564Sdim X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz }, 6295314564Sdim { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz, 6296314564Sdim X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz }, 6297314564Sdim { X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz, 6298314564Sdim X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz }, 
6299314564Sdim { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz, 6300314564Sdim X86::VPANDQZrmbkz, X86::VPANDDZrmbkz }, 6301314564Sdim { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz, 6302314564Sdim X86::VPANDQZrmbkz, X86::VPANDDZrmbkz }, 6303314564Sdim { X86::VORPSZrmbkz, X86::VORPDZrmbkz, 6304314564Sdim X86::VPORQZrmbkz, X86::VPORDZrmbkz }, 6305314564Sdim { X86::VXORPSZrmbkz, X86::VXORPDZrmbkz, 6306314564Sdim X86::VPXORQZrmbkz, X86::VPXORDZrmbkz }, 6307314564Sdim}; 6308314564Sdim 6309341825Sdim// NOTE: These should only be used by the custom domain methods. 6310353358Sdimstatic const uint16_t ReplaceableBlendInstrs[][3] = { 6311341825Sdim //PackedSingle PackedDouble PackedInt 6312341825Sdim { X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi }, 6313341825Sdim { X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri }, 6314341825Sdim { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDWrmi }, 6315341825Sdim { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDWrri }, 6316341825Sdim { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi }, 6317341825Sdim { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri }, 6318341825Sdim}; 6319353358Sdimstatic const uint16_t ReplaceableBlendAVX2Instrs[][3] = { 6320341825Sdim //PackedSingle PackedDouble PackedInt 6321341825Sdim { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi }, 6322341825Sdim { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri }, 6323341825Sdim { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDDYrmi }, 6324341825Sdim { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri }, 6325341825Sdim}; 6326341825Sdim 6327341825Sdim// Special table for changing EVEX logic instructions to VEX. 6328341825Sdim// TODO: Should we run EVEX->VEX earlier? 6329341825Sdimstatic const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = { 6330341825Sdim // Two integer columns for 64-bit and 32-bit elements. 
6331341825Sdim //PackedSingle PackedDouble PackedInt PackedInt 6332341825Sdim { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm }, 6333341825Sdim { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr }, 6334341825Sdim { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDQZ128rm, X86::VPANDDZ128rm }, 6335341825Sdim { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDQZ128rr, X86::VPANDDZ128rr }, 6336341825Sdim { X86::VORPSrm, X86::VORPDrm, X86::VPORQZ128rm, X86::VPORDZ128rm }, 6337341825Sdim { X86::VORPSrr, X86::VORPDrr, X86::VPORQZ128rr, X86::VPORDZ128rr }, 6338341825Sdim { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORQZ128rm, X86::VPXORDZ128rm }, 6339341825Sdim { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORQZ128rr, X86::VPXORDZ128rr }, 6340341825Sdim { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm }, 6341341825Sdim { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr }, 6342341825Sdim { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDQZ256rm, X86::VPANDDZ256rm }, 6343341825Sdim { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDQZ256rr, X86::VPANDDZ256rr }, 6344341825Sdim { X86::VORPSYrm, X86::VORPDYrm, X86::VPORQZ256rm, X86::VPORDZ256rm }, 6345341825Sdim { X86::VORPSYrr, X86::VORPDYrr, X86::VPORQZ256rr, X86::VPORDZ256rr }, 6346341825Sdim { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORQZ256rm, X86::VPXORDZ256rm }, 6347341825Sdim { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORQZ256rr, X86::VPXORDZ256rr }, 6348341825Sdim}; 6349341825Sdim 6350206083Srdivacky// FIXME: Some shuffle and unpack instructions have equivalents in different 6351206083Srdivacky// domains, but they require a bit more work than just switching opcodes. 
6352206083Srdivacky 6353314564Sdimstatic const uint16_t *lookup(unsigned opcode, unsigned domain, 6354314564Sdim ArrayRef<uint16_t[3]> Table) { 6355314564Sdim for (const uint16_t (&Row)[3] : Table) 6356296417Sdim if (Row[domain-1] == opcode) 6357296417Sdim return Row; 6358276479Sdim return nullptr; 6359206083Srdivacky} 6360206083Srdivacky 6361314564Sdimstatic const uint16_t *lookupAVX512(unsigned opcode, unsigned domain, 6362314564Sdim ArrayRef<uint16_t[4]> Table) { 6363314564Sdim // If this is the integer domain make sure to check both integer columns. 6364314564Sdim for (const uint16_t (&Row)[4] : Table) 6365314564Sdim if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode)) 6366296417Sdim return Row; 6367276479Sdim return nullptr; 6368234353Sdim} 6369234353Sdim 6370341825Sdim// Helper to attempt to widen/narrow blend masks. 6371341825Sdimstatic bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth, 6372341825Sdim unsigned NewWidth, unsigned *pNewMask = nullptr) { 6373341825Sdim assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) && 6374341825Sdim "Illegal blend mask scale"); 6375341825Sdim unsigned NewMask = 0; 6376341825Sdim 6377341825Sdim if ((OldWidth % NewWidth) == 0) { 6378341825Sdim unsigned Scale = OldWidth / NewWidth; 6379341825Sdim unsigned SubMask = (1u << Scale) - 1; 6380341825Sdim for (unsigned i = 0; i != NewWidth; ++i) { 6381341825Sdim unsigned Sub = (OldMask >> (i * Scale)) & SubMask; 6382341825Sdim if (Sub == SubMask) 6383341825Sdim NewMask |= (1u << i); 6384341825Sdim else if (Sub != 0x0) 6385341825Sdim return false; 6386341825Sdim } 6387341825Sdim } else { 6388341825Sdim unsigned Scale = NewWidth / OldWidth; 6389341825Sdim unsigned SubMask = (1u << Scale) - 1; 6390341825Sdim for (unsigned i = 0; i != OldWidth; ++i) { 6391341825Sdim if (OldMask & (1 << i)) { 6392341825Sdim NewMask |= (SubMask << (i * Scale)); 6393341825Sdim } 6394341825Sdim } 6395341825Sdim } 6396341825Sdim 6397341825Sdim if (pNewMask) 6398341825Sdim 
*pNewMask = NewMask; 6399341825Sdim return true; 6400341825Sdim} 6401341825Sdim 6402341825Sdimuint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const { 6403341825Sdim unsigned Opcode = MI.getOpcode(); 6404341825Sdim unsigned NumOperands = MI.getDesc().getNumOperands(); 6405341825Sdim 6406341825Sdim auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) { 6407341825Sdim uint16_t validDomains = 0; 6408341825Sdim if (MI.getOperand(NumOperands - 1).isImm()) { 6409341825Sdim unsigned Imm = MI.getOperand(NumOperands - 1).getImm(); 6410341825Sdim if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4)) 6411341825Sdim validDomains |= 0x2; // PackedSingle 6412341825Sdim if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2)) 6413341825Sdim validDomains |= 0x4; // PackedDouble 6414341825Sdim if (!Is256 || Subtarget.hasAVX2()) 6415341825Sdim validDomains |= 0x8; // PackedInt 6416341825Sdim } 6417341825Sdim return validDomains; 6418341825Sdim }; 6419341825Sdim 6420341825Sdim switch (Opcode) { 6421341825Sdim case X86::BLENDPDrmi: 6422341825Sdim case X86::BLENDPDrri: 6423341825Sdim case X86::VBLENDPDrmi: 6424341825Sdim case X86::VBLENDPDrri: 6425341825Sdim return GetBlendDomains(2, false); 6426341825Sdim case X86::VBLENDPDYrmi: 6427341825Sdim case X86::VBLENDPDYrri: 6428341825Sdim return GetBlendDomains(4, true); 6429341825Sdim case X86::BLENDPSrmi: 6430341825Sdim case X86::BLENDPSrri: 6431341825Sdim case X86::VBLENDPSrmi: 6432341825Sdim case X86::VBLENDPSrri: 6433341825Sdim case X86::VPBLENDDrmi: 6434341825Sdim case X86::VPBLENDDrri: 6435341825Sdim return GetBlendDomains(4, false); 6436341825Sdim case X86::VBLENDPSYrmi: 6437341825Sdim case X86::VBLENDPSYrri: 6438341825Sdim case X86::VPBLENDDYrmi: 6439341825Sdim case X86::VPBLENDDYrri: 6440341825Sdim return GetBlendDomains(8, true); 6441341825Sdim case X86::PBLENDWrmi: 6442341825Sdim case X86::PBLENDWrri: 6443341825Sdim case X86::VPBLENDWrmi: 6444341825Sdim case X86::VPBLENDWrri: 6445341825Sdim // Treat VPBLENDWY 
as a 128-bit vector as it repeats the lo/hi masks. 6446341825Sdim case X86::VPBLENDWYrmi: 6447341825Sdim case X86::VPBLENDWYrri: 6448341825Sdim return GetBlendDomains(8, false); 6449341825Sdim case X86::VPANDDZ128rr: case X86::VPANDDZ128rm: 6450341825Sdim case X86::VPANDDZ256rr: case X86::VPANDDZ256rm: 6451341825Sdim case X86::VPANDQZ128rr: case X86::VPANDQZ128rm: 6452341825Sdim case X86::VPANDQZ256rr: case X86::VPANDQZ256rm: 6453341825Sdim case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm: 6454341825Sdim case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm: 6455341825Sdim case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm: 6456341825Sdim case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm: 6457341825Sdim case X86::VPORDZ128rr: case X86::VPORDZ128rm: 6458341825Sdim case X86::VPORDZ256rr: case X86::VPORDZ256rm: 6459341825Sdim case X86::VPORQZ128rr: case X86::VPORQZ128rm: 6460341825Sdim case X86::VPORQZ256rr: case X86::VPORQZ256rm: 6461341825Sdim case X86::VPXORDZ128rr: case X86::VPXORDZ128rm: 6462341825Sdim case X86::VPXORDZ256rr: case X86::VPXORDZ256rm: 6463341825Sdim case X86::VPXORQZ128rr: case X86::VPXORQZ128rm: 6464341825Sdim case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: 6465341825Sdim // If we don't have DQI see if we can still switch from an EVEX integer 6466341825Sdim // instruction to a VEX floating point instruction. 6467341825Sdim if (Subtarget.hasDQI()) 6468341825Sdim return 0; 6469341825Sdim 6470341825Sdim if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16) 6471341825Sdim return 0; 6472341825Sdim if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16) 6473341825Sdim return 0; 6474341825Sdim // Register forms will have 3 operands. Memory form will have more. 6475341825Sdim if (NumOperands == 3 && 6476341825Sdim RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16) 6477341825Sdim return 0; 6478341825Sdim 6479341825Sdim // All domains are valid. 
6480341825Sdim return 0xe; 6481344779Sdim case X86::MOVHLPSrr: 6482344779Sdim // We can swap domains when both inputs are the same register. 6483344779Sdim // FIXME: This doesn't catch all the cases we would like. If the input 6484344779Sdim // register isn't KILLed by the instruction, the two address instruction 6485344779Sdim // pass puts a COPY on one input. The other input uses the original 6486344779Sdim // register. This prevents the same physical register from being used by 6487344779Sdim // both inputs. 6488344779Sdim if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() && 6489344779Sdim MI.getOperand(0).getSubReg() == 0 && 6490344779Sdim MI.getOperand(1).getSubReg() == 0 && 6491344779Sdim MI.getOperand(2).getSubReg() == 0) 6492344779Sdim return 0x6; 6493344779Sdim return 0; 6494353358Sdim case X86::SHUFPDrri: 6495353358Sdim return 0x6; 6496341825Sdim } 6497341825Sdim return 0; 6498341825Sdim} 6499341825Sdim 6500341825Sdimbool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI, 6501341825Sdim unsigned Domain) const { 6502341825Sdim assert(Domain > 0 && Domain < 4 && "Invalid execution domain"); 6503341825Sdim uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 6504341825Sdim assert(dom && "Not an SSE instruction"); 6505341825Sdim 6506341825Sdim unsigned Opcode = MI.getOpcode(); 6507341825Sdim unsigned NumOperands = MI.getDesc().getNumOperands(); 6508341825Sdim 6509341825Sdim auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) { 6510341825Sdim if (MI.getOperand(NumOperands - 1).isImm()) { 6511341825Sdim unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255; 6512341825Sdim Imm = (ImmWidth == 16 ? 
((Imm << 8) | Imm) : Imm); 6513341825Sdim unsigned NewImm = Imm; 6514341825Sdim 6515353358Sdim const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs); 6516341825Sdim if (!table) 6517353358Sdim table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs); 6518341825Sdim 6519341825Sdim if (Domain == 1) { // PackedSingle 6520341825Sdim AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm); 6521341825Sdim } else if (Domain == 2) { // PackedDouble 6522341825Sdim AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm); 6523341825Sdim } else if (Domain == 3) { // PackedInt 6524341825Sdim if (Subtarget.hasAVX2()) { 6525341825Sdim // If we are already VPBLENDW use that, else use VPBLENDD. 6526341825Sdim if ((ImmWidth / (Is256 ? 2 : 1)) != 8) { 6527353358Sdim table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs); 6528341825Sdim AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm); 6529341825Sdim } 6530341825Sdim } else { 6531341825Sdim assert(!Is256 && "128-bit vector expected"); 6532341825Sdim AdjustBlendMask(Imm, ImmWidth, 8, &NewImm); 6533341825Sdim } 6534341825Sdim } 6535341825Sdim 6536341825Sdim assert(table && table[Domain - 1] && "Unknown domain op"); 6537341825Sdim MI.setDesc(get(table[Domain - 1])); 6538341825Sdim MI.getOperand(NumOperands - 1).setImm(NewImm & 255); 6539341825Sdim } 6540341825Sdim return true; 6541341825Sdim }; 6542341825Sdim 6543341825Sdim switch (Opcode) { 6544341825Sdim case X86::BLENDPDrmi: 6545341825Sdim case X86::BLENDPDrri: 6546341825Sdim case X86::VBLENDPDrmi: 6547341825Sdim case X86::VBLENDPDrri: 6548341825Sdim return SetBlendDomain(2, false); 6549341825Sdim case X86::VBLENDPDYrmi: 6550341825Sdim case X86::VBLENDPDYrri: 6551341825Sdim return SetBlendDomain(4, true); 6552341825Sdim case X86::BLENDPSrmi: 6553341825Sdim case X86::BLENDPSrri: 6554341825Sdim case X86::VBLENDPSrmi: 6555341825Sdim case X86::VBLENDPSrri: 6556341825Sdim case X86::VPBLENDDrmi: 6557341825Sdim case X86::VPBLENDDrri: 6558341825Sdim return SetBlendDomain(4, 
false); 6559341825Sdim case X86::VBLENDPSYrmi: 6560341825Sdim case X86::VBLENDPSYrri: 6561341825Sdim case X86::VPBLENDDYrmi: 6562341825Sdim case X86::VPBLENDDYrri: 6563341825Sdim return SetBlendDomain(8, true); 6564341825Sdim case X86::PBLENDWrmi: 6565341825Sdim case X86::PBLENDWrri: 6566341825Sdim case X86::VPBLENDWrmi: 6567341825Sdim case X86::VPBLENDWrri: 6568341825Sdim return SetBlendDomain(8, false); 6569341825Sdim case X86::VPBLENDWYrmi: 6570341825Sdim case X86::VPBLENDWYrri: 6571341825Sdim return SetBlendDomain(16, true); 6572341825Sdim case X86::VPANDDZ128rr: case X86::VPANDDZ128rm: 6573341825Sdim case X86::VPANDDZ256rr: case X86::VPANDDZ256rm: 6574341825Sdim case X86::VPANDQZ128rr: case X86::VPANDQZ128rm: 6575341825Sdim case X86::VPANDQZ256rr: case X86::VPANDQZ256rm: 6576341825Sdim case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm: 6577341825Sdim case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm: 6578341825Sdim case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm: 6579341825Sdim case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm: 6580341825Sdim case X86::VPORDZ128rr: case X86::VPORDZ128rm: 6581341825Sdim case X86::VPORDZ256rr: case X86::VPORDZ256rm: 6582341825Sdim case X86::VPORQZ128rr: case X86::VPORQZ128rm: 6583341825Sdim case X86::VPORQZ256rr: case X86::VPORQZ256rm: 6584341825Sdim case X86::VPXORDZ128rr: case X86::VPXORDZ128rm: 6585341825Sdim case X86::VPXORDZ256rr: case X86::VPXORDZ256rm: 6586341825Sdim case X86::VPXORQZ128rr: case X86::VPXORQZ128rm: 6587341825Sdim case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: { 6588341825Sdim // Without DQI, convert EVEX instructions to VEX instructions. 
6589341825Sdim if (Subtarget.hasDQI()) 6590341825Sdim return false; 6591341825Sdim 6592341825Sdim const uint16_t *table = lookupAVX512(MI.getOpcode(), dom, 6593341825Sdim ReplaceableCustomAVX512LogicInstrs); 6594341825Sdim assert(table && "Instruction not found in table?"); 6595341825Sdim // Don't change integer Q instructions to D instructions and 6596341825Sdim // use D intructions if we started with a PS instruction. 6597341825Sdim if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode())) 6598341825Sdim Domain = 4; 6599341825Sdim MI.setDesc(get(table[Domain - 1])); 6600341825Sdim return true; 6601341825Sdim } 6602344779Sdim case X86::UNPCKHPDrr: 6603344779Sdim case X86::MOVHLPSrr: 6604344779Sdim // We just need to commute the instruction which will switch the domains. 6605344779Sdim if (Domain != dom && Domain != 3 && 6606344779Sdim MI.getOperand(1).getReg() == MI.getOperand(2).getReg() && 6607344779Sdim MI.getOperand(0).getSubReg() == 0 && 6608344779Sdim MI.getOperand(1).getSubReg() == 0 && 6609344779Sdim MI.getOperand(2).getSubReg() == 0) { 6610344779Sdim commuteInstruction(MI, false); 6611344779Sdim return true; 6612344779Sdim } 6613344779Sdim // We must always return true for MOVHLPSrr. 
6614344779Sdim if (Opcode == X86::MOVHLPSrr) 6615344779Sdim return true; 6616353358Sdim break; 6617353358Sdim case X86::SHUFPDrri: { 6618353358Sdim if (Domain == 1) { 6619353358Sdim unsigned Imm = MI.getOperand(3).getImm(); 6620353358Sdim unsigned NewImm = 0x44; 6621353358Sdim if (Imm & 1) NewImm |= 0x0a; 6622353358Sdim if (Imm & 2) NewImm |= 0xa0; 6623353358Sdim MI.getOperand(3).setImm(NewImm); 6624353358Sdim MI.setDesc(get(X86::SHUFPSrri)); 6625353358Sdim } 6626353358Sdim return true; 6627341825Sdim } 6628353358Sdim } 6629341825Sdim return false; 6630341825Sdim} 6631341825Sdim 6632206083Srdivackystd::pair<uint16_t, uint16_t> 6633309124SdimX86InstrInfo::getExecutionDomain(const MachineInstr &MI) const { 6634309124Sdim uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 6635314564Sdim unsigned opcode = MI.getOpcode(); 6636234353Sdim uint16_t validDomains = 0; 6637314564Sdim if (domain) { 6638341825Sdim // Attempt to match for custom instructions. 6639341825Sdim validDomains = getExecutionDomainCustom(MI); 6640341825Sdim if (validDomains) 6641341825Sdim return std::make_pair(domain, validDomains); 6642341825Sdim 6643341825Sdim if (lookup(opcode, domain, ReplaceableInstrs)) { 6644314564Sdim validDomains = 0xe; 6645314564Sdim } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) { 6646314564Sdim validDomains = Subtarget.hasAVX2() ? 0xe : 0x6; 6647353358Sdim } else if (lookup(opcode, domain, ReplaceableInstrsFP)) { 6648353358Sdim validDomains = 0x6; 6649321369Sdim } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) { 6650321369Sdim // Insert/extract instructions should only effect domain if AVX2 6651321369Sdim // is enabled. 
// (Tail of X86InstrInfo::getExecutionDomain; the head of this function is
// above this chunk.  validDomains is a bitmask of execution domains this
// opcode may be switched to: bit 1 = PackedSingle, bit 2 = PackedDouble,
// bit 3 = PackedInt.)
    if (!Subtarget.hasAVX2())
      return std::make_pair(0, 0);
    validDomains = 0xe;
  } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
    validDomains = 0xe;
  } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain,
                                                ReplaceableInstrsAVX512DQ)) {
    validDomains = 0xe;
  } else if (Subtarget.hasDQI()) {
    if (const uint16_t *table = lookupAVX512(opcode, domain,
                                             ReplaceableInstrsAVX512DQMasked)) {
      // Masked DQ forms: if we are in PackedSingle (1), or this is the
      // integer-Q column (3) entry itself, only PackedSingle|PackedInt (0xa)
      // is reachable; otherwise PackedDouble|PackedInt (0xc).
      if (domain == 1 || (domain == 3 && table[3] == opcode))
        validDomains = 0xa;
      else
        validDomains = 0xc;
    }
  }
  }
  return std::make_pair(domain, validDomains);
}

/// Rewrite \p MI into the equivalent instruction of execution domain
/// \p Domain (1-based; see the SSEDomainShift encoding read below).
/// Works by probing a fixed sequence of replacement tables; each matching
/// row has four opcode columns and column (Domain - 1) is the replacement.
/// When an integer Q instruction must not be narrowed to its D form,
/// Domain is forced to 4 so column 3 — the original Q opcode — is chosen.
void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
  assert(Domain>0 && Domain<4 && "Invalid execution domain");
  // Current domain of MI, decoded from its TSFlags.
  uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
  assert(dom && "Not an SSE instruction");

  // Attempt to match for custom instructions.
  if (setExecutionDomainCustom(MI, Domain))
    return;

  // Probe the tables in order; the first hit wins.  The order mirrors
  // getExecutionDomain above and must not be changed.
  const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
  if (!table) { // try the AVX2 table
    assert((Subtarget.hasAVX2() || Domain < 3) &&
           "256-bit vector operations only available in AVX2");
    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
  }
  if (!table) { // try the FP table
    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
    assert((!table || Domain < 3) &&
           "Can only select PackedSingle or PackedDouble");
  }
  if (!table) { // try the AVX2 insert/extract table
    assert(Subtarget.hasAVX2() &&
           "256-bit insert/extract only available in AVX2");
    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
  }
  if (!table) { // try the AVX512 table
    assert(Subtarget.hasAVX512() && "Requires AVX-512");
    table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
    // Don't change integer Q instructions to D instructions.
    if (table && Domain == 3 && table[3] == MI.getOpcode())
      Domain = 4;
  }
  if (!table) { // try the AVX512DQ table
    assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
    table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
    // Don't change integer Q instructions to D instructions and
    // use D instructions if we started with a PS instruction.
    if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
      Domain = 4;
  }
  if (!table) { // try the AVX512DQMasked table
    assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
    table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
    if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
      Domain = 4;
  }
  assert(table && "Cannot change domain");
  MI.setDesc(get(table[Domain - 1]));
}

/// Return the noop instruction to use for a noop.
void X86InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(X86::NOOP);
}

/// Returns true for opcodes whose result is expensive to produce (divide,
/// square root, gather/scatter), so schedulers treat them as long-latency
/// definitions.  Opcodes not listed here are assumed cheap (default: false).
bool X86InstrInfo::isHighLatencyDef(int opc) const {
  switch (opc) {
  default: return false;
  // SSE divide and square-root forms.
  case X86::DIVPDrm:    case X86::DIVPDrr:
  case X86::DIVPSrm:    case X86::DIVPSrr:
  case X86::DIVSDrm:    case X86::DIVSDrm_Int:
  case X86::DIVSDrr:    case X86::DIVSDrr_Int:
  case X86::DIVSSrm:    case X86::DIVSSrm_Int:
  case X86::DIVSSrr:    case X86::DIVSSrr_Int:
  case X86::SQRTPDm:    case X86::SQRTPDr:
  case X86::SQRTPSm:    case X86::SQRTPSr:
  case X86::SQRTSDm:    case X86::SQRTSDm_Int:
  case X86::SQRTSDr:    case X86::SQRTSDr_Int:
  case X86::SQRTSSm:    case X86::SQRTSSm_Int:
  case X86::SQRTSSr:    case X86::SQRTSSr_Int:
  // AVX instructions with high latency
  case X86::VDIVPDrm:
  case X86::VDIVPDrr:
  case // (label list continues in the following chunk with X86::VDIVPDYrm:)
  // (Continuation of the isHighLatencyDef opcode list started in the
  // previous chunk; the first label below completes the dangling `case`.)
       X86::VDIVPDYrm:
  case X86::VDIVPDYrr:  case X86::VDIVPSrm:   case X86::VDIVPSrr:
  case X86::VDIVPSYrm:  case X86::VDIVPSYrr:
  case X86::VDIVSDrm:   case X86::VDIVSDrm_Int:
  case X86::VDIVSDrr:   case X86::VDIVSDrr_Int:
  case X86::VDIVSSrm:   case X86::VDIVSSrm_Int:
  case X86::VDIVSSrr:   case X86::VDIVSSrr_Int:
  case X86::VSQRTPDm:   case X86::VSQRTPDr:
  case X86::VSQRTPDYm:  case X86::VSQRTPDYr:
  case X86::VSQRTPSm:   case X86::VSQRTPSr:
  case X86::VSQRTPSYm:  case X86::VSQRTPSYr:
  case X86::VSQRTSDm:   case X86::VSQRTSDm_Int:
  case X86::VSQRTSDr:   case X86::VSQRTSDr_Int:
  case X86::VSQRTSSm:   case X86::VSQRTSSm_Int:
  case X86::VSQRTSSr:   case X86::VSQRTSSr_Int:
  // AVX512 instructions with high latency
  case X86::VDIVPDZ128rm:   case X86::VDIVPDZ128rmb:
  case X86::VDIVPDZ128rmbk: case X86::VDIVPDZ128rmbkz:
  case X86::VDIVPDZ128rmk:  case X86::VDIVPDZ128rmkz:
  case X86::VDIVPDZ128rr:   case X86::VDIVPDZ128rrk:
  case X86::VDIVPDZ128rrkz:
  case X86::VDIVPDZ256rm:   case X86::VDIVPDZ256rmb:
  case X86::VDIVPDZ256rmbk: case X86::VDIVPDZ256rmbkz:
  case X86::VDIVPDZ256rmk:  case X86::VDIVPDZ256rmkz:
  case X86::VDIVPDZ256rr:   case X86::VDIVPDZ256rrk:
  case X86::VDIVPDZ256rrkz:
  case X86::VDIVPDZrrb:     case X86::VDIVPDZrrbk:
  case X86::VDIVPDZrrbkz:
  case X86::VDIVPDZrm:      case X86::VDIVPDZrmb:
  case X86::VDIVPDZrmbk:    case X86::VDIVPDZrmbkz:
  case X86::VDIVPDZrmk:     case X86::VDIVPDZrmkz:
  case X86::VDIVPDZrr:      case X86::VDIVPDZrrk:
  case X86::VDIVPDZrrkz:
  case X86::VDIVPSZ128rm:   case X86::VDIVPSZ128rmb:
  case X86::VDIVPSZ128rmbk: case X86::VDIVPSZ128rmbkz:
  case X86::VDIVPSZ128rmk:  case X86::VDIVPSZ128rmkz:
  case X86::VDIVPSZ128rr:   case X86::VDIVPSZ128rrk:
  case X86::VDIVPSZ128rrkz:
  case X86::VDIVPSZ256rm:   case X86::VDIVPSZ256rmb:
  case X86::VDIVPSZ256rmbk: case X86::VDIVPSZ256rmbkz:
  case X86::VDIVPSZ256rmk:  case X86::VDIVPSZ256rmkz:
  case X86::VDIVPSZ256rr:   case X86::VDIVPSZ256rrk:
  case X86::VDIVPSZ256rrkz:
  case X86::VDIVPSZrrb:     case X86::VDIVPSZrrbk:
  case X86::VDIVPSZrrbkz:
  case X86::VDIVPSZrm:      case X86::VDIVPSZrmb:
  case X86::VDIVPSZrmbk:    case X86::VDIVPSZrmbkz:
  case X86::VDIVPSZrmk:     case X86::VDIVPSZrmkz:
  case X86::VDIVPSZrr:      case X86::VDIVPSZrrk:
  case X86::VDIVPSZrrkz:
  case X86::VDIVSDZrm:        case X86::VDIVSDZrr:
  case X86::VDIVSDZrm_Int:    case X86::VDIVSDZrm_Intk:
  case X86::VDIVSDZrm_Intkz:  case X86::VDIVSDZrr_Int:
  case X86::VDIVSDZrr_Intk:   case X86::VDIVSDZrr_Intkz:
  case X86::VDIVSDZrrb_Int:   case X86::VDIVSDZrrb_Intk:
  case X86::VDIVSDZrrb_Intkz:
  case X86::VDIVSSZrm:        case X86::VDIVSSZrr:
  case X86::VDIVSSZrm_Int:    case X86::VDIVSSZrm_Intk:
  case X86::VDIVSSZrm_Intkz:  case X86::VDIVSSZrr_Int:
  case X86::VDIVSSZrr_Intk:   case X86::VDIVSSZrr_Intkz:
  case X86::VDIVSSZrrb_Int:   case X86::VDIVSSZrrb_Intk:
  case X86::VDIVSSZrrb_Intkz:
  case X86::VSQRTPDZ128m:   case X86::VSQRTPDZ128mb:
  case X86::VSQRTPDZ128mbk: case X86::VSQRTPDZ128mbkz:
  case X86::VSQRTPDZ128mk:  case X86::VSQRTPDZ128mkz:
  case X86::VSQRTPDZ128r:   case X86::VSQRTPDZ128rk:
  case X86::VSQRTPDZ128rkz:
  case X86::VSQRTPDZ256m:   case X86::VSQRTPDZ256mb:
  case X86::VSQRTPDZ256mbk: case X86::VSQRTPDZ256mbkz:
  case X86::VSQRTPDZ256mk:  case X86::VSQRTPDZ256mkz:
  case X86::VSQRTPDZ256r:   case X86::VSQRTPDZ256rk:
  case X86::VSQRTPDZ256rkz:
  case X86::VSQRTPDZm:      case X86::VSQRTPDZmb:
  case X86::VSQRTPDZmbk:    case X86::VSQRTPDZmbkz:
  case X86::VSQRTPDZmk:     case X86::VSQRTPDZmkz:
  case X86::VSQRTPDZr:      case X86::VSQRTPDZrb:
  case X86::VSQRTPDZrbk:    case X86::VSQRTPDZrbkz:
  case X86::VSQRTPDZrk:     case X86::VSQRTPDZrkz:
  case X86::VSQRTPSZ128m:   case X86::VSQRTPSZ128mb:
  case X86::VSQRTPSZ128mbk: case X86::VSQRTPSZ128mbkz:
  case X86::VSQRTPSZ128mk:  case X86::VSQRTPSZ128mkz:
  case X86::VSQRTPSZ128r:   case X86::VSQRTPSZ128rk:
  case X86::VSQRTPSZ128rkz:
  case X86::VSQRTPSZ256m:   case X86::VSQRTPSZ256mb:
  case X86::VSQRTPSZ256mbk: case X86::VSQRTPSZ256mbkz:
  case X86::VSQRTPSZ256mk:  case X86::VSQRTPSZ256mkz:
  case X86::VSQRTPSZ256r:   case X86::VSQRTPSZ256rk:
  case X86::VSQRTPSZ256rkz:
  case X86::VSQRTPSZm:      case X86::VSQRTPSZmb:
  case X86::VSQRTPSZmbk:    case X86::VSQRTPSZmbkz:
  case X86::VSQRTPSZmk:     case X86::VSQRTPSZmkz:
  case X86::VSQRTPSZr:      case X86::VSQRTPSZrb:
  case X86::VSQRTPSZrbk:    case X86::VSQRTPSZrbkz:
  case X86::VSQRTPSZrk:     case X86::VSQRTPSZrkz:
  case X86::VSQRTSDZm:        case X86::VSQRTSDZm_Int:
  case X86::VSQRTSDZm_Intk:   case X86::VSQRTSDZm_Intkz:
  case X86::VSQRTSDZr:        case X86::VSQRTSDZr_Int:
  case X86::VSQRTSDZr_Intk:   case X86::VSQRTSDZr_Intkz:
  case X86::VSQRTSDZrb_Int:   case X86::VSQRTSDZrb_Intk:
  case X86::VSQRTSDZrb_Intkz:
  case X86::VSQRTSSZm:        case X86::VSQRTSSZm_Int:
  case X86::VSQRTSSZm_Intk:   case X86::VSQRTSSZm_Intkz:
  case X86::VSQRTSSZr:        case X86::VSQRTSSZr_Int:
  case X86::VSQRTSSZr_Intk:   case X86::VSQRTSSZr_Intkz:
  case X86::VSQRTSSZrb_Int:   case X86::VSQRTSSZrb_Intk:
  case X86::VSQRTSSZrb_Intkz:

  // Gather, gather-prefetch, and scatter forms.
  case X86::VGATHERDPDYrm:    case X86::VGATHERDPDZ128rm:
  case X86::VGATHERDPDZ256rm: case X86::VGATHERDPDZrm:
  case X86::VGATHERDPDrm:
  case X86::VGATHERDPSYrm:    case X86::VGATHERDPSZ128rm:
  case X86::VGATHERDPSZ256rm: case X86::VGATHERDPSZrm:
  case X86::VGATHERDPSrm:
  case X86::VGATHERPF0DPDm:   case X86::VGATHERPF0DPSm:
  case X86::VGATHERPF0QPDm:   case X86::VGATHERPF0QPSm:
  case X86::VGATHERPF1DPDm:   case X86::VGATHERPF1DPSm:
  case X86::VGATHERPF1QPDm:   case X86::VGATHERPF1QPSm:
  case X86::VGATHERQPDYrm:    case X86::VGATHERQPDZ128rm:
  case X86::VGATHERQPDZ256rm: case X86::VGATHERQPDZrm:
  case X86::VGATHERQPDrm:
  case X86::VGATHERQPSYrm:    case X86::VGATHERQPSZ128rm:
  case X86::VGATHERQPSZ256rm: case X86::VGATHERQPSZrm:
  case X86::VGATHERQPSrm:
  case X86::VPGATHERDDYrm:    case X86::VPGATHERDDZ128rm:
  case X86::VPGATHERDDZ256rm: case X86::VPGATHERDDZrm:
  case X86::VPGATHERDDrm:
  case X86::VPGATHERDQYrm:    case X86::VPGATHERDQZ128rm:
  case X86::VPGATHERDQZ256rm: case X86::VPGATHERDQZrm:
  case X86::VPGATHERDQrm:
  case X86::VPGATHERQDYrm:    case X86::VPGATHERQDZ128rm:
  case X86::VPGATHERQDZ256rm: case X86::VPGATHERQDZrm:
  case X86::VPGATHERQDrm:
  case X86::VPGATHERQQYrm:    case X86::VPGATHERQQZ128rm:
  case X86::VPGATHERQQZ256rm: case X86::VPGATHERQQZrm:
  case X86::VPGATHERQQrm:
  case X86::VSCATTERDPDZ128mr: case X86::VSCATTERDPDZ256mr:
  case X86::VSCATTERDPDZmr:
  case X86::VSCATTERDPSZ128mr: case X86::VSCATTERDPSZ256mr:
  case X86::VSCATTERDPSZmr:
  case X86::VSCATTERPF0DPDm:   case X86::VSCATTERPF0DPSm:
  case X86::VSCATTERPF0QPDm:   case X86::VSCATTERPF0QPSm:
  case X86::VSCATTERPF1DPDm:   case X86::VSCATTERPF1DPSm:
  case X86::VSCATTERPF1QPDm:   case X86::VSCATTERPF1QPSm:
  case X86::VSCATTERQPDZ128mr: case X86::VSCATTERQPDZ256mr:
  case X86::VSCATTERQPDZmr:
  case X86::VSCATTERQPSZ128mr: case X86::VSCATTERQPSZ256mr:
  case X86::VSCATTERQPSZmr:
  case X86::VPSCATTERDDZ128mr: case X86::VPSCATTERDDZ256mr:
  case X86::VPSCATTERDDZmr:
  case X86::VPSCATTERDQZ128mr: case X86::VPSCATTERDQZ256mr:
  case X86::VPSCATTERDQZmr:
  case X86::VPSCATTERQDZ128mr: case X86::VPSCATTERQDZ256mr:
  case X86::VPSCATTERQDZmr:
  case X86::VPSCATTERQQZ128mr: case X86::VPSCATTERQQZ256mr:
  case X86::VPSCATTERQQZmr:
    return true;
  }
}

/// TargetInstrInfo hook: report whether the value defined by \p DefMI and
/// consumed by \p UseMI is expensive to produce.  The extra parameters are
/// part of the fixed TargetInstrInfo interface; this implementation decides
/// purely from the defining opcode via isHighLatencyDef.
bool X86InstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
                                         const MachineRegisterInfo *MRI,
                                         const MachineInstr &DefMI,
                                         unsigned DefIdx,
                                         const MachineInstr &UseMI,
                                         unsigned UseIdx) const {
  return isHighLatencyDef(DefMI.getOpcode());
}

/// Check whether \p Inst's operands permit reassociation: it must be a
/// binary operator (3 operands, or 4 when an implicit EFLAGS def trails).
bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
                                           const MachineBasicBlock *MBB) const {
  assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) &&
         "Reassociation needs binary operators");

  // Integer binary math/logic instructions have a third source operand:
  // the EFLAGS register. That operand must be both defined here and never
  // used; ie, it must be dead.
  // If the EFLAGS operand is live, then we can
  // not change anything because rearranging the operands could affect other
  // instructions that depend on the exact status flags (zero, sign, etc.)
  // that are set by using these particular operands with this operation.
  if (Inst.getNumOperands() == 4) {
    assert(Inst.getOperand(3).isReg() &&
           Inst.getOperand(3).getReg() == X86::EFLAGS &&
           "Unexpected operand in reassociable instruction");
    if (!Inst.getOperand(3).isDead())
      return false;
  }

  // Defer the generic structural checks to the target-independent code.
  return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
}

// TODO: There are many more machine instruction opcodes to match:
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
// 3. Other forms of the same operation (intrinsics and other variants)
/// Opcodes that are exactly associative and commutative return true
/// unconditionally; FP add/mul opcodes (whose reassociation changes rounding)
/// are only reported as such under -enable-unsafe-fp-math.
bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  // GPR logic ops — exact for all inputs.
  case X86::AND8rr:  case X86::AND16rr: case X86::AND32rr: case X86::AND64rr:
  case X86::OR8rr:   case X86::OR16rr:  case X86::OR32rr:  case X86::OR64rr:
  case X86::XOR8rr:  case X86::XOR16rr: case X86::XOR32rr: case X86::XOR64rr:
  case X86::IMUL16rr: case X86::IMUL32rr: case X86::IMUL64rr:
  // SSE integer logic / arithmetic.
  case X86::PANDrr:  case X86::PORrr:   case X86::PXORrr:
  case X86::ANDPDrr: case X86::ANDPSrr:
  case X86::ORPDrr:  case X86::ORPSrr:
  case X86::XORPDrr: case X86::XORPSrr:
  case X86::PADDBrr: case X86::PADDWrr: case X86::PADDDrr: case X86::PADDQrr:
  case X86::PMULLWrr: case X86::PMULLDrr:
  case X86::PMAXSBrr: case X86::PMAXSDrr: case X86::PMAXSWrr:
  case X86::PMAXUBrr: case X86::PMAXUDrr: case X86::PMAXUWrr:
  case X86::PMINSBrr: case X86::PMINSDrr: case X86::PMINSWrr:
  case X86::PMINUBrr: case X86::PMINUDrr: case X86::PMINUWrr:
  // AVX / AVX-512 integer logic.
  case X86::VPANDrr:      case X86::VPANDYrr:
  case X86::VPANDDZ128rr: case X86::VPANDDZ256rr: case X86::VPANDDZrr:
  case X86::VPANDQZ128rr: case X86::VPANDQZ256rr: case X86::VPANDQZrr:
  case X86::VPORrr:       case X86::VPORYrr:
  case X86::VPORDZ128rr:  case X86::VPORDZ256rr:  case X86::VPORDZrr:
  case X86::VPORQZ128rr:  case X86::VPORQZ256rr:  case X86::VPORQZrr:
  case X86::VPXORrr:      case X86::VPXORYrr:
  case X86::VPXORDZ128rr: case X86::VPXORDZ256rr: case X86::VPXORDZrr:
  case X86::VPXORQZ128rr: case X86::VPXORQZ256rr: case X86::VPXORQZrr:
  // AVX / AVX-512 FP bitwise logic (bit-exact, unlike FP arithmetic).
  case X86::VANDPDrr:     case X86::VANDPSrr:
  case X86::VANDPDYrr:    case X86::VANDPSYrr:
  case X86::VANDPDZ128rr: case X86::VANDPSZ128rr:
  case X86::VANDPDZ256rr: case X86::VANDPSZ256rr:
  case X86::VANDPDZrr:    case X86::VANDPSZrr:
  case X86::VORPDrr:      case X86::VORPSrr:
  case X86::VORPDYrr:     case X86::VORPSYrr:
  case X86::VORPDZ128rr:  case X86::VORPSZ128rr:
  case X86::VORPDZ256rr:  case X86::VORPSZ256rr:
  case X86::VORPDZrr:     case X86::VORPSZrr:
  case X86::VXORPDrr:     case X86::VXORPSrr:
  case X86::VXORPDYrr:    case X86::VXORPSYrr:
  case X86::VXORPDZ128rr: case X86::VXORPSZ128rr:
  case X86::VXORPDZ256rr: case X86::VXORPSZ256rr:
  case X86::VXORPDZrr:    case X86::VXORPSZrr:
  // AVX-512 mask-register ops.
  case X86::KADDBrr: case X86::KADDWrr: case X86::KADDDrr: case X86::KADDQrr:
  case X86::KANDBrr: case X86::KANDWrr: case X86::KANDDrr: case X86::KANDQrr:
  case X86::KORBrr:  case X86::KORWrr:  case X86::KORDrr:  case X86::KORQrr:
  case X86::KXORBrr: case X86::KXORWrr: case X86::KXORDrr: case X86::KXORQrr:
  // AVX / AVX-512 integer add and multiply.
  case X86::VPADDBrr:     case X86::VPADDWrr:
  case X86::VPADDDrr:     case X86::VPADDQrr:
  case X86::VPADDBYrr:    case X86::VPADDWYrr:
  case X86::VPADDDYrr:    case X86::VPADDQYrr:
  case X86::VPADDBZ128rr: case X86::VPADDWZ128rr:
  case X86::VPADDDZ128rr: case X86::VPADDQZ128rr:
  case X86::VPADDBZ256rr: case X86::VPADDWZ256rr:
  case X86::VPADDDZ256rr: case X86::VPADDQZ256rr:
  case X86::VPADDBZrr:    case X86::VPADDWZrr:
  case X86::VPADDDZrr:    case X86::VPADDQZrr:
  case X86::VPMULLWrr:    case X86::VPMULLWYrr:
  case X86::VPMULLWZ128rr: case X86::VPMULLWZ256rr: case X86::VPMULLWZrr:
  case X86::VPMULLDrr:     case X86::VPMULLDYrr:
  case X86::VPMULLDZ128rr: case X86::VPMULLDZ256rr: case X86::VPMULLDZrr:
  case X86::VPMULLQZ128rr: case X86::VPMULLQZ256rr: case X86::VPMULLQZrr:
  // AVX / AVX-512 integer min/max (exact for integers).
  case X86::VPMAXSBrr: case X86::VPMAXSBYrr:
  case X86::VPMAXSBZ128rr: case X86::VPMAXSBZ256rr: case X86::VPMAXSBZrr:
  case X86::VPMAXSDrr: case X86::VPMAXSDYrr:
  case X86::VPMAXSDZ128rr: case X86::VPMAXSDZ256rr: case X86::VPMAXSDZrr:
  case X86::VPMAXSQZ128rr: case X86::VPMAXSQZ256rr: case X86::VPMAXSQZrr:
  case X86::VPMAXSWrr: case X86::VPMAXSWYrr:
  case X86::VPMAXSWZ128rr: case X86::VPMAXSWZ256rr: case X86::VPMAXSWZrr:
  case X86::VPMAXUBrr: case X86::VPMAXUBYrr:
  case X86::VPMAXUBZ128rr: case X86::VPMAXUBZ256rr: case X86::VPMAXUBZrr:
  case X86::VPMAXUDrr: case X86::VPMAXUDYrr:
  case X86::VPMAXUDZ128rr: case X86::VPMAXUDZ256rr: case X86::VPMAXUDZrr:
  case X86::VPMAXUQZ128rr: case X86::VPMAXUQZ256rr: case X86::VPMAXUQZrr:
  case X86::VPMAXUWrr: case X86::VPMAXUWYrr:
  case X86::VPMAXUWZ128rr: case X86::VPMAXUWZ256rr: case X86::VPMAXUWZrr:
  case X86::VPMINSBrr: case X86::VPMINSBYrr:
  case X86::VPMINSBZ128rr: case X86::VPMINSBZ256rr: case X86::VPMINSBZrr:
  case X86::VPMINSDrr: case X86::VPMINSDYrr:
  case X86::VPMINSDZ128rr: case X86::VPMINSDZ256rr: case X86::VPMINSDZrr:
  case X86::VPMINSQZ128rr: case X86::VPMINSQZ256rr: case X86::VPMINSQZrr:
  case X86::VPMINSWrr: case X86::VPMINSWYrr:
  case X86::VPMINSWZ128rr: case X86::VPMINSWZ256rr: case X86::VPMINSWZrr:
  case X86::VPMINUBrr: case X86::VPMINUBYrr:
  case X86::VPMINUBZ128rr: case X86::VPMINUBZ256rr: case X86::VPMINUBZrr:
  case X86::VPMINUDrr: case X86::VPMINUDYrr:
  case X86::VPMINUDZ128rr: case X86::VPMINUDZ256rr: case X86::VPMINUDZrr:
  case X86::VPMINUQZ128rr: case X86::VPMINUQZ256rr: case X86::VPMINUQZrr:
  case X86::VPMINUWrr: case X86::VPMINUWYrr:
  case X86::VPMINUWZ128rr: case X86::VPMINUWZ256rr: case X86::VPMINUWZrr:
  // Normal min/max instructions are not commutative because of NaN and signed
  // zero semantics, but these are. Thus, there's no need to check for global
  // relaxed math; the instructions themselves have the properties we need.
  case X86::MAXCPDrr:  case X86::MAXCPSrr:
  case X86::MAXCSDrr:  case X86::MAXCSSrr:
  case X86::MINCPDrr:  case X86::MINCPSrr:
  case X86::MINCSDrr:  case X86::MINCSSrr:
  case X86::VMAXCPDrr: case X86::VMAXCPSrr:
  case X86::VMAXCPDYrr: case X86::VMAXCPSYrr:
  case X86::VMAXCPDZ128rr: case X86::VMAXCPSZ128rr:
  case X86::VMAXCPDZ256rr: case X86::VMAXCPSZ256rr:
  case X86::VMAXCPDZrr: case X86::VMAXCPSZrr:
  case X86::VMAXCSDrr:  case X86::VMAXCSSrr:
  case X86::VMAXCSDZrr: case X86::VMAXCSSZrr:
  case X86::VMINCPDrr:  case X86::VMINCPSrr:
  case X86::VMINCPDYrr: case X86::VMINCPSYrr:
  case X86::VMINCPDZ128rr: case X86::VMINCPSZ128rr:
  case X86::VMINCPDZ256rr: case X86::VMINCPSZ256rr:
  case X86::VMINCPDZrr: case X86::VMINCPSZrr:
  case X86::VMINCSDrr:  case X86::VMINCSSrr:
  case X86::VMINCSDZrr: case X86::VMINCSSZrr:
    return true;
  // FP add/mul: mathematically associative, but reassociation changes
  // rounding, so only allowed when unsafe FP math is enabled.
  case X86::ADDPDrr: case X86::ADDPSrr:
  case X86::ADDSDrr: case X86::ADDSSrr:
  case X86::MULPDrr: case X86::MULPSrr:
  case X86::MULSDrr: case X86::MULSSrr:
  case X86::VADDPDrr:     case X86::VADDPSrr:
  case X86::VADDPDYrr:    case X86::VADDPSYrr:
  case X86::VADDPDZ128rr: case X86::VADDPSZ128rr:
  case X86::VADDPDZ256rr: case X86::VADDPSZ256rr:
  case X86::VADDPDZrr:    case X86::VADDPSZrr:
  case X86::VADDSDrr:     case X86::VADDSSrr:
  case X86::VADDSDZrr:    case X86::VADDSSZrr:
  case X86::VMULPDrr:     case X86::VMULPSrr:
  case X86::VMULPDYrr:    case X86::VMULPSYrr:
  case X86::VMULPDZ128rr: case X86::VMULPSZ128rr:
  case X86::VMULPDZ256rr: case X86::VMULPSZ256rr:
  case X86::VMULPDZrr:    case X86::VMULPSZrr:
  case X86::VMULSDrr:     case X86::VMULSSrr:
  case X86::VMULSDZrr:    case X86::VMULSSZrr:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

/// This is an architecture-specific helper function of reassociateOps.
/// Set special operand attributes for new instructions after reassociation.
void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
                                         MachineInstr &OldMI2,
                                         MachineInstr &NewMI1,
                                         MachineInstr &NewMI2) const {
  // Integer instructions define an implicit EFLAGS source register operand as
  // the third source (fourth total) operand.
  // (Body of setSpecialOperandAttr, continued from the previous chunk.)
  // Pure FP instructions have no EFLAGS operand — nothing to propagate.
  if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4)
    return;

  assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 &&
         "Unexpected instruction type for reassociation");

  MachineOperand &OldOp1 = OldMI1.getOperand(3);
  MachineOperand &OldOp2 = OldMI2.getOperand(3);
  MachineOperand &NewOp1 = NewMI1.getOperand(3);
  MachineOperand &NewOp2 = NewMI2.getOperand(3);

  assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() &&
         "Must have dead EFLAGS operand in reassociable instruction");
  assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() &&
         "Must have dead EFLAGS operand in reassociable instruction");

  // The Old operands are only read by the asserts above.
  (void)OldOp1;
  (void)OldOp2;

  assert(NewOp1.isReg() && NewOp1.getReg() == X86::EFLAGS &&
         "Unexpected operand in reassociable instruction");
  assert(NewOp2.isReg() && NewOp2.getReg() == X86::EFLAGS &&
         "Unexpected operand in reassociable instruction");

  // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
  // of this pass or other passes. The EFLAGS operands must be dead in these new
  // instructions because the EFLAGS operands in the original instructions must
  // be dead in order for reassociation to occur.
  NewOp1.setIsDead();
  NewOp2.setIsDead();
}

/// X86 target flags are not split into direct/bitmask parts: the whole flag
/// word is the "direct" component and the bitmask component is always zero.
std::pair<unsigned, unsigned>
X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  return std::make_pair(TF, 0u);
}

/// Table mapping each direct operand target flag to the string used when
/// serializing it in MIR.
ArrayRef<std::pair<unsigned, const char *>>
X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace X86II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
      {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
      {MO_GOT, "x86-got"},
      {MO_GOTOFF, "x86-gotoff"},
      {MO_GOTPCREL, "x86-gotpcrel"},
      {MO_PLT, "x86-plt"},
      {MO_TLSGD, "x86-tlsgd"},
      {MO_TLSLD, "x86-tlsld"},
      {MO_TLSLDM, "x86-tlsldm"},
      {MO_GOTTPOFF, "x86-gottpoff"},
      {MO_INDNTPOFF, "x86-indntpoff"},
      {MO_TPOFF, "x86-tpoff"},
      {MO_DTPOFF, "x86-dtpoff"},
      {MO_NTPOFF, "x86-ntpoff"},
      {MO_GOTNTPOFF, "x86-gotntpoff"},
      {MO_DLLIMPORT, "x86-dllimport"},
      {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
      {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
      {MO_TLVP, "x86-tlvp"},
      {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
      {MO_SECREL, "x86-secrel"},
      {MO_COFFSTUB, "x86-coffstub"}};
  return makeArrayRef(TargetFlags);
}

namespace {
  /// Create Global Base Reg pass. This initializes the PIC
  /// global base register for x86-32.
  // (Declared inside the anonymous namespace opened in the previous chunk.)
  // Machine pass that materializes the PIC global base register at the top of
  // the entry block when the function requested one.
  struct CGBR : public MachineFunctionPass {
    static char ID;
    CGBR() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override {
      const X86TargetMachine *TM =
          static_cast<const X86TargetMachine *>(&MF.getTarget());
      const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();

      // Don't do anything in the 64-bit small and kernel code models. They use
      // RIP-relative addressing for everything.
      if (STI.is64Bit() && (TM->getCodeModel() == CodeModel::Small ||
                            TM->getCodeModel() == CodeModel::Kernel))
        return false;

      // Only emit a global base reg in PIC mode.
      if (!TM->isPositionIndependent())
        return false;

      X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
      unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();

      // If we didn't need a GlobalBaseReg, don't insert code.
      if (GlobalBaseReg == 0)
        return false;

      // Insert the set of GlobalBaseReg into the first MBB of the function
      MachineBasicBlock &FirstMBB = MF.front();
      MachineBasicBlock::iterator MBBI = FirstMBB.begin();
      DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      const X86InstrInfo *TII = STI.getInstrInfo();

      // For GOT-style PIC, compute into a scratch vreg first; the
      // _GLOBAL_OFFSET_TABLE_ adjustment below writes the final value into
      // GlobalBaseReg.  Otherwise write GlobalBaseReg directly.
      unsigned PC;
      if (STI.isPICStyleGOT())
        PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
      else
        PC = GlobalBaseReg;

      if (STI.is64Bit()) {
        if (TM->getCodeModel() == CodeModel::Medium) {
          // In the medium code model, use a RIP-relative LEA to materialize
          // the GOT.
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
              .addReg(X86::RIP)
              .addImm(0)
              .addReg(0)
              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
              .addReg(0);
        } else if (TM->getCodeModel() == CodeModel::Large) {
          // In the large code model, we are aiming for this code, though the
          // register allocation may vary:
          //   leaq .LN$pb(%rip), %rax
          //   movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
          //   addq %rcx, %rax
          // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
          unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
          unsigned GOTReg =
              RegInfo.createVirtualRegister(&X86::GR64RegClass);
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
              .addReg(X86::RIP)
              .addImm(0)
              .addReg(0)
              .addSym(MF.getPICBaseSymbol())
              .addReg(0);
          // Attach the PIC base label to the LEA just emitted.
          std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
                                 X86II::MO_PIC_BASE_OFFSET);
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
              .addReg(PBReg, RegState::Kill)
              .addReg(GOTReg, RegState::Kill);
        } else {
          llvm_unreachable("unexpected code model");
        }
      } else {
        // Operand of MovePCtoStack is completely ignored by asm printer. It's
        // only used in JIT code emission as displacement to pc.
        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);

        // If we're using vanilla 'GOT' PIC style, we should use relative
        // addressing not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
        if (STI.isPICStyleGOT()) {
          // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
          // %some_register
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
              .addReg(PC)
              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
                                 X86II::MO_GOT_ABSOLUTE_ADDRESS);
        }
      }

      return true;
    }

    StringRef getPassName() const override {
      return "X86 PIC Global Base Reg Initialization";
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };
}

char CGBR::ID = 0;
FunctionPass*
llvm::createX86GlobalBaseRegPass() { return new CGBR(); }

namespace {
  // Pass that folds redundant local-dynamic TLS base computations within a
  // function.  (Definition continues past this chunk.)
  struct LDTLSCleanup : public MachineFunctionPass {
    static char ID;
    LDTLSCleanup() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override {
      if (skipFunction(MF.getFunction()))
        return false;

      X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
      if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
        // No point folding accesses if there aren't at least two.
        return false;
      }

      MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
      return VisitNode(DT->getRootNode(), 0);
    }

    // Visit the dominator subtree rooted at Node in pre-order.
7581239462Sdim // If TLSBaseAddrReg is non-null, then use that to replace any 7582239462Sdim // TLS_base_addr instructions. Otherwise, create the register 7583239462Sdim // when the first such instruction is seen, and then use it 7584239462Sdim // as we encounter more instructions. 7585239462Sdim bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { 7586239462Sdim MachineBasicBlock *BB = Node->getBlock(); 7587239462Sdim bool Changed = false; 7588239462Sdim 7589239462Sdim // Traverse the current block. 7590239462Sdim for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; 7591239462Sdim ++I) { 7592239462Sdim switch (I->getOpcode()) { 7593239462Sdim case X86::TLS_base_addr32: 7594239462Sdim case X86::TLS_base_addr64: 7595239462Sdim if (TLSBaseAddrReg) 7596309124Sdim I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg); 7597239462Sdim else 7598309124Sdim I = SetRegister(*I, &TLSBaseAddrReg); 7599239462Sdim Changed = true; 7600239462Sdim break; 7601239462Sdim default: 7602239462Sdim break; 7603239462Sdim } 7604239462Sdim } 7605239462Sdim 7606239462Sdim // Visit the children of this block in the dominator tree. 7607239462Sdim for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); 7608239462Sdim I != E; ++I) { 7609239462Sdim Changed |= VisitNode(*I, TLSBaseAddrReg); 7610239462Sdim } 7611239462Sdim 7612239462Sdim return Changed; 7613239462Sdim } 7614239462Sdim 7615239462Sdim // Replace the TLS_base_addr instruction I with a copy from 7616239462Sdim // TLSBaseAddrReg, returning the new instruction. 7617309124Sdim MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I, 7618239462Sdim unsigned TLSBaseAddrReg) { 7619309124Sdim MachineFunction *MF = I.getParent()->getParent(); 7620288943Sdim const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>(); 7621288943Sdim const bool is64Bit = STI.is64Bit(); 7622288943Sdim const X86InstrInfo *TII = STI.getInstrInfo(); 7623239462Sdim 7624239462Sdim // Insert a Copy from TLSBaseAddrReg to RAX/EAX. 
7625309124Sdim MachineInstr *Copy = 7626309124Sdim BuildMI(*I.getParent(), I, I.getDebugLoc(), 7627309124Sdim TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX) 7628309124Sdim .addReg(TLSBaseAddrReg); 7629239462Sdim 7630239462Sdim // Erase the TLS_base_addr instruction. 7631309124Sdim I.eraseFromParent(); 7632239462Sdim 7633239462Sdim return Copy; 7634239462Sdim } 7635239462Sdim 7636321369Sdim // Create a virtual register in *TLSBaseAddrReg, and populate it by 7637239462Sdim // inserting a copy instruction after I. Returns the new instruction. 7638309124Sdim MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) { 7639309124Sdim MachineFunction *MF = I.getParent()->getParent(); 7640288943Sdim const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>(); 7641288943Sdim const bool is64Bit = STI.is64Bit(); 7642288943Sdim const X86InstrInfo *TII = STI.getInstrInfo(); 7643239462Sdim 7644239462Sdim // Create a virtual register for the TLS base address. 7645239462Sdim MachineRegisterInfo &RegInfo = MF->getRegInfo(); 7646239462Sdim *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit 7647239462Sdim ? &X86::GR64RegClass 7648239462Sdim : &X86::GR32RegClass); 7649239462Sdim 7650239462Sdim // Insert a copy from RAX/EAX to TLSBaseAddrReg. 7651309124Sdim MachineInstr *Next = I.getNextNode(); 7652309124Sdim MachineInstr *Copy = 7653309124Sdim BuildMI(*I.getParent(), Next, I.getDebugLoc(), 7654309124Sdim TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) 7655309124Sdim .addReg(is64Bit ? 
X86::RAX : X86::EAX); 7656239462Sdim 7657239462Sdim return Copy; 7658239462Sdim } 7659239462Sdim 7660314564Sdim StringRef getPassName() const override { 7661239462Sdim return "Local Dynamic TLS Access Clean-up"; 7662239462Sdim } 7663239462Sdim 7664276479Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 7665239462Sdim AU.setPreservesCFG(); 7666239462Sdim AU.addRequired<MachineDominatorTree>(); 7667239462Sdim MachineFunctionPass::getAnalysisUsage(AU); 7668239462Sdim } 7669239462Sdim }; 7670239462Sdim} 7671239462Sdim 7672239462Sdimchar LDTLSCleanup::ID = 0; 7673239462SdimFunctionPass* 7674239462Sdimllvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } 7675321369Sdim 7676327952Sdim/// Constants defining how certain sequences should be outlined. 7677327952Sdim/// 7678327952Sdim/// \p MachineOutlinerDefault implies that the function is called with a call 7679327952Sdim/// instruction, and a return must be emitted for the outlined function frame. 7680327952Sdim/// 7681327952Sdim/// That is, 7682327952Sdim/// 7683327952Sdim/// I1 OUTLINED_FUNCTION: 7684327952Sdim/// I2 --> call OUTLINED_FUNCTION I1 7685327952Sdim/// I3 I2 7686327952Sdim/// I3 7687327952Sdim/// ret 7688327952Sdim/// 7689327952Sdim/// * Call construction overhead: 1 (call instruction) 7690327952Sdim/// * Frame construction overhead: 1 (return instruction) 7691327952Sdim/// 7692327952Sdim/// \p MachineOutlinerTailCall implies that the function is being tail called. 7693327952Sdim/// A jump is emitted instead of a call, and the return is already present in 7694327952Sdim/// the outlined sequence. 
That is, 7695327952Sdim/// 7696327952Sdim/// I1 OUTLINED_FUNCTION: 7697327952Sdim/// I2 --> jmp OUTLINED_FUNCTION I1 7698327952Sdim/// ret I2 7699327952Sdim/// ret 7700327952Sdim/// 7701327952Sdim/// * Call construction overhead: 1 (jump instruction) 7702327952Sdim/// * Frame construction overhead: 0 (don't need to return) 7703327952Sdim/// 7704327952Sdimenum MachineOutlinerClass { 7705327952Sdim MachineOutlinerDefault, 7706327952Sdim MachineOutlinerTailCall 7707327952Sdim}; 7708321369Sdim 7709341825Sdimoutliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo( 7710341825Sdim std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { 7711341825Sdim unsigned SequenceSize = 7712341825Sdim std::accumulate(RepeatedSequenceLocs[0].front(), 7713341825Sdim std::next(RepeatedSequenceLocs[0].back()), 0, 7714341825Sdim [](unsigned Sum, const MachineInstr &MI) { 7715341825Sdim // FIXME: x86 doesn't implement getInstSizeInBytes, so 7716341825Sdim // we can't tell the cost. Just assume each instruction 7717341825Sdim // is one byte. 7718341825Sdim if (MI.isDebugInstr() || MI.isKill()) 7719341825Sdim return Sum; 7720341825Sdim return Sum + 1; 7721341825Sdim }); 7722321369Sdim 7723341825Sdim // FIXME: Use real size in bytes for call and ret instructions. 7724341825Sdim if (RepeatedSequenceLocs[0].back()->isTerminator()) { 7725341825Sdim for (outliner::Candidate &C : RepeatedSequenceLocs) 7726341825Sdim C.setCallInfo(MachineOutlinerTailCall, 1); 7727327952Sdim 7728341825Sdim return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 7729341825Sdim 0, // Number of bytes to emit frame. 7730341825Sdim MachineOutlinerTailCall // Type of frame. 
7731341825Sdim ); 7732341825Sdim } 7733341825Sdim 7734341825Sdim for (outliner::Candidate &C : RepeatedSequenceLocs) 7735341825Sdim C.setCallInfo(MachineOutlinerDefault, 1); 7736341825Sdim 7737341825Sdim return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 1, 7738341825Sdim MachineOutlinerDefault); 7739321369Sdim} 7740321369Sdim 7741327952Sdimbool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, 7742327952Sdim bool OutlineFromLinkOnceODRs) const { 7743327952Sdim const Function &F = MF.getFunction(); 7744327952Sdim 7745327952Sdim // Does the function use a red zone? If it does, then we can't risk messing 7746327952Sdim // with the stack. 7747353358Sdim if (Subtarget.getFrameLowering()->has128ByteRedZone(MF)) { 7748341825Sdim // It could have a red zone. If it does, then we don't want to touch it. 7749341825Sdim const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 7750341825Sdim if (!X86FI || X86FI->getUsesRedZone()) 7751327952Sdim return false; 7752341825Sdim } 7753327952Sdim 7754327952Sdim // If we *don't* want to outline from things that could potentially be deduped 7755327952Sdim // then return false. 7756327952Sdim if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 7757327952Sdim return false; 7758327952Sdim 7759327952Sdim // This function is viable for outlining, so return true. 7760327952Sdim return true; 7761321369Sdim} 7762321369Sdim 7763341825Sdimoutliner::InstrType 7764341825SdimX86InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { 7765341825Sdim MachineInstr &MI = *MIT; 7766321369Sdim // Don't allow debug values to impact outlining type. 7767341825Sdim if (MI.isDebugInstr() || MI.isIndirectDebugValue()) 7768341825Sdim return outliner::InstrType::Invisible; 7769321369Sdim 7770341825Sdim // At this point, KILL instructions don't really tell us much so we can go 7771341825Sdim // ahead and skip over them. 
7772341825Sdim if (MI.isKill()) 7773341825Sdim return outliner::InstrType::Invisible; 7774341825Sdim 7775321369Sdim // Is this a tail call? If yes, we can outline as a tail call. 7776321369Sdim if (isTailCall(MI)) 7777341825Sdim return outliner::InstrType::Legal; 7778321369Sdim 7779321369Sdim // Is this the terminator of a basic block? 7780321369Sdim if (MI.isTerminator() || MI.isReturn()) { 7781321369Sdim 7782321369Sdim // Does its parent have any successors in its MachineFunction? 7783321369Sdim if (MI.getParent()->succ_empty()) 7784341825Sdim return outliner::InstrType::Legal; 7785321369Sdim 7786321369Sdim // It does, so we can't tail call it. 7787341825Sdim return outliner::InstrType::Illegal; 7788321369Sdim } 7789321369Sdim 7790321369Sdim // Don't outline anything that modifies or reads from the stack pointer. 7791321369Sdim // 7792321369Sdim // FIXME: There are instructions which are being manually built without 7793321369Sdim // explicit uses/defs so we also have to check the MCInstrDesc. We should be 7794321369Sdim // able to remove the extra checks once those are fixed up. For example, 7795327952Sdim // sometimes we might get something like %rax = POP64r 1. This won't be 7796321369Sdim // caught by modifiesRegister or readsRegister even though the instruction 7797321369Sdim // really ought to be formed so that modifiesRegister/readsRegister would 7798321369Sdim // catch it. 7799321369Sdim if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) || 7800321369Sdim MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) || 7801321369Sdim MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) 7802341825Sdim return outliner::InstrType::Illegal; 7803321369Sdim 7804321369Sdim // Outlined calls change the instruction pointer, so don't read from it. 
7805321369Sdim if (MI.readsRegister(X86::RIP, &RI) || 7806321369Sdim MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) || 7807321369Sdim MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP)) 7808341825Sdim return outliner::InstrType::Illegal; 7809321369Sdim 7810321369Sdim // Positions can't safely be outlined. 7811321369Sdim if (MI.isPosition()) 7812341825Sdim return outliner::InstrType::Illegal; 7813321369Sdim 7814321369Sdim // Make sure none of the operands of this instruction do anything tricky. 7815321369Sdim for (const MachineOperand &MOP : MI.operands()) 7816321369Sdim if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || 7817321369Sdim MOP.isTargetIndex()) 7818341825Sdim return outliner::InstrType::Illegal; 7819321369Sdim 7820341825Sdim return outliner::InstrType::Legal; 7821321369Sdim} 7822321369Sdim 7823341825Sdimvoid X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB, 7824321369Sdim MachineFunction &MF, 7825341825Sdim const outliner::OutlinedFunction &OF) 7826327952Sdim const { 7827321369Sdim // If we're a tail call, we already have a return, so don't do anything. 7828341825Sdim if (OF.FrameConstructionID == MachineOutlinerTailCall) 7829321369Sdim return; 7830321369Sdim 7831321369Sdim // We're a normal call, so our sequence doesn't have a return instruction. 7832321369Sdim // Add it in. 7833321369Sdim MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ)); 7834321369Sdim MBB.insert(MBB.end(), retq); 7835321369Sdim} 7836321369Sdim 7837321369SdimMachineBasicBlock::iterator 7838321369SdimX86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, 7839321369Sdim MachineBasicBlock::iterator &It, 7840321369Sdim MachineFunction &MF, 7841341825Sdim const outliner::Candidate &C) const { 7842321369Sdim // Is it a tail call? 7843341825Sdim if (C.CallConstructionID == MachineOutlinerTailCall) { 7844321369Sdim // Yes, just insert a JMP. 
7845321369Sdim It = MBB.insert(It, 7846341825Sdim BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64)) 7847321369Sdim .addGlobalAddress(M.getNamedValue(MF.getName()))); 7848321369Sdim } else { 7849321369Sdim // No, insert a call. 7850321369Sdim It = MBB.insert(It, 7851321369Sdim BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32)) 7852321369Sdim .addGlobalAddress(M.getNamedValue(MF.getName()))); 7853321369Sdim } 7854321369Sdim 7855321369Sdim return It; 7856321369Sdim} 7857344779Sdim 7858344779Sdim#define GET_INSTRINFO_HELPERS 7859344779Sdim#include "X86GenInstrInfo.inc" 7860