//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86InstrInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrFoldTables.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "x86-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "X86GenInstrInfo.inc"

// Hidden command-line knobs controlling spill folding, PIC-stub
// rematerialization, and the partial/undef register-update heuristics used
// elsewhere in this file.
static cl::opt<bool>
    NoFusing("disable-spill-fusing",
             cl::desc("Disable fusing of spill code into instructions"),
             cl::Hidden);
static cl::opt<bool>
PrintFailedFusing("print-failed-fuse-candidates",
                  cl::desc("Print instructions that the allocator wants to"
                           " fuse, but the X86 backend currently can't"),
                  cl::Hidden);
static cl::opt<bool>
ReMatPICStubLoad("remat-pic-stub-load",
                 cl::desc("Re-materialize load from stub in PIC mode"),
                 cl::init(false), cl::Hidden);
static cl::opt<unsigned>
PartialRegUpdateClearance("partial-reg-update-clearance",
                          cl::desc("Clearance between two register writes "
                                   "for inserting XOR to avoid partial "
                                   "register update"),
                          cl::init(64), cl::Hidden);
static cl::opt<unsigned>
UndefRegClearance("undef-reg-clearance",
                  cl::desc("How many idle instructions we would like before "
                           "certain undef register reads"),
                  cl::init(128), cl::Hidden);


// Pin the vtable to this file.
void X86InstrInfo::anchor() {}

X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
    // Select the call-frame pseudo opcodes and the return opcode that match
    // the target's pointer width (LP64 vs. ILP32/32-bit).
    : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
                                               : X86::ADJCALLSTACKDOWN32),
                      (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
                                               : X86::ADJCALLSTACKUP32),
                      X86::CATCHRET,
                      (STI.is64Bit() ? X86::RETQ : X86::RETL)),
      Subtarget(STI), RI(STI.getTargetTriple()) {
}

/// Return true if \p MI is a sign/zero-extending move whose source and
/// destination could be coalesced.  On success, \p SrcReg and \p DstReg are
/// set to the source and destination registers and \p SubIdx to the
/// sub-register index of the source value within the destination.
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                    unsigned &SrcReg, unsigned &DstReg,
                                    unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: break;
  case X86::MOVSX16rr8:
  case X86::MOVZX16rr8:
  case X86::MOVSX32rr8:
  case X86::MOVZX32rr8:
  case X86::MOVSX64rr8:
    if (!Subtarget.is64Bit())
      // It's not always legal to reference the low 8-bit of the larger
      // register in 32-bit mode.
      return false;
    LLVM_FALLTHROUGH;
  case X86::MOVSX32rr16:
  case X86::MOVZX32rr16:
  case X86::MOVSX64rr16:
  case X86::MOVSX64rr32: {
    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
      // Be conservative.
      return false;
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    // Map the opcode to the sub-register index of the extended value.
    switch (MI.getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::MOVSX16rr8:
    case X86::MOVZX16rr8:
    case X86::MOVSX32rr8:
    case X86::MOVZX32rr8:
    case X86::MOVSX64rr8:
      SubIdx = X86::sub_8bit;
      break;
    case X86::MOVSX32rr16:
    case X86::MOVZX32rr16:
    case X86::MOVSX64rr16:
      SubIdx = X86::sub_16bit;
      break;
    case X86::MOVSX64rr32:
      SubIdx = X86::sub_32bit;
      break;
    }
    return true;
  }
  }
  return false;
}

/// Return the stack-pointer adjustment (in bytes) performed by \p MI:
/// frame setup/destroy pseudos, calls (by scanning forward to the matching
/// ADJCALLSTACKUP), and the PUSH opcodes expected inside call sequences.
int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
  const MachineFunction *MF = MI.getParent()->getParent();
  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();

  if (isFrameInstr(MI)) {
    unsigned StackAlign = TFI->getStackAlignment();
    int SPAdj = alignTo(getFrameSize(MI), StackAlign);
    SPAdj -= getFrameAdjustment(MI);
    // Frame-destroy instructions adjust SP in the opposite direction.
    if (!isFrameSetup(MI))
      SPAdj = -SPAdj;
    return SPAdj;
  }

  // To know whether a call adjusts the stack, we need information
  // that is bound to the following ADJCALLSTACKUP pseudo.
  // Look for the next ADJCALLSTACKUP that follows the call.
  if (MI.isCall()) {
    const MachineBasicBlock *MBB = MI.getParent();
    auto I = ++MachineBasicBlock::const_iterator(MI);
    for (auto E = MBB->end(); I != E; ++I) {
      if (I->getOpcode() == getCallFrameDestroyOpcode() ||
          I->isCall())
        break;
    }

    // If we could not find a frame destroy opcode, then it has already
    // been simplified, so we don't care.
    if (I->getOpcode() != getCallFrameDestroyOpcode())
      return 0;

    return -(I->getOperand(1).getImm());
  }

  // Currently handle only PUSHes we can reasonably expect to see
  // in call sequences
  switch (MI.getOpcode()) {
  default:
    return 0;
  case X86::PUSH32i8:
  case X86::PUSH32r:
  case X86::PUSH32rmm:
  case X86::PUSH32rmr:
  case X86::PUSHi32:
    return 4;
  case X86::PUSH64i8:
  case X86::PUSH64r:
  case X86::PUSH64rmm:
  case X86::PUSH64rmr:
  case X86::PUSH64i32:
    return 8;
  }
}

/// Return true and the FrameIndex if the specified
/// operand and following operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
                                  int &FrameIndex) const {
  // A stack-frame reference must be a plain [FI + 0] address: frame-index
  // base, scale of 1, no index register, zero displacement.
  if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
      MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
      MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
      MI.getOperand(Op + X86::AddrDisp).isImm() &&
      MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
      MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
      MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
    FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
    return true;
  }
  return false;
}

/// Return true if \p Opcode is a plain register load that may be a load from
/// a stack slot; on success set \p MemBytes to the memory access size.
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
  switch (Opcode) {
  default:
    return false;
  case X86::MOV8rm:
  case X86::KMOVBkm:
    MemBytes = 1;
    return true;
  case X86::MOV16rm:
  case X86::KMOVWkm:
    MemBytes = 2;
    return true;
  case X86::MOV32rm:
  case X86::MOVSSrm:
  case X86::MOVSSrm_alt:
  case X86::VMOVSSrm:
  case X86::VMOVSSrm_alt:
  case X86::VMOVSSZrm:
  case X86::VMOVSSZrm_alt:
  case X86::KMOVDkm:
    MemBytes = 4;
    return true;
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSDrm:
  case X86::MOVSDrm_alt:
  case X86::VMOVSDrm:
  case X86::VMOVSDrm_alt:
  case X86::VMOVSDZrm:
  case X86::VMOVSDZrm_alt:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::KMOVQkm:
    MemBytes = 8;
    return true;
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVAPDrm:
  case X86::MOVUPDrm:
  case X86::MOVDQArm:
  case X86::MOVDQUrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPSrm:
  case X86::VMOVAPDrm:
  case X86::VMOVUPDrm:
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVUPSZ128rm:
  case X86::VMOVAPSZ128rm_NOVLX:
  case X86::VMOVUPSZ128rm_NOVLX:
  case X86::VMOVAPDZ128rm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVDQU8Z128rm:
  case X86::VMOVDQU16Z128rm:
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU64Z128rm:
    MemBytes = 16;
    return true;
  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPSZ256rm:
  case X86::VMOVAPSZ256rm_NOVLX:
  case X86::VMOVUPSZ256rm_NOVLX:
  case X86::VMOVAPDZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVDQU8Z256rm:
  case X86::VMOVDQU16Z256rm:
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU64Z256rm:
    MemBytes = 32;
    return true;
  case X86::VMOVAPSZrm:
  case X86::VMOVUPSZrm:
  case X86::VMOVAPDZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVDQU8Zrm:
  case X86::VMOVDQU16Zrm:
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU64Zrm:
    MemBytes = 64;
    return true;
  }
}

/// Return true if \p Opcode is a plain register store that may be a store to
/// a stack slot; on success set \p MemBytes to the memory access size.
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
  switch (Opcode) {
  default:
    return false;
  case X86::MOV8mr:
  case X86::KMOVBmk:
    MemBytes = 1;
    return true;
  case X86::MOV16mr:
  case X86::KMOVWmk:
    MemBytes = 2;
    return true;
  case X86::MOV32mr:
  case X86::MOVSSmr:
  case X86::VMOVSSmr:
  case X86::VMOVSSZmr:
  case X86::KMOVDmk:
    MemBytes = 4;
    return true;
  case X86::MOV64mr:
  case X86::ST_FpP64m:
  case X86::MOVSDmr:
  case X86::VMOVSDmr:
  case X86::VMOVSDZmr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
  case X86::KMOVQmk:
    MemBytes = 8;
    return true;
  case X86::MOVAPSmr:
  case X86::MOVUPSmr:
  case X86::MOVAPDmr:
  case X86::MOVUPDmr:
  case X86::MOVDQAmr:
  case X86::MOVDQUmr:
  case X86::VMOVAPSmr:
  case X86::VMOVUPSmr:
  case X86::VMOVAPDmr:
  case X86::VMOVUPDmr:
  case X86::VMOVDQAmr:
  case X86::VMOVDQUmr:
  case X86::VMOVUPSZ128mr:
  case X86::VMOVAPSZ128mr:
  case X86::VMOVUPSZ128mr_NOVLX:
  case X86::VMOVAPSZ128mr_NOVLX:
  case X86::VMOVUPDZ128mr:
  case X86::VMOVAPDZ128mr:
  case X86::VMOVDQA32Z128mr:
  case X86::VMOVDQU32Z128mr:
  case X86::VMOVDQA64Z128mr:
  case X86::VMOVDQU64Z128mr:
  case X86::VMOVDQU8Z128mr:
  case X86::VMOVDQU16Z128mr:
    MemBytes = 16;
    return true;
  case X86::VMOVUPSYmr:
  case X86::VMOVAPSYmr:
  case X86::VMOVUPDYmr:
  case X86::VMOVAPDYmr:
  case X86::VMOVDQUYmr:
  case X86::VMOVDQAYmr:
  case X86::VMOVUPSZ256mr:
  case X86::VMOVAPSZ256mr:
  case X86::VMOVUPSZ256mr_NOVLX:
  case X86::VMOVAPSZ256mr_NOVLX:
  case X86::VMOVUPDZ256mr:
  case X86::VMOVAPDZ256mr:
  case X86::VMOVDQU8Z256mr:
  case X86::VMOVDQU16Z256mr:
  case X86::VMOVDQA32Z256mr:
  case X86::VMOVDQU32Z256mr:
  case X86::VMOVDQA64Z256mr:
  case X86::VMOVDQU64Z256mr:
    MemBytes = 32;
    return true;
  case X86::VMOVUPSZmr:
  case X86::VMOVAPSZmr:
  case X86::VMOVUPDZmr:
  case X86::VMOVAPDZmr:
  case X86::VMOVDQU8Zmr:
  case X86::VMOVDQU16Zmr:
  case X86::VMOVDQA32Zmr:
  case X86::VMOVDQU32Zmr:
  case X86::VMOVDQA64Zmr:
  case X86::VMOVDQU64Zmr:
    MemBytes = 64;
    return true;
  }
  return false;
}

unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
  // Delegate to the overload that also reports the access size.
  unsigned Dummy;
  return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex,
                                           unsigned &MemBytes) const {
  // A full-register load whose address is a plain frame reference (the
  // memory operands start at index 1, after the destination register).
  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
    if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
      return MI.getOperand(0).getReg();
  return 0;
}

unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                 int &FrameIndex) const {
  unsigned Dummy;
  if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
    unsigned Reg;
    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    SmallVector<const MachineMemOperand *, 1> Accesses;
    if (hasLoadFromStackSlot(MI, Accesses)) {
      FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
      // Non-zero sentinel: the frame index was found via memory operands, so
      // there is no single destination register to report.
      return 1;
    }
  }
  return 0;
}

unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex) const {
  // Delegate to the overload that also reports the access size.
  unsigned Dummy;
  return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
}

unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex,
                                          unsigned &MemBytes) const {
  // For stores the address operands come first; the stored register follows
  // them at index X86::AddrNumOperands.
  if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
    if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
        isFrameOperand(MI, 0, FrameIndex))
      return MI.getOperand(X86::AddrNumOperands).getReg();
  return 0;
}

unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                int &FrameIndex) const {
  unsigned Dummy;
  if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
    unsigned Reg;
    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    SmallVector<const MachineMemOperand *, 1> Accesses;
    if (hasStoreToStackSlot(MI, Accesses)) {
      FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
      // Non-zero sentinel: found via memory operands, no single source reg.
      return 1;
    }
  }
  return 0;
}

/// Return true if register is PIC base; i.e., defined by X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
  // Don't waste compile time scanning use-def chains of physregs.
  if (!Register::isVirtualRegister(BaseReg))
    return false;
  bool isPICBase = false;
  for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
         E = MRI.def_instr_end(); I != E; ++I) {
    MachineInstr *DefMI = &*I;
    if (DefMI->getOpcode() != X86::MOVPC32r)
      return false;
    assert(!isPICBase && "More than one PIC base?");
    isPICBase = true;
  }
  return isPICBase;
}

bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                    AAResults *AA) const {
  switch (MI.getOpcode()) {
  default:
    // This function should only be called for opcodes with the ReMaterializable
    // flag set.
    llvm_unreachable("Unknown rematerializable operation!");
    break;

  // Constant-materializing pseudos and immediate moves have no inputs and no
  // memory reference, so they are always trivially rematerializable.
  case X86::LOAD_STACK_GUARD:
  case X86::AVX1_SETALLONES:
  case X86::AVX2_SETALLONES:
  case X86::AVX512_128_SET0:
  case X86::AVX512_256_SET0:
  case X86::AVX512_512_SET0:
  case X86::AVX512_512_SETALLONES:
  case X86::AVX512_FsFLD0SD:
  case X86::AVX512_FsFLD0SS:
  case X86::AVX512_FsFLD0F128:
  case X86::AVX_SET0:
  case X86::FsFLD0SD:
  case X86::FsFLD0SS:
  case X86::FsFLD0F128:
  case X86::KSET0D:
  case X86::KSET0Q:
  case X86::KSET0W:
  case X86::KSET1D:
  case X86::KSET1Q:
  case X86::KSET1W:
  case X86::MMX_SET0:
  case X86::MOV32ImmSExti8:
  case X86::MOV32r0:
  case X86::MOV32r1:
  case X86::MOV32r_1:
  case X86::MOV32ri64:
  case X86::MOV64ImmSExti8:
  case X86::V_SET0:
  case X86::V_SETALLONES:
  case X86::MOV16ri:
  case X86::MOV32ri:
  case X86::MOV64ri:
  case X86::MOV64ri32:
  case X86::MOV8ri:
    return true;

  case X86::MOV8rm:
  case X86::MOV8rm_NOREX:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::MOVSSrm:
  case X86::MOVSSrm_alt:
  case X86::MOVSDrm:
  case X86::MOVSDrm_alt:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVAPDrm:
  case X86::MOVUPDrm:
  case X86::MOVDQArm:
  case X86::MOVDQUrm:
  case X86::VMOVSSrm:
  case X86::VMOVSSrm_alt:
  case X86::VMOVSDrm:
  case X86::VMOVSDrm_alt:
  case X86::VMOVAPSrm:
  case X86::VMOVUPSrm:
  case X86::VMOVAPDrm:
  case X86::VMOVUPDrm:
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  // AVX-512
  case X86::VMOVSSZrm:
  case X86::VMOVSSZrm_alt:
  case X86::VMOVSDZrm:
  case X86::VMOVSDZrm_alt:
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPDZ256rm:
  case X86::VMOVAPDZrm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVAPSZ128rm_NOVLX:
  case X86::VMOVAPSZ256rm_NOVLX:
  case X86::VMOVAPSZrm:
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU16Z128rm:
  case X86::VMOVDQU16Z256rm:
  case X86::VMOVDQU16Zrm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU64Z128rm:
  case X86::VMOVDQU64Z256rm:
  case X86::VMOVDQU64Zrm:
  case X86::VMOVDQU8Z128rm:
  case X86::VMOVDQU8Z256rm:
  case X86::VMOVDQU8Zrm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPSZ128rm:
  case X86::VMOVUPSZ256rm:
  case X86::VMOVUPSZ128rm_NOVLX:
  case X86::VMOVUPSZ256rm_NOVLX:
  case X86::VMOVUPSZrm: {
    // Loads from constant pools are trivially rematerializable.
    if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
        MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
        MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
        MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
        MI.isDereferenceableInvariantLoad(AA)) {
      Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
      if (BaseReg == 0 || BaseReg == X86::RIP)
        return true;
      // Allow re-materialization of PIC load.
      if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())
        return false;
      const MachineFunction &MF = *MI.getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }

  case X86::LEA32r:
  case X86::LEA64r: {
    if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
        MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
        MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
        !MI.getOperand(1 + X86::AddrDisp).isReg()) {
      // lea fi#, lea GV, etc. are all rematerializable.
      if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
        return true;
      Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
      if (BaseReg == 0)
        return true;
      // Allow re-materialization of lea PICBase + x.
      const MachineFunction &MF = *MI.getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }
  }
}

void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 unsigned DestReg, unsigned SubIdx,
                                 const MachineInstr &Orig,
                                 const TargetRegisterInfo &TRI) const {
  bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
  if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
    // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
    // effects.
    int Value;
    switch (Orig.getOpcode()) {
    case X86::MOV32r0:  Value = 0; break;
    case X86::MOV32r1:  Value = 1; break;
    case X86::MOV32r_1: Value = -1; break;
    default:
      llvm_unreachable("Unexpected instruction!");
    }

    const DebugLoc &DL = Orig.getDebugLoc();
    BuildMI(MBB, I, DL, get(X86::MOV32ri))
        .add(Orig.getOperand(0))
        .addImm(Value);
  } else {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    MBB.insert(I, MI);
  }

  // Rewrite the newly inserted copy to define DestReg (with the requested
  // sub-register index) instead of Orig's original destination.
  MachineInstr &NewMI = *std::prev(I);
  NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
}

/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// Return the shift count of \p MI's shift-amount operand, truncated to the
/// bits the hardware actually uses.
inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
                                              unsigned ShiftAmtOperandIdx) {
  // The shift count is six bits with the REX.W prefix and five bits without.
  unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
  unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
  return Imm & ShiftCountMask;
}

/// Check whether the given shift count can be represented by a LEA
/// instruction.
inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
  // Left shift instructions can be transformed into load-effective-address
  // instructions if we can encode them appropriately.
  // A LEA instruction utilizes a SIB byte to encode its scale factor.
  // The SIB.scale field is two bits wide which means that we can encode any
  // shift amount less than 4.
  return ShAmt < 4 && ShAmt > 0;
}

/// Classify \p Src for use as the base register of a LEA with opcode \p Opc:
/// report the register to use in \p NewSrc and its kill state in \p isKill,
/// possibly creating a 64-bit copy (for LEA64_32r) or an implicit operand in
/// \p ImplicitOp.  Returns false if the register cannot be constrained to a
/// LEA-compatible register class.
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
                                  unsigned Opc, bool AllowSP, Register &NewSrc,
                                  bool &isKill, MachineOperand &ImplicitOp,
                                  LiveVariables *LV) const {
  MachineFunction &MF = *MI.getParent()->getParent();
  const TargetRegisterClass *RC;
  if (AllowSP) {
    RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
  } else {
    RC = Opc != X86::LEA32r ?
      &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
  }
  Register SrcReg = Src.getReg();

  // For both LEA64 and LEA32 the register already has essentially the right
  // type (32-bit or 64-bit) we may just need to forbid SP.
  if (Opc != X86::LEA64_32r) {
    NewSrc = SrcReg;
    isKill = Src.isKill();
    assert(!Src.isUndef() && "Undef op doesn't need optimization");

    if (Register::isVirtualRegister(NewSrc) &&
        !MF.getRegInfo().constrainRegClass(NewSrc, RC))
      return false;

    return true;
  }

  // This is for an LEA64_32r and incoming registers are 32-bit. One way or
  // another we need to add 64-bit registers to the final MI.
  if (Register::isPhysicalRegister(SrcReg)) {
    ImplicitOp = Src;
    ImplicitOp.setImplicit();

    NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
    isKill = Src.isKill();
    assert(!Src.isUndef() && "Undef op doesn't need optimization");
  } else {
    // Virtual register of the wrong class, we have to create a temporary 64-bit
    // vreg to feed into the LEA.
    NewSrc = MF.getRegInfo().createVirtualRegister(RC);
    MachineInstr *Copy =
        BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
            .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
            .add(Src);

    // Which is obviously going to be dead after we're done with it.
    isKill = true;

    if (LV)
      LV->replaceKillInstruction(SrcReg, MI, *Copy);
  }

  // We've set all the parameters without issue.
  return true;
}

/// Convert an 8/16-bit two-address arithmetic instruction (\p MIOpc) into a
/// three-address 32-bit LEA by widening the operands into 64-bit vregs and
/// extracting the low sub-register of the result.  Returns nullptr when the
/// transform is not available (non-64-bit targets).
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
    unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
    LiveVariables *LV, bool Is8BitOp) const {
  // We handle 8-bit adds and various 16-bit opcodes in the switch below.
  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
  assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
              *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
         "Unexpected type for LEA transform");

  // TODO: For a 32-bit target, we need to adjust the LEA variables with
  // something like this:
  //   Opcode = X86::LEA32r;
  //   InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
  //   OutRegLEA =
  //       Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
  //                : RegInfo.createVirtualRegister(&X86::GR32RegClass);
  if (!Subtarget.is64Bit())
    return nullptr;

  unsigned Opcode = X86::LEA64_32r;
  Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
  Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);

  // Build and insert into an implicit UNDEF value. This is OK because
  // we will be shifting and then extracting the lower 8/16-bits.
  // This has the potential to cause partial register stall. e.g.
  //   movw    (%rbp,%rcx,2), %dx
  //   leal    -65(%rdx), %esi
  // But testing has shown this *does* help performance in 64-bit mode (at
  // least on modern x86 machines).
  MachineBasicBlock::iterator MBBI = MI.getIterator();
  Register Dest = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  bool IsDead = MI.getOperand(0).isDead();
  bool IsKill = MI.getOperand(1).isKill();
  unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
  assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
  MachineInstr *InsMI =
      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
          .addReg(InRegLEA, RegState::Define, SubReg)
          .addReg(Src, getKillRegState(IsKill));

  // Emit the LEA that performs the arithmetic on the widened value.
  MachineInstrBuilder MIB =
      BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
  switch (MIOpc) {
  default: llvm_unreachable("Unreachable!");
  case X86::SHL8ri:
  case X86::SHL16ri: {
    unsigned ShAmt = MI.getOperand(2).getImm();
    MIB.addReg(0).addImm(1ULL << ShAmt)
       .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
    break;
  }
  case X86::INC8r:
  case X86::INC16r:
    addRegOffset(MIB, InRegLEA, true, 1);
    break;
  case X86::DEC8r:
  case X86::DEC16r:
    addRegOffset(MIB, InRegLEA, true, -1);
    break;
  case X86::ADD8ri:
  case X86::ADD8ri_DB:
  case X86::ADD16ri:
  case X86::ADD16ri8:
  case X86::ADD16ri_DB:
  case X86::ADD16ri8_DB:
    addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
    break;
  case X86::ADD8rr:
  case X86::ADD8rr_DB:
  case X86::ADD16rr:
  case X86::ADD16rr_DB: {
    Register Src2 = MI.getOperand(2).getReg();
    bool IsKill2 = MI.getOperand(2).isKill();
    assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
    unsigned InRegLEA2 = 0;
    MachineInstr *InsMI2 =
nullptr; 839200581Srdivacky if (Src == Src2) { 840344779Sdim // ADD8rr/ADD16rr killed %reg1028, %reg1028 841200581Srdivacky // just a single insert_subreg. 842344779Sdim addRegReg(MIB, InRegLEA, true, InRegLEA, false); 843200581Srdivacky } else { 844276479Sdim if (Subtarget.is64Bit()) 845344779Sdim InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); 846261991Sdim else 847344779Sdim InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); 848200581Srdivacky // Build and insert into an implicit UNDEF value. This is OK because 849344779Sdim // we will be shifting and then extracting the lower 8/16-bits. 850344779Sdim BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2); 851309124Sdim InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY)) 852344779Sdim .addReg(InRegLEA2, RegState::Define, SubReg) 853344779Sdim .addReg(Src2, getKillRegState(IsKill2)); 854344779Sdim addRegReg(MIB, InRegLEA, true, InRegLEA2, true); 855200581Srdivacky } 856344779Sdim if (LV && IsKill2 && InsMI2) 857309124Sdim LV->replaceKillInstruction(Src2, MI, *InsMI2); 858200581Srdivacky break; 859200581Srdivacky } 860200581Srdivacky } 861200581Srdivacky 862200581Srdivacky MachineInstr *NewMI = MIB; 863200581Srdivacky MachineInstr *ExtMI = 864309124Sdim BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY)) 865344779Sdim .addReg(Dest, RegState::Define | getDeadRegState(IsDead)) 866344779Sdim .addReg(OutRegLEA, RegState::Kill, SubReg); 867200581Srdivacky 868200581Srdivacky if (LV) { 869344779Sdim // Update live variables. 
870344779Sdim LV->getVarInfo(InRegLEA).Kills.push_back(NewMI); 871344779Sdim LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI); 872344779Sdim if (IsKill) 873309124Sdim LV->replaceKillInstruction(Src, MI, *InsMI); 874344779Sdim if (IsDead) 875309124Sdim LV->replaceKillInstruction(Dest, MI, *ExtMI); 876200581Srdivacky } 877200581Srdivacky 878200581Srdivacky return ExtMI; 879200581Srdivacky} 880200581Srdivacky 881288943Sdim/// This method must be implemented by targets that 882193323Sed/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target 883193323Sed/// may be able to convert a two-address instruction into a true 884193323Sed/// three-address instruction on demand. This allows the X86 target (for 885193323Sed/// example) to convert ADD and SHL instructions into LEA instructions if they 886193323Sed/// would require register copies due to two-addressness. 887193323Sed/// 888193323Sed/// This method returns a null pointer if the transformation cannot be 889193323Sed/// performed, otherwise it returns the new instruction. 890193323Sed/// 891193323SedMachineInstr * 892193323SedX86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 893309124Sdim MachineInstr &MI, LiveVariables *LV) const { 894261991Sdim // The following opcodes also sets the condition code register(s). Only 895261991Sdim // convert them to equivalent lea if the condition code register def's 896261991Sdim // are dead! 897261991Sdim if (hasLiveCondCodeDef(MI)) 898276479Sdim return nullptr; 899261991Sdim 900309124Sdim MachineFunction &MF = *MI.getParent()->getParent(); 901193323Sed // All instructions input are two-addr instructions. Get the known operands. 902309124Sdim const MachineOperand &Dest = MI.getOperand(0); 903309124Sdim const MachineOperand &Src = MI.getOperand(1); 904193323Sed 905344779Sdim // Ideally, operations with undef should be folded before we get here, but we 906344779Sdim // can't guarantee it. Bail out because optimizing undefs is a waste of time. 
907344779Sdim // Without this, we have to forward undef state to new register operands to 908344779Sdim // avoid machine verifier errors. 909344779Sdim if (Src.isUndef()) 910344779Sdim return nullptr; 911344779Sdim if (MI.getNumOperands() > 2) 912344779Sdim if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef()) 913344779Sdim return nullptr; 914344779Sdim 915276479Sdim MachineInstr *NewMI = nullptr; 916344779Sdim bool Is64Bit = Subtarget.is64Bit(); 917193323Sed 918353358Sdim bool Is8BitOp = false; 919309124Sdim unsigned MIOpc = MI.getOpcode(); 920193323Sed switch (MIOpc) { 921353358Sdim default: llvm_unreachable("Unreachable!"); 922193323Sed case X86::SHL64ri: { 923309124Sdim assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); 924261991Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 925276479Sdim if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; 926193323Sed 927218893Sdim // LEA can't handle RSP. 928360784Sdim if (Register::isVirtualRegister(Src.getReg()) && 929243830Sdim !MF.getRegInfo().constrainRegClass(Src.getReg(), 930243830Sdim &X86::GR64_NOSPRegClass)) 931276479Sdim return nullptr; 932218893Sdim 933309124Sdim NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)) 934321369Sdim .add(Dest) 935309124Sdim .addReg(0) 936309124Sdim .addImm(1ULL << ShAmt) 937321369Sdim .add(Src) 938309124Sdim .addImm(0) 939309124Sdim .addReg(0); 940193323Sed break; 941193323Sed } 942193323Sed case X86::SHL32ri: { 943309124Sdim assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); 944261991Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 945276479Sdim if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; 946193323Sed 947344779Sdim unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; 948261991Sdim 949218893Sdim // LEA can't handle ESP. 
950344779Sdim bool isKill; 951360784Sdim Register SrcReg; 952261991Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 953261991Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, 954344779Sdim SrcReg, isKill, ImplicitOp, LV)) 955276479Sdim return nullptr; 956218893Sdim 957309124Sdim MachineInstrBuilder MIB = 958309124Sdim BuildMI(MF, MI.getDebugLoc(), get(Opc)) 959321369Sdim .add(Dest) 960309124Sdim .addReg(0) 961309124Sdim .addImm(1ULL << ShAmt) 962344779Sdim .addReg(SrcReg, getKillRegState(isKill)) 963309124Sdim .addImm(0) 964309124Sdim .addReg(0); 965261991Sdim if (ImplicitOp.getReg() != 0) 966321369Sdim MIB.add(ImplicitOp); 967261991Sdim NewMI = MIB; 968261991Sdim 969193323Sed break; 970193323Sed } 971353358Sdim case X86::SHL8ri: 972353358Sdim Is8BitOp = true; 973353358Sdim LLVM_FALLTHROUGH; 974193323Sed case X86::SHL16ri: { 975309124Sdim assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); 976261991Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 977344779Sdim if (!isTruncatedShiftCountForLEA(ShAmt)) 978344779Sdim return nullptr; 979353358Sdim return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); 980193323Sed } 981280031Sdim case X86::INC64r: 982280031Sdim case X86::INC32r: { 983309124Sdim assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!"); 984344779Sdim unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : 985344779Sdim (Is64Bit ? 
X86::LEA64_32r : X86::LEA32r); 986344779Sdim bool isKill; 987360784Sdim Register SrcReg; 988280031Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 989344779Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, 990344779Sdim ImplicitOp, LV)) 991280031Sdim return nullptr; 992193323Sed 993309124Sdim MachineInstrBuilder MIB = 994309124Sdim BuildMI(MF, MI.getDebugLoc(), get(Opc)) 995321369Sdim .add(Dest) 996344779Sdim .addReg(SrcReg, getKillRegState(isKill)); 997280031Sdim if (ImplicitOp.getReg() != 0) 998321369Sdim MIB.add(ImplicitOp); 999218893Sdim 1000280031Sdim NewMI = addOffset(MIB, 1); 1001280031Sdim break; 1002280031Sdim } 1003280031Sdim case X86::DEC64r: 1004280031Sdim case X86::DEC32r: { 1005309124Sdim assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!"); 1006280031Sdim unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r 1007344779Sdim : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); 1008261991Sdim 1009344779Sdim bool isKill; 1010360784Sdim Register SrcReg; 1011280031Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 1012344779Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, 1013344779Sdim ImplicitOp, LV)) 1014280031Sdim return nullptr; 1015261991Sdim 1016309124Sdim MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) 1017321369Sdim .add(Dest) 1018344779Sdim .addReg(SrcReg, getKillRegState(isKill)); 1019280031Sdim if (ImplicitOp.getReg() != 0) 1020321369Sdim MIB.add(ImplicitOp); 1021218893Sdim 1022280031Sdim NewMI = addOffset(MIB, -1); 1023261991Sdim 1024280031Sdim break; 1025280031Sdim } 1026353358Sdim case X86::DEC8r: 1027353358Sdim case X86::INC8r: 1028353358Sdim Is8BitOp = true; 1029353358Sdim LLVM_FALLTHROUGH; 1030280031Sdim case X86::DEC16r: 1031353358Sdim case X86::INC16r: 1032353358Sdim return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); 1033280031Sdim case X86::ADD64rr: 1034280031Sdim case X86::ADD64rr_DB: 1035280031Sdim case 
X86::ADD32rr: 1036280031Sdim case X86::ADD32rr_DB: { 1037309124Sdim assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); 1038280031Sdim unsigned Opc; 1039280031Sdim if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) 1040280031Sdim Opc = X86::LEA64r; 1041280031Sdim else 1042344779Sdim Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; 1043261991Sdim 1044344779Sdim bool isKill; 1045360784Sdim Register SrcReg; 1046280031Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 1047280031Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, 1048344779Sdim SrcReg, isKill, ImplicitOp, LV)) 1049280031Sdim return nullptr; 1050218893Sdim 1051309124Sdim const MachineOperand &Src2 = MI.getOperand(2); 1052344779Sdim bool isKill2; 1053360784Sdim Register SrcReg2; 1054280031Sdim MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false); 1055280031Sdim if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false, 1056344779Sdim SrcReg2, isKill2, ImplicitOp2, LV)) 1057280031Sdim return nullptr; 1058218893Sdim 1059321369Sdim MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest); 1060280031Sdim if (ImplicitOp.getReg() != 0) 1061321369Sdim MIB.add(ImplicitOp); 1062280031Sdim if (ImplicitOp2.getReg() != 0) 1063321369Sdim MIB.add(ImplicitOp2); 1064218893Sdim 1065280031Sdim NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2); 1066280031Sdim if (LV && Src2.isKill()) 1067309124Sdim LV->replaceKillInstruction(SrcReg2, MI, *NewMI); 1068280031Sdim break; 1069280031Sdim } 1070344779Sdim case X86::ADD8rr: 1071353358Sdim case X86::ADD8rr_DB: 1072353358Sdim Is8BitOp = true; 1073353358Sdim LLVM_FALLTHROUGH; 1074280031Sdim case X86::ADD16rr: 1075344779Sdim case X86::ADD16rr_DB: 1076353358Sdim return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); 1077280031Sdim case X86::ADD64ri32: 1078280031Sdim case X86::ADD64ri8: 1079280031Sdim case X86::ADD64ri32_DB: 1080280031Sdim case X86::ADD64ri8_DB: 1081309124Sdim assert(MI.getNumOperands() 
>= 3 && "Unknown add instruction!"); 1082321369Sdim NewMI = addOffset( 1083321369Sdim BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src), 1084321369Sdim MI.getOperand(2)); 1085280031Sdim break; 1086280031Sdim case X86::ADD32ri: 1087280031Sdim case X86::ADD32ri8: 1088280031Sdim case X86::ADD32ri_DB: 1089280031Sdim case X86::ADD32ri8_DB: { 1090309124Sdim assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); 1091344779Sdim unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; 1092243830Sdim 1093344779Sdim bool isKill; 1094360784Sdim Register SrcReg; 1095280031Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 1096280031Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, 1097344779Sdim SrcReg, isKill, ImplicitOp, LV)) 1098280031Sdim return nullptr; 1099261991Sdim 1100309124Sdim MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) 1101321369Sdim .add(Dest) 1102344779Sdim .addReg(SrcReg, getKillRegState(isKill)); 1103280031Sdim if (ImplicitOp.getReg() != 0) 1104321369Sdim MIB.add(ImplicitOp); 1105261991Sdim 1106314564Sdim NewMI = addOffset(MIB, MI.getOperand(2)); 1107280031Sdim break; 1108193323Sed } 1109344779Sdim case X86::ADD8ri: 1110353358Sdim case X86::ADD8ri_DB: 1111353358Sdim Is8BitOp = true; 1112353358Sdim LLVM_FALLTHROUGH; 1113280031Sdim case X86::ADD16ri: 1114280031Sdim case X86::ADD16ri8: 1115280031Sdim case X86::ADD16ri_DB: 1116280031Sdim case X86::ADD16ri8_DB: 1117353358Sdim return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); 1118353358Sdim case X86::SUB8ri: 1119353358Sdim case X86::SUB16ri8: 1120353358Sdim case X86::SUB16ri: 1121353358Sdim /// FIXME: Support these similar to ADD8ri/ADD16ri*. 
1122353358Sdim return nullptr; 1123353358Sdim case X86::SUB32ri8: 1124353358Sdim case X86::SUB32ri: { 1125360661Sdim if (!MI.getOperand(2).isImm()) 1126360661Sdim return nullptr; 1127353358Sdim int64_t Imm = MI.getOperand(2).getImm(); 1128353358Sdim if (!isInt<32>(-Imm)) 1129353358Sdim return nullptr; 1130353358Sdim 1131353358Sdim assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); 1132353358Sdim unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; 1133353358Sdim 1134353358Sdim bool isKill; 1135360784Sdim Register SrcReg; 1136353358Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 1137353358Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, 1138353358Sdim SrcReg, isKill, ImplicitOp, LV)) 1139353358Sdim return nullptr; 1140353358Sdim 1141353358Sdim MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) 1142353358Sdim .add(Dest) 1143353358Sdim .addReg(SrcReg, getKillRegState(isKill)); 1144353358Sdim if (ImplicitOp.getReg() != 0) 1145353358Sdim MIB.add(ImplicitOp); 1146353358Sdim 1147353358Sdim NewMI = addOffset(MIB, -Imm); 1148353358Sdim break; 1149353358Sdim } 1150353358Sdim 1151353358Sdim case X86::SUB64ri8: 1152353358Sdim case X86::SUB64ri32: { 1153360661Sdim if (!MI.getOperand(2).isImm()) 1154360661Sdim return nullptr; 1155353358Sdim int64_t Imm = MI.getOperand(2).getImm(); 1156353358Sdim if (!isInt<32>(-Imm)) 1157353358Sdim return nullptr; 1158353358Sdim 1159353358Sdim assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!"); 1160353358Sdim 1161353358Sdim MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), 1162353358Sdim get(X86::LEA64r)).add(Dest).add(Src); 1163353358Sdim NewMI = addOffset(MIB, -Imm); 1164353358Sdim break; 1165353358Sdim } 1166353358Sdim 1167321369Sdim case X86::VMOVDQU8Z128rmk: 1168321369Sdim case X86::VMOVDQU8Z256rmk: 1169321369Sdim case X86::VMOVDQU8Zrmk: 1170321369Sdim case X86::VMOVDQU16Z128rmk: 1171321369Sdim case X86::VMOVDQU16Z256rmk: 1172321369Sdim case X86::VMOVDQU16Zrmk: 
1173321369Sdim case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk: 1174321369Sdim case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk: 1175321369Sdim case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk: 1176321369Sdim case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk: 1177321369Sdim case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk: 1178321369Sdim case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk: 1179321369Sdim case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk: 1180321369Sdim case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk: 1181321369Sdim case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk: 1182321369Sdim case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk: 1183321369Sdim case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk: 1184360784Sdim case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: 1185360784Sdim case X86::VBROADCASTSDZ256mk: 1186360784Sdim case X86::VBROADCASTSDZmk: 1187360784Sdim case X86::VBROADCASTSSZ128mk: 1188360784Sdim case X86::VBROADCASTSSZ256mk: 1189360784Sdim case X86::VBROADCASTSSZmk: 1190360784Sdim case X86::VPBROADCASTDZ128mk: 1191360784Sdim case X86::VPBROADCASTDZ256mk: 1192360784Sdim case X86::VPBROADCASTDZmk: 1193360784Sdim case X86::VPBROADCASTQZ128mk: 1194360784Sdim case X86::VPBROADCASTQZ256mk: 1195360784Sdim case X86::VPBROADCASTQZmk: { 1196321369Sdim unsigned Opc; 1197321369Sdim switch (MIOpc) { 1198321369Sdim default: llvm_unreachable("Unreachable!"); 1199360784Sdim case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; 1200360784Sdim case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; 1201360784Sdim case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; 1202360784Sdim case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; 1203360784Sdim case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; 1204360784Sdim case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; 1205360784Sdim case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; 1206360784Sdim case X86::VMOVDQU32Z256rmk: Opc = 
X86::VPBLENDMDZ256rmk; break; 1207360784Sdim case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; 1208360784Sdim case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; 1209360784Sdim case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; 1210360784Sdim case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break; 1211360784Sdim case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; 1212360784Sdim case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; 1213360784Sdim case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; 1214360784Sdim case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; 1215360784Sdim case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; 1216360784Sdim case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; 1217360784Sdim case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; 1218360784Sdim case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; 1219360784Sdim case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; 1220360784Sdim case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; 1221360784Sdim case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; 1222360784Sdim case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; 1223360784Sdim case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; 1224360784Sdim case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; 1225360784Sdim case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; 1226360784Sdim case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; 1227360784Sdim case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; 1228360784Sdim case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; 1229360784Sdim case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break; 1230360784Sdim case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break; 1231360784Sdim case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break; 1232360784Sdim case X86::VBROADCASTSSZ256mk: Opc = 
X86::VBLENDMPSZ256rmbk; break; 1233360784Sdim case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break; 1234360784Sdim case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break; 1235360784Sdim case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break; 1236360784Sdim case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break; 1237360784Sdim case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break; 1238360784Sdim case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break; 1239360784Sdim case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break; 1240321369Sdim } 1241321369Sdim 1242321369Sdim NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc)) 1243321369Sdim .add(Dest) 1244321369Sdim .add(MI.getOperand(2)) 1245321369Sdim .add(Src) 1246321369Sdim .add(MI.getOperand(3)) 1247321369Sdim .add(MI.getOperand(4)) 1248321369Sdim .add(MI.getOperand(5)) 1249321369Sdim .add(MI.getOperand(6)) 1250321369Sdim .add(MI.getOperand(7)); 1251321369Sdim break; 1252193323Sed } 1253360784Sdim 1254321369Sdim case X86::VMOVDQU8Z128rrk: 1255321369Sdim case X86::VMOVDQU8Z256rrk: 1256321369Sdim case X86::VMOVDQU8Zrrk: 1257321369Sdim case X86::VMOVDQU16Z128rrk: 1258321369Sdim case X86::VMOVDQU16Z256rrk: 1259321369Sdim case X86::VMOVDQU16Zrrk: 1260321369Sdim case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk: 1261321369Sdim case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk: 1262321369Sdim case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk: 1263321369Sdim case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk: 1264321369Sdim case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk: 1265321369Sdim case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk: 1266321369Sdim case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk: 1267321369Sdim case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk: 1268321369Sdim case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk: 1269321369Sdim case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk: 1270321369Sdim case X86::VMOVUPSZ256rrk: case 
X86::VMOVAPSZ256rrk: 1271321369Sdim case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: { 1272321369Sdim unsigned Opc; 1273321369Sdim switch (MIOpc) { 1274321369Sdim default: llvm_unreachable("Unreachable!"); 1275321369Sdim case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break; 1276321369Sdim case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break; 1277321369Sdim case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break; 1278321369Sdim case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break; 1279321369Sdim case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break; 1280321369Sdim case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break; 1281321369Sdim case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break; 1282321369Sdim case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break; 1283321369Sdim case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break; 1284321369Sdim case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break; 1285321369Sdim case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break; 1286321369Sdim case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break; 1287321369Sdim case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break; 1288321369Sdim case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break; 1289321369Sdim case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break; 1290321369Sdim case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break; 1291321369Sdim case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break; 1292321369Sdim case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break; 1293321369Sdim case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break; 1294321369Sdim case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break; 1295321369Sdim case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break; 1296321369Sdim case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break; 1297321369Sdim case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break; 1298321369Sdim case 
X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break; 1299321369Sdim case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break; 1300321369Sdim case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break; 1301321369Sdim case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break; 1302321369Sdim case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break; 1303321369Sdim case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break; 1304321369Sdim case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break; 1305321369Sdim } 1306193323Sed 1307321369Sdim NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc)) 1308321369Sdim .add(Dest) 1309321369Sdim .add(MI.getOperand(2)) 1310321369Sdim .add(Src) 1311321369Sdim .add(MI.getOperand(3)); 1312321369Sdim break; 1313321369Sdim } 1314321369Sdim } 1315321369Sdim 1316276479Sdim if (!NewMI) return nullptr; 1317193323Sed 1318193323Sed if (LV) { // Update live variables 1319243830Sdim if (Src.isKill()) 1320309124Sdim LV->replaceKillInstruction(Src.getReg(), MI, *NewMI); 1321243830Sdim if (Dest.isDead()) 1322309124Sdim LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI); 1323193323Sed } 1324193323Sed 1325309124Sdim MFI->insert(MI.getIterator(), NewMI); // Insert the new inst 1326193323Sed return NewMI; 1327193323Sed} 1328193323Sed 1329314564Sdim/// This determines which of three possible cases of a three source commute 1330314564Sdim/// the source indexes correspond to taking into account any mask operands. 1331314564Sdim/// All prevents commuting a passthru operand. Returns -1 if the commute isn't 1332314564Sdim/// possible. 1333314564Sdim/// Case 0 - Possible to commute the first and second operands. 1334314564Sdim/// Case 1 - Possible to commute the first and third operands. 1335314564Sdim/// Case 2 - Possible to commute the second and third operands. 
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
                                       unsigned SrcOpIdx2) {
  // Put the lowest index to SrcOpIdx1 to simplify the checks below.
  if (SrcOpIdx1 > SrcOpIdx2)
    std::swap(SrcOpIdx1, SrcOpIdx2);

  // With a k-mask, the three sources sit one operand further out (the mask
  // occupies operand 2), so shift the expected operand indices.
  unsigned Op1 = 1, Op2 = 2, Op3 = 3;
  if (X86II::isKMasked(TSFlags)) {
    Op2++;
    Op3++;
  }

  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
    return 0;
  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
    return 1;
  if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
    return 2;
  llvm_unreachable("Unknown three src commute case.");
}

/// Returns the FMA opcode of the 132/213/231 form that computes the same
/// value as \p MI does after operands \p SrcOpIdx1 and \p SrcOpIdx2 have been
/// commuted, looked up via the FormMapping table below.
unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
    const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
    const X86InstrFMA3Group &FMA3Group) const {

  unsigned Opc = MI.getOpcode();

  // TODO: Commuting the 1st operand of FMA*_Int requires some additional
  // analysis. The commute optimization is legal only if all users of FMA*_Int
  // use only the lowest element of the FMA*_Int instruction. Such analysis are
  // not implemented yet. So, just return 0 in that case.
  // When such analysis are available this place will be the right place for
  // calling it.
  assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
         "Intrinsic instructions can't commute operand 1");

  // Determine which case this commute is or if it can't be done.
  unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
                                         SrcOpIdx2);
  assert(Case < 3 && "Unexpected case number!");

  // Define the FMA forms mapping array that helps to map input FMA form
  // to output FMA form to preserve the operation semantics after
  // commuting the operands.
  const unsigned Form132Index = 0;
  const unsigned Form213Index = 1;
  const unsigned Form231Index = 2;
  static const unsigned FormMapping[][3] = {
    // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
    //   FMA132 A, C, b; ==> FMA231 C, A, b;
    //   FMA213 B, A, c; ==> FMA213 A, B, c;
    //   FMA231 C, A, b; ==> FMA132 A, C, b;
    { Form231Index, Form213Index, Form132Index },
    // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
    //   FMA132 A, c, B; ==> FMA132 B, c, A;
    //   FMA213 B, a, C; ==> FMA231 C, a, B;
    //   FMA231 C, a, B; ==> FMA213 B, a, C;
    { Form132Index, Form231Index, Form213Index },
    // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
    //   FMA132 a, C, B; ==> FMA213 a, B, C;
    //   FMA213 b, A, C; ==> FMA132 b, C, A;
    //   FMA231 c, A, B; ==> FMA231 c, B, A;
    { Form213Index, Form132Index, Form231Index }
  };

  // Find which of the three forms (132/213/231) the current opcode is.
  unsigned FMAForms[3];
  FMAForms[0] = FMA3Group.get132Opcode();
  FMAForms[1] = FMA3Group.get213Opcode();
  FMAForms[2] = FMA3Group.get231Opcode();
  unsigned FormIndex;
  for (FormIndex = 0; FormIndex < 3; FormIndex++)
    if (Opc == FMAForms[FormIndex])
      break;

  // Everything is ready, just adjust the FMA opcode and return it.
  FormIndex = FormMapping[Case][FormIndex];
  return FMAForms[FormIndex];
}

/// Rewrite the immediate of a VPTERNLOG so the instruction computes the same
/// function after source operands \p SrcOpIdx1 and \p SrcOpIdx2 have been
/// commuted: for each commute case, two pairs of bits in the imm8 must be
/// exchanged (see SwapMasks).
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
                             unsigned SrcOpIdx2) {
  // Determine which case this commute is or if it can't be done.
  unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
                                         SrcOpIdx2);
  assert(Case < 3 && "Unexpected case value!");

  // For each case we need to swap two pairs of bits in the final immediate.
  static const uint8_t SwapMasks[3][4] = {
    { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
    { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
    { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
  };

  uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
  // Clear out the bits we are swapping.
  uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
                           SwapMasks[Case][2] | SwapMasks[Case][3]);
  // If the immediate had a bit of the pair set, then set the opposite bit.
  if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
  if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
  if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
  if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
  MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
}

// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
// commuted.
// Predicate over the opcode only: matches register, memory, broadcast-memory
// and zero-masked (kz) variants of VPERMI2/VPERMT2 at 128/256/512 bits.
// Note that merge-masked ("k", non-kz) forms are deliberately NOT listed,
// so they are reported as non-commutable here.
static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
#define VPERM_CASES(Suffix) \
  case X86::VPERMI2##Suffix##128rr:    case X86::VPERMT2##Suffix##128rr:    \
  case X86::VPERMI2##Suffix##256rr:    case X86::VPERMT2##Suffix##256rr:    \
  case X86::VPERMI2##Suffix##rr:       case X86::VPERMT2##Suffix##rr:       \
  case X86::VPERMI2##Suffix##128rm:    case X86::VPERMT2##Suffix##128rm:    \
  case X86::VPERMI2##Suffix##256rm:    case X86::VPERMT2##Suffix##256rm:    \
  case X86::VPERMI2##Suffix##rm:       case X86::VPERMT2##Suffix##rm:       \
  case X86::VPERMI2##Suffix##128rrkz:  case X86::VPERMT2##Suffix##128rrkz:  \
  case X86::VPERMI2##Suffix##256rrkz:  case X86::VPERMT2##Suffix##256rrkz:  \
  case X86::VPERMI2##Suffix##rrkz:     case X86::VPERMT2##Suffix##rrkz:     \
  case X86::VPERMI2##Suffix##128rmkz:  case X86::VPERMT2##Suffix##128rmkz:  \
  case X86::VPERMI2##Suffix##256rmkz:  case X86::VPERMT2##Suffix##256rmkz:  \
  case X86::VPERMI2##Suffix##rmkz:     case X86::VPERMT2##Suffix##rmkz:

// Element types with broadcast-memory forms additionally list rmb/rmbkz.
#define VPERM_CASES_BROADCAST(Suffix) \
  VPERM_CASES(Suffix) \
  case X86::VPERMI2##Suffix##128rmb:   case X86::VPERMT2##Suffix##128rmb:   \
  case X86::VPERMI2##Suffix##256rmb:   case X86::VPERMT2##Suffix##256rmb:   \
  case X86::VPERMI2##Suffix##rmb:      case X86::VPERMT2##Suffix##rmb:      \
  case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
  case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
  case X86::VPERMI2##Suffix##rmbkz:    case X86::VPERMT2##Suffix##rmbkz:

  switch (Opcode) {
  default: return false;
  VPERM_CASES(B)
  VPERM_CASES_BROADCAST(D)
  VPERM_CASES_BROADCAST(PD)
  VPERM_CASES_BROADCAST(PS)
  VPERM_CASES_BROADCAST(Q)
  VPERM_CASES(W)
    return true;
  }
#undef VPERM_CASES_BROADCAST
#undef VPERM_CASES
}

// Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
// from the I opcode to the T opcode and vice versa.
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
#define VPERM_CASES(Orig, New) \
  case X86::Orig##128rr:    return X86::New##128rr;   \
  case X86::Orig##128rrkz:  return X86::New##128rrkz; \
  case X86::Orig##128rm:    return X86::New##128rm;   \
  case X86::Orig##128rmkz:  return X86::New##128rmkz; \
  case X86::Orig##256rr:    return X86::New##256rr;   \
  case X86::Orig##256rrkz:  return X86::New##256rrkz; \
  case X86::Orig##256rm:    return X86::New##256rm;   \
  case X86::Orig##256rmkz:  return X86::New##256rmkz; \
  case X86::Orig##rr:       return X86::New##rr;      \
  case X86::Orig##rrkz:     return X86::New##rrkz;    \
  case X86::Orig##rm:       return X86::New##rm;      \
  case X86::Orig##rmkz:     return X86::New##rmkz;

#define VPERM_CASES_BROADCAST(Orig, New) \
  VPERM_CASES(Orig, New) \
  case X86::Orig##128rmb:   return X86::New##128rmb;   \
  case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
  case X86::Orig##256rmb:   return X86::New##256rmb;   \
  case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
  case X86::Orig##rmb:      return X86::New##rmb;      \
  case X86::Orig##rmbkz:    return X86::New##rmbkz;

  // Map each I form to its T counterpart and vice versa, for every element
  // type; the variant suffix (width/memory/broadcast/kz) is preserved.
  switch (Opcode) {
  VPERM_CASES(VPERMI2B, VPERMT2B)
  VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
  VPERM_CASES_BROADCAST(VPERMI2PD,
VPERMT2PD)
  VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
  VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
  VPERM_CASES(VPERMI2W, VPERMT2W)
  VPERM_CASES(VPERMT2B, VPERMI2B)
  VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
  VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
  VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
  VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
  VPERM_CASES(VPERMT2W, VPERMI2W)
  }

  // Callers are expected to pass only opcodes accepted by
  // isCommutableVPERMV3Instruction, so falling out of the switch is a bug.
  llvm_unreachable("Unreachable!");
#undef VPERM_CASES_BROADCAST
#undef VPERM_CASES
}

// Commute operands OpIdx1 and OpIdx2 of MI. Many X86 instructions need an
// opcode and/or immediate adjustment in addition to the generic operand swap;
// each case below performs that adjustment on a working copy and then defers
// the actual operand swap to TargetInstrInfo::commuteInstructionImpl.
MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  // If the caller asked for a new instruction (NewMI), clone MI and mutate
  // the clone; otherwise mutate MI in place.
  auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
    if (NewMI)
      return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
    return MI;
  };

  switch (MI.getOpcode()) {
  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
    // Swapping the sources turns SHLD into SHRD (and vice versa) with the
    // shift amount replaced by (Size - Amt).
    unsigned Opc;
    unsigned Size;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
    case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
    case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
    case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
    case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
    case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
    }
    unsigned Amt = MI.getOperand(3).getImm();
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opc));
    WorkingMI.getOperand(3).setImm(Size - Amt);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::PFSUBrr:
  case X86::PFSUBRrr: {
    // PFSUB  x, y: x = x - y
    // PFSUBR x, y: x = y - x
    unsigned Opc =
        (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr);
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opc));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::BLENDPDrri:
  case X86::BLENDPSrri:
  case X86::VBLENDPDrri:
  case X86::VBLENDPSrri:
    // If we're optimizing for size, try to use MOVSD/MOVSS.
    if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
      unsigned Mask, Opc;
      switch (MI.getOpcode()) {
      default: llvm_unreachable("Unreachable!");
      case X86::BLENDPDrri:  Opc = X86::MOVSDrr;  Mask = 0x03; break;
      case X86::BLENDPSrri:  Opc = X86::MOVSSrr;  Mask = 0x0F; break;
      case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break;
      case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break;
      }
      // The commuted blend is a MOVS[SD] only when the swapped immediate
      // selects exactly element 0 from the second source.
      if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
        auto &WorkingMI = cloneIfNew(MI);
        WorkingMI.setDesc(get(Opc));
        WorkingMI.RemoveOperand(3);
        return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
                                                       /*NewMI=*/false,
                                                       OpIdx1, OpIdx2);
      }
    }
    // Otherwise fall through to the generic blend-immediate flip below.
    LLVM_FALLTHROUGH;
  case X86::PBLENDWrri:
  case X86::VBLENDPDYrri:
  case X86::VBLENDPSYrri:
  case X86::VPBLENDDrri:
  case X86::VPBLENDWrri:
  case X86::VPBLENDDYrri:
  case X86::VPBLENDWYrri:{
    // Commuting a blend is done by complementing the per-element selection
    // bits of the immediate within the instruction's significant mask.
    int8_t Mask;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::BLENDPDrri:    Mask = (int8_t)0x03; break;
    case X86::BLENDPSrri:    Mask = (int8_t)0x0F; break;
    case X86::PBLENDWrri:    Mask = (int8_t)0xFF; break;
    case X86::VBLENDPDrri:   Mask = (int8_t)0x03; break;
    case X86::VBLENDPSrri:   Mask = (int8_t)0x0F; break;
    case X86::VBLENDPDYrri:  Mask = (int8_t)0x0F; break;
    case X86::VBLENDPSYrri:  Mask = (int8_t)0xFF; break;
    case X86::VPBLENDDrri:   Mask = (int8_t)0x0F; break;
    case X86::VPBLENDWrri:   Mask = (int8_t)0xFF; break;
    case X86::VPBLENDDYrri:  Mask = (int8_t)0xFF; break;
    case X86::VPBLENDWYrri:  Mask = (int8_t)0xFF; break;
    }
    // Only the least significant bits of Imm are used.
    // Using int8_t to ensure it will be sign extended to the int64_t that
    // setImm takes in order to match isel behavior.
    int8_t Imm = MI.getOperand(3).getImm() & Mask;
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm(Mask ^ Imm);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::INSERTPSrr:
  case X86::VINSERTPSrr:
  case X86::VINSERTPSZrr: {
    // INSERTPS immediate layout: ZMask in bits 0-3, DstIdx in bits 4-5,
    // SrcIdx in bits 6-7.
    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
    unsigned ZMask = Imm & 15;
    unsigned DstIdx = (Imm >> 4) & 3;
    unsigned SrcIdx = (Imm >> 6) & 3;

    // We can commute insertps if we zero 2 of the elements, the insertion is
    // "inline" and we don't override the insertion with a zero.
    if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
        countPopulation(ZMask) == 2) {
      unsigned AltIdx = findFirstSet((ZMask | (1 << DstIdx)) ^ 15);
      assert(AltIdx < 4 && "Illegal insertion index");
      unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(AltImm);
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }
    return nullptr;
  }
  case X86::MOVSDrr:
  case X86::MOVSSrr:
  case X86::VMOVSDrr:
  case X86::VMOVSSrr:{
    // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
    if (Subtarget.hasSSE41()) {
      unsigned Mask, Opc;
      switch (MI.getOpcode()) {
      default: llvm_unreachable("Unreachable!");
      case X86::MOVSDrr:  Opc = X86::BLENDPDrri;  Mask = 0x02; break;
      case X86::MOVSSrr:  Opc = X86::BLENDPSrri;  Mask = 0x0E; break;
      case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
      case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
      }

      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }

    // Convert to SHUFPD.
    assert(MI.getOpcode() == X86::MOVSDrr &&
           "Can only commute MOVSDrr without SSE4.1");

    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(X86::SHUFPDrri));
    WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::SHUFPDrri: {
    // Commute to MOVSD.
    // Only the 0x02 immediate form is ever commutable (see
    // findCommutedOpIndices), which is exactly the MOVSD pattern.
    assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(X86::MOVSDrr));
    WorkingMI.RemoveOperand(3);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::PCLMULQDQrr:
  case X86::VPCLMULQDQrr:
  case X86::VPCLMULQDQYrr:
  case X86::VPCLMULQDQZrr:
  case X86::VPCLMULQDQZ128rr:
  case X86::VPCLMULQDQZ256rr: {
    // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
    // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
    // Commuting the sources swaps the roles of Imm[0] and Imm[4].
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned Src1Hi = Imm & 0x01;
    unsigned Src2Hi = Imm & 0x10;
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VPCMPBZ128rri:  case X86::VPCMPUBZ128rri:
  case X86::VPCMPBZ256rri:  case X86::VPCMPUBZ256rri:
  case X86::VPCMPBZrri:     case X86::VPCMPUBZrri:
  case X86::VPCMPDZ128rri:  case X86::VPCMPUDZ128rri:
  case X86::VPCMPDZ256rri:  case X86::VPCMPUDZ256rri:
  case X86::VPCMPDZrri:     case X86::VPCMPUDZrri:
  case X86::VPCMPQZ128rri:  case X86::VPCMPUQZ128rri:
  case X86::VPCMPQZ256rri:  case X86::VPCMPUQZ256rri:
  case X86::VPCMPQZrri:     case X86::VPCMPUQZrri:
  case X86::VPCMPWZ128rri:  case X86::VPCMPUWZ128rri:
  case X86::VPCMPWZ256rri:  case X86::VPCMPUWZ256rri:
  case X86::VPCMPWZrri:     case X86::VPCMPUWZrri:
  case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
  case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
  case X86::VPCMPBZrrik:    case X86::VPCMPUBZrrik:
  case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
  case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
  case X86::VPCMPDZrrik:    case X86::VPCMPUDZrrik:
  case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
  case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
  case X86::VPCMPQZrrik:    case X86::VPCMPUQZrrik:
  case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
  case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
  case X86::VPCMPWZrrik:    case X86::VPCMPUWZrrik: {
    // Flip comparison mode immediate (if necessary).
    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
    Imm = X86::getSwappedVPCMPImm(Imm);
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VPCOMBri: case X86::VPCOMUBri:
  case X86::VPCOMDri: case X86::VPCOMUDri:
  case X86::VPCOMQri: case X86::VPCOMUQri:
  case X86::VPCOMWri: case X86::VPCOMUWri: {
    // Flip comparison mode immediate (if necessary).
    unsigned Imm = MI.getOperand(3).getImm() & 0x7;
    Imm = X86::getSwappedVPCOMImm(Imm);
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm(Imm);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VCMPSDZrr:
  case X86::VCMPSSZrr:
  case X86::VCMPPDZrri:
  case X86::VCMPPSZrri:
  case X86::VCMPPDZ128rri:
  case X86::VCMPPSZ128rri:
  case X86::VCMPPDZ256rri:
  case X86::VCMPPSZ256rri:
  case X86::VCMPPDZrrik:
  case X86::VCMPPSZrrik:
  case X86::VCMPPDZ128rrik:
  case X86::VCMPPSZ128rrik:
  case X86::VCMPPDZ256rrik:
  case X86::VCMPPSZ256rrik: {
    // Swap the 5-bit AVX-512 comparison predicate in the last explicit
    // operand (getNumExplicitOperands skips implicit ones like MXCSR).
    unsigned Imm =
        MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f;
    Imm = X86::getSwappedVCMPImm(Imm);
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(MI.getNumExplicitOperands() - 1).setImm(Imm);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VPERM2F128rr:
  case X86::VPERM2I128rr: {
    // Flip permute source immediate.
    // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
    // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
    int8_t Imm = MI.getOperand(3).getImm() & 0xFF;
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::MOVHLPSrr:
  case X86::UNPCKHPDrr:
  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
  case X86::VMOVHLPSZrr:
  case X86::VUNPCKHPDZ128rr: {
    assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");

    // MOVHLPS and UNPCKHPD are each other's commuted form.
    unsigned Opc = MI.getOpcode();
    switch (Opc) {
    default: llvm_unreachable("Unreachable!");
    case X86::MOVHLPSrr:       Opc = X86::UNPCKHPDrr;      break;
    case X86::UNPCKHPDrr:      Opc = X86::MOVHLPSrr;       break;
    case X86::VMOVHLPSrr:      Opc = X86::VUNPCKHPDrr;     break;
    case X86::VUNPCKHPDrr:     Opc = X86::VMOVHLPSrr;      break;
    case X86::VMOVHLPSZrr:     Opc = X86::VUNPCKHPDZ128rr; break;
    case X86::VUNPCKHPDZ128rr: Opc = X86::VMOVHLPSZrr;     break;
    }
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opc));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: {
    // Swapping the two CMOV sources requires inverting the condition code,
    // which is the last operand of the descriptor.
    auto &WorkingMI = cloneIfNew(MI);
    unsigned OpNo = MI.getDesc().getNumOperands() - 1;
    X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
    WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case X86::VPTERNLOGDZrri:      case X86::VPTERNLOGDZrmi:
  case X86::VPTERNLOGDZ128rri:   case X86::VPTERNLOGDZ128rmi:
  case X86::VPTERNLOGDZ256rri:   case X86::VPTERNLOGDZ256rmi:
  case X86::VPTERNLOGQZrri:      case X86::VPTERNLOGQZrmi:
  case X86::VPTERNLOGQZ128rri:   case X86::VPTERNLOGQZ128rmi:
  case X86::VPTERNLOGQZ256rri:   case X86::VPTERNLOGQZ256rmi:
  case X86::VPTERNLOGDZrrik:
  case X86::VPTERNLOGDZ128rrik:
  case X86::VPTERNLOGDZ256rrik:
  case X86::VPTERNLOGQZrrik:
  case X86::VPTERNLOGQZ128rrik:
  case X86::VPTERNLOGQZ256rrik:
  case X86::VPTERNLOGDZrrikz:    case X86::VPTERNLOGDZrmikz:
  case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
  case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
  case X86::VPTERNLOGQZrrikz:    case X86::VPTERNLOGQZrmikz:
  case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
  case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
  case X86::VPTERNLOGDZ128rmbi:
  case X86::VPTERNLOGDZ256rmbi:
  case X86::VPTERNLOGDZrmbi:
  case X86::VPTERNLOGQZ128rmbi:
  case X86::VPTERNLOGQZ256rmbi:
  case X86::VPTERNLOGQZrmbi:
  case X86::VPTERNLOGDZ128rmbikz:
  case X86::VPTERNLOGDZ256rmbikz:
  case X86::VPTERNLOGDZrmbikz:
  case X86::VPTERNLOGQZ128rmbikz:
  case X86::VPTERNLOGQZ256rmbikz:
  case X86::VPTERNLOGQZrmbikz: {
    // Adjust the ternary-logic immediate for the swapped sources (see
    // commuteVPTERNLOG above).
    auto &WorkingMI = cloneIfNew(MI);
    commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  default: {
    if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
      // VPERMI2/VPERMT2: commuting swaps the index and table operands, so
      // switch to the opposite opcode family.
      unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }

    // FMA3 instructions commute by switching between the 132/213/231 forms.
    const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
                                                      MI.getDesc().TSFlags);
    if (FMA3Group) {
      unsigned Opc =
        getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }

    // Everything else is a plain operand swap with no opcode/imm change.
    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  }
  }
}

// Pick (or validate) a commutable pair of source operands for a 3-source
// instruction (FMA3, VPTERNLOG, ...), honoring k-mask operands and memory
// operands. SrcOpIdx1/SrcOpIdx2 may come in as CommuteAnyOperandIndex and
// are filled in on success.
bool
X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
                                            unsigned &SrcOpIdx1,
                                            unsigned &SrcOpIdx2,
                                            bool IsIntrinsic) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  unsigned FirstCommutableVecOp = 1;
  unsigned LastCommutableVecOp = 3;
  // Sentinel: -1U never matches a real operand index when not k-masked.
  unsigned KMaskOp = -1U;
  if (X86II::isKMasked(TSFlags)) {
    // For k-zero-masked operations it is Ok to commute the first vector
    // operand.
    // For regular k-masked operations a conservative choice is done as the
    // elements of the first vector operand, for which the corresponding bit
    // in the k-mask operand is set to 0, are copied to the result of the
    // instruction.
    // TODO/FIXME: The commute still may be legal if it is known that the
    // k-mask operand is set to either all ones or all zeroes.
    // It is also Ok to commute the 1st operand if all users of MI use only
    // the elements enabled by the k-mask operand. For example,
    //   v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
    //                                      //      : v1[i];
    //   VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
    //                                  // Ok, to commute v1 in FMADD213PSZrk.

    // The k-mask operand has index = 2 for masked and zero-masked operations.
    KMaskOp = 2;

    // The operand with index = 1 is used as a source for those elements for
    // which the corresponding bit in the k-mask is set to 0.
    if (X86II::isKMergeMasked(TSFlags))
      FirstCommutableVecOp = 3;

    LastCommutableVecOp++;
  } else if (IsIntrinsic) {
    // Commuting the first operand of an intrinsic instruction isn't possible
    // unless we can prove that only the lowest element of the result is used.
    FirstCommutableVecOp = 2;
  }

  // A memory operand (always the last source if present) can't be commuted.
  if (isMem(MI, LastCommutableVecOp))
    LastCommutableVecOp--;

  // Only the first RegOpsNum operands are commutable.
  // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
  // that the operand is not specified/fixed.
  if (SrcOpIdx1 != CommuteAnyOperandIndex &&
      (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
       SrcOpIdx1 == KMaskOp))
    return false;
  if (SrcOpIdx2 != CommuteAnyOperandIndex &&
      (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
       SrcOpIdx2 == KMaskOp))
    return false;

  // Look for two different register operands assumed to be commutable
  // regardless of the FMA opcode. The FMA opcode is adjusted later.
  if (SrcOpIdx1 == CommuteAnyOperandIndex ||
      SrcOpIdx2 == CommuteAnyOperandIndex) {
    unsigned CommutableOpIdx2 = SrcOpIdx2;

    // At least one of operands to be commuted is not specified and
    // this method is free to choose appropriate commutable operands.
    if (SrcOpIdx1 == SrcOpIdx2)
      // Both of operands are not fixed. By default set one of commutable
      // operands to the last register operand of the instruction.
      CommutableOpIdx2 = LastCommutableVecOp;
    else if (SrcOpIdx2 == CommuteAnyOperandIndex)
      // Only one of operands is not fixed.
      CommutableOpIdx2 = SrcOpIdx1;

    // CommutableOpIdx2 is well defined now. Let's choose another commutable
    // operand and assign its index to CommutableOpIdx1.
    Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();

    // Scan downward from the last commutable operand for a partner with a
    // different register.
    unsigned CommutableOpIdx1;
    for (CommutableOpIdx1 = LastCommutableVecOp;
         CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
      // Just ignore and skip the k-mask operand.
      if (CommutableOpIdx1 == KMaskOp)
        continue;

      // The commuted operands must have different registers.
      // Otherwise, the commute transformation does not change anything and
      // is useless then.
      if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
        break;
    }

    // No appropriate commutable operands were found.
    if (CommutableOpIdx1 < FirstCommutableVecOp)
      return false;

    // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2
    // to return those values.
    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
                              CommutableOpIdx1, CommutableOpIdx2))
      return false;
  }

  return true;
}

// Determine which pair of source operands of MI (if any) may be commuted,
// filling in SrcOpIdx1/SrcOpIdx2. Opcode-specific cases restrict or redirect
// the generic TargetInstrInfo logic.
bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
                                         unsigned &SrcOpIdx1,
                                         unsigned &SrcOpIdx2) const {
  const MCInstrDesc &Desc = MI.getDesc();
  if (!Desc.isCommutable())
    return false;

  switch (MI.getOpcode()) {
  case X86::CMPSDrr:
  case X86::CMPSSrr:
  case X86::CMPPDrri:
  case X86::CMPPSrri:
  case X86::VCMPSDrr:
  case X86::VCMPSSrr:
  case X86::VCMPPDrri:
  case X86::VCMPPSrri:
  case X86::VCMPPDYrri:
  case X86::VCMPPSYrri:
  case X86::VCMPSDZrr:
  case X86::VCMPSSZrr:
  case X86::VCMPPDZrri:
  case X86::VCMPPSZrri:
  case X86::VCMPPDZ128rri:
  case X86::VCMPPSZ128rri:
  case X86::VCMPPDZ256rri:
  case X86::VCMPPSZ256rri:
  case X86::VCMPPDZrrik:
  case X86::VCMPPSZrrik:
  case X86::VCMPPDZ128rrik:
  case X86::VCMPPSZ128rrik:
  case X86::VCMPPDZ256rrik:
  case X86::VCMPPSZ256rrik: {
    // Masked forms carry the k-register before the sources, shifting every
    // interesting operand index by one.
    unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;

    // Float comparison can be safely commuted for
    // Ordered/Unordered/Equal/NotEqual tests
    unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
    switch (Imm) {
    default:
      // EVEX versions can be commuted.
      if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX)
        break;
      return false;
    case 0x00: // EQUAL
    case 0x03: // UNORDERED
    case 0x04: // NOT EQUAL
    case 0x07: // ORDERED
      break;
    }

    // The indices of the commutable operands are 1 and 2 (or 2 and 3
    // when masked).
    // Assign them to the returned operand indices here.
    return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
                                2 + OpOffset);
  }
  case X86::MOVSSrr:
    // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
    // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
    // AVX implies sse4.1.
    if (Subtarget.hasSSE41())
      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
    return false;
  case X86::SHUFPDrri:
    // We can commute this to MOVSD.
    if (MI.getOperand(3).getImm() == 0x02)
      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
    return false;
  case X86::MOVHLPSrr:
  case X86::UNPCKHPDrr:
  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
  case X86::VMOVHLPSZrr:
  case X86::VUNPCKHPDZ128rr:
    // Commutable only when the UNPCKHPD<->MOVHLPS rewrite is available,
    // which requires SSE2 (see commuteInstructionImpl).
    if (Subtarget.hasSSE2())
      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
    return false;
  case X86::VPTERNLOGDZrri:      case X86::VPTERNLOGDZrmi:
  case X86::VPTERNLOGDZ128rri:   case X86::VPTERNLOGDZ128rmi:
  case X86::VPTERNLOGDZ256rri:   case X86::VPTERNLOGDZ256rmi:
  case X86::VPTERNLOGQZrri:      case X86::VPTERNLOGQZrmi:
  case X86::VPTERNLOGQZ128rri:   case X86::VPTERNLOGQZ128rmi:
  case X86::VPTERNLOGQZ256rri:   case X86::VPTERNLOGQZ256rmi:
  case X86::VPTERNLOGDZrrik:
  case X86::VPTERNLOGDZ128rrik:
  case X86::VPTERNLOGDZ256rrik:
  case X86::VPTERNLOGQZrrik:
  case X86::VPTERNLOGQZ128rrik:
  case X86::VPTERNLOGQZ256rrik:
  case X86::VPTERNLOGDZrrikz:    case X86::VPTERNLOGDZrmikz:
  case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
  case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
  case X86::VPTERNLOGQZrrikz:    case X86::VPTERNLOGQZrmikz:
  case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
  case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
  case X86::VPTERNLOGDZ128rmbi:
  case X86::VPTERNLOGDZ256rmbi:
  case X86::VPTERNLOGDZrmbi:
  case X86::VPTERNLOGQZ128rmbi:
  case X86::VPTERNLOGQZ256rmbi:
  case X86::VPTERNLOGQZrmbi:
  case X86::VPTERNLOGDZ128rmbikz:
  case X86::VPTERNLOGDZ256rmbikz:
  case X86::VPTERNLOGDZrmbikz:
  case X86::VPTERNLOGQZ128rmbikz:
  case X86::VPTERNLOGQZ256rmbikz:
  case X86::VPTERNLOGQZrmbikz:
    return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
  case X86::VPDPWSSDZ128r:
  case X86::VPDPWSSDZ128rk:
  case X86::VPDPWSSDZ128rkz:
  case X86::VPDPWSSDZ256r:
  case X86::VPDPWSSDZ256rk:
  case X86::VPDPWSSDZ256rkz:
  case X86::VPDPWSSDZr:
  case X86::VPDPWSSDZrk:
  case X86::VPDPWSSDZrkz:
  case X86::VPDPWSSDSZ128r:
  case X86::VPDPWSSDSZ128rk:
  case X86::VPDPWSSDSZ128rkz:
  case X86::VPDPWSSDSZ256r:
  case X86::VPDPWSSDSZ256rk:
  case X86::VPDPWSSDSZ256rkz:
  case X86::VPDPWSSDSZr:
  case X86::VPDPWSSDSZrk:
  case X86::VPDPWSSDSZrkz:
  case X86::VPMADD52HUQZ128r:
  case X86::VPMADD52HUQZ128rk:
  case X86::VPMADD52HUQZ128rkz:
  case X86::VPMADD52HUQZ256r:
  case X86::VPMADD52HUQZ256rk:
  case X86::VPMADD52HUQZ256rkz:
  case X86::VPMADD52HUQZr:
  case X86::VPMADD52HUQZrk:
  case X86::VPMADD52HUQZrkz:
  case X86::VPMADD52LUQZ128r:
  case X86::VPMADD52LUQZ128rk:
  case X86::VPMADD52LUQZ128rkz:
  case X86::VPMADD52LUQZ256r:
  case X86::VPMADD52LUQZ256rk:
  case X86::VPMADD52LUQZ256rkz:
  case X86::VPMADD52LUQZr:
  case X86::VPMADD52LUQZrk:
  case X86::VPMADD52LUQZrkz: {
    // Only the two multiplicands (operands 2 and 3) commute; the accumulator
    // (operand 1, tied to the def) stays put.
    unsigned CommutableOpIdx1 = 2;
    unsigned CommutableOpIdx2 = 3;
    if (X86II::isKMasked(Desc.TSFlags)) {
      // Skip the mask register.
      ++CommutableOpIdx1;
      ++CommutableOpIdx2;
    }
    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
                              CommutableOpIdx1, CommutableOpIdx2))
      return false;
    if (!MI.getOperand(SrcOpIdx1).isReg() ||
        !MI.getOperand(SrcOpIdx2).isReg())
      // No idea.
      return false;
    return true;
  }

  default:
    // FMA3 gets the dedicated three-source handling.
    const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
                                                      MI.getDesc().TSFlags);
    if (FMA3Group)
      return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
                                           FMA3Group->isIntrinsic());

    // Handled masked instructions since we need to skip over the mask input
    // and the preserved input.
    if (X86II::isKMasked(Desc.TSFlags)) {
      // First assume that the first input is the mask operand and skip past it.
      unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
      unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
      // Check if the first input is tied. If there isn't one then we only
      // need to skip the mask operand which we did above.
      if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
                                             MCOI::TIED_TO) != -1)) {
        // If this is zero masking instruction with a tied operand, we need to
        // move the first index back to the first input since this must
        // be a 3 input instruction and we want the first two non-mask inputs.
        // Otherwise this is a 2 input instruction with a preserved input and
        // mask, so we need to move the indices to skip one more input.
2161341825Sdim if (X86II::isKMergeMasked(Desc.TSFlags)) { 2162314564Sdim ++CommutableOpIdx1; 2163314564Sdim ++CommutableOpIdx2; 2164341825Sdim } else { 2165341825Sdim --CommutableOpIdx1; 2166314564Sdim } 2167314564Sdim } 2168314564Sdim 2169314564Sdim if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2170314564Sdim CommutableOpIdx1, CommutableOpIdx2)) 2171314564Sdim return false; 2172314564Sdim 2173314564Sdim if (!MI.getOperand(SrcOpIdx1).isReg() || 2174314564Sdim !MI.getOperand(SrcOpIdx2).isReg()) 2175314564Sdim // No idea. 2176314564Sdim return false; 2177314564Sdim return true; 2178314564Sdim } 2179314564Sdim 2180309124Sdim return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); 2181309124Sdim } 2182296417Sdim return false; 2183276479Sdim} 2184276479Sdim 2185353358SdimX86::CondCode X86::getCondFromBranch(const MachineInstr &MI) { 2186353358Sdim switch (MI.getOpcode()) { 2187193323Sed default: return X86::COND_INVALID; 2188353358Sdim case X86::JCC_1: 2189353358Sdim return static_cast<X86::CondCode>( 2190353358Sdim MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); 2191193323Sed } 2192193323Sed} 2193193323Sed 2194353358Sdim/// Return condition code of a SETCC opcode. 2195353358SdimX86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) { 2196353358Sdim switch (MI.getOpcode()) { 2197239462Sdim default: return X86::COND_INVALID; 2198353358Sdim case X86::SETCCr: case X86::SETCCm: 2199353358Sdim return static_cast<X86::CondCode>( 2200353358Sdim MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); 2201239462Sdim } 2202239462Sdim} 2203239462Sdim 2204288943Sdim/// Return condition code of a CMov opcode. 
2205353358SdimX86::CondCode X86::getCondFromCMov(const MachineInstr &MI) { 2206353358Sdim switch (MI.getOpcode()) { 2207239462Sdim default: return X86::COND_INVALID; 2208353358Sdim case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: 2209353358Sdim case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm: 2210353358Sdim return static_cast<X86::CondCode>( 2211353358Sdim MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); 2212239462Sdim } 2213239462Sdim} 2214239462Sdim 2215288943Sdim/// Return the inverse of the specified condition, 2216193323Sed/// e.g. turning COND_E to COND_NE. 2217193323SedX86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { 2218193323Sed switch (CC) { 2219198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 2220193323Sed case X86::COND_E: return X86::COND_NE; 2221193323Sed case X86::COND_NE: return X86::COND_E; 2222193323Sed case X86::COND_L: return X86::COND_GE; 2223193323Sed case X86::COND_LE: return X86::COND_G; 2224193323Sed case X86::COND_G: return X86::COND_LE; 2225193323Sed case X86::COND_GE: return X86::COND_L; 2226193323Sed case X86::COND_B: return X86::COND_AE; 2227193323Sed case X86::COND_BE: return X86::COND_A; 2228193323Sed case X86::COND_A: return X86::COND_BE; 2229193323Sed case X86::COND_AE: return X86::COND_B; 2230193323Sed case X86::COND_S: return X86::COND_NS; 2231193323Sed case X86::COND_NS: return X86::COND_S; 2232193323Sed case X86::COND_P: return X86::COND_NP; 2233193323Sed case X86::COND_NP: return X86::COND_P; 2234193323Sed case X86::COND_O: return X86::COND_NO; 2235193323Sed case X86::COND_NO: return X86::COND_O; 2236309124Sdim case X86::COND_NE_OR_P: return X86::COND_E_AND_NP; 2237309124Sdim case X86::COND_E_AND_NP: return X86::COND_NE_OR_P; 2238193323Sed } 2239193323Sed} 2240193323Sed 2241288943Sdim/// Assuming the flags are set by MI(a,b), return the condition code if we 2242288943Sdim/// modify the instructions such that flags are set by MI(b,a). 
2243239462Sdimstatic X86::CondCode getSwappedCondition(X86::CondCode CC) { 2244239462Sdim switch (CC) { 2245239462Sdim default: return X86::COND_INVALID; 2246239462Sdim case X86::COND_E: return X86::COND_E; 2247239462Sdim case X86::COND_NE: return X86::COND_NE; 2248239462Sdim case X86::COND_L: return X86::COND_G; 2249239462Sdim case X86::COND_LE: return X86::COND_GE; 2250239462Sdim case X86::COND_G: return X86::COND_L; 2251239462Sdim case X86::COND_GE: return X86::COND_LE; 2252239462Sdim case X86::COND_B: return X86::COND_A; 2253239462Sdim case X86::COND_BE: return X86::COND_AE; 2254239462Sdim case X86::COND_A: return X86::COND_B; 2255239462Sdim case X86::COND_AE: return X86::COND_BE; 2256239462Sdim } 2257239462Sdim} 2258239462Sdim 2259321369Sdimstd::pair<X86::CondCode, bool> 2260321369SdimX86::getX86ConditionCode(CmpInst::Predicate Predicate) { 2261321369Sdim X86::CondCode CC = X86::COND_INVALID; 2262321369Sdim bool NeedSwap = false; 2263321369Sdim switch (Predicate) { 2264321369Sdim default: break; 2265321369Sdim // Floating-point Predicates 2266321369Sdim case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; 2267321369Sdim case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH; 2268321369Sdim case CmpInst::FCMP_OGT: CC = X86::COND_A; break; 2269321369Sdim case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH; 2270321369Sdim case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; 2271321369Sdim case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH; 2272321369Sdim case CmpInst::FCMP_ULT: CC = X86::COND_B; break; 2273321369Sdim case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH; 2274321369Sdim case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; 2275321369Sdim case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; 2276321369Sdim case CmpInst::FCMP_UNO: CC = X86::COND_P; break; 2277321369Sdim case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; 2278321369Sdim case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH; 2279321369Sdim case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; 
2280321369Sdim 2281321369Sdim // Integer Predicates 2282321369Sdim case CmpInst::ICMP_EQ: CC = X86::COND_E; break; 2283321369Sdim case CmpInst::ICMP_NE: CC = X86::COND_NE; break; 2284321369Sdim case CmpInst::ICMP_UGT: CC = X86::COND_A; break; 2285321369Sdim case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; 2286321369Sdim case CmpInst::ICMP_ULT: CC = X86::COND_B; break; 2287321369Sdim case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; 2288321369Sdim case CmpInst::ICMP_SGT: CC = X86::COND_G; break; 2289321369Sdim case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; 2290321369Sdim case CmpInst::ICMP_SLT: CC = X86::COND_L; break; 2291321369Sdim case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; 2292321369Sdim } 2293321369Sdim 2294321369Sdim return std::make_pair(CC, NeedSwap); 2295321369Sdim} 2296321369Sdim 2297353358Sdim/// Return a setcc opcode based on whether it has memory operand. 2298353358Sdimunsigned X86::getSETOpc(bool HasMemoryOperand) { 2299353358Sdim return HasMemoryOperand ? X86::SETCCr : X86::SETCCm; 2300239462Sdim} 2301239462Sdim 2302353358Sdim/// Return a cmov opcode for the given register size in bytes, and operand type. 2303353358Sdimunsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) { 2304239462Sdim switch(RegBytes) { 2305239462Sdim default: llvm_unreachable("Illegal register size!"); 2306353358Sdim case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr; 2307353358Sdim case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr; 2308360661Sdim case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr; 2309239462Sdim } 2310239462Sdim} 2311239462Sdim 2312341825Sdim/// Get the VPCMP immediate for the given condition. 
2313341825Sdimunsigned X86::getVPCMPImmForCond(ISD::CondCode CC) { 2314341825Sdim switch (CC) { 2315341825Sdim default: llvm_unreachable("Unexpected SETCC condition"); 2316341825Sdim case ISD::SETNE: return 4; 2317341825Sdim case ISD::SETEQ: return 0; 2318341825Sdim case ISD::SETULT: 2319341825Sdim case ISD::SETLT: return 1; 2320341825Sdim case ISD::SETUGT: 2321341825Sdim case ISD::SETGT: return 6; 2322341825Sdim case ISD::SETUGE: 2323341825Sdim case ISD::SETGE: return 5; 2324341825Sdim case ISD::SETULE: 2325341825Sdim case ISD::SETLE: return 2; 2326341825Sdim } 2327341825Sdim} 2328341825Sdim 2329360784Sdim/// Get the VPCMP immediate if the operands are swapped. 2330341825Sdimunsigned X86::getSwappedVPCMPImm(unsigned Imm) { 2331341825Sdim switch (Imm) { 2332341825Sdim default: llvm_unreachable("Unreachable!"); 2333341825Sdim case 0x01: Imm = 0x06; break; // LT -> NLE 2334341825Sdim case 0x02: Imm = 0x05; break; // LE -> NLT 2335341825Sdim case 0x05: Imm = 0x02; break; // NLT -> LE 2336341825Sdim case 0x06: Imm = 0x01; break; // NLE -> LT 2337341825Sdim case 0x00: // EQ 2338341825Sdim case 0x03: // FALSE 2339341825Sdim case 0x04: // NE 2340341825Sdim case 0x07: // TRUE 2341341825Sdim break; 2342341825Sdim } 2343341825Sdim 2344341825Sdim return Imm; 2345341825Sdim} 2346341825Sdim 2347360784Sdim/// Get the VPCOM immediate if the operands are swapped. 
unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
  switch (Imm) {
  default: llvm_unreachable("Unreachable!");
  case 0x00: Imm = 0x02; break; // LT -> GT
  case 0x01: Imm = 0x03; break; // LE -> GE
  case 0x02: Imm = 0x00; break; // GT -> LT
  case 0x03: Imm = 0x01; break; // GE -> LE
  case 0x04: // EQ
  case 0x05: // NE
  case 0x06: // FALSE
  case 0x07: // TRUE
    break;
  }

  return Imm;
}

/// Get the VCMP immediate if the operands are swapped.
unsigned X86::getSwappedVCMPImm(unsigned Imm) {
  // Only need the lower 2 bits to distinguish.
  switch (Imm & 0x3) {
  default: llvm_unreachable("Unreachable!");
  case 0x00: case 0x03:
    // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted.
    break;
  case 0x01: case 0x02:
    // Need to toggle bits 3:0. Bit 4 stays the same.
    Imm ^= 0xf;
    break;
  }

  return Imm;
}

/// Return true if MI is a terminator whose execution is not guarded by a
/// predicate (i.e. the branch analysis below must account for it).
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
  if (!MI.isTerminator()) return false;

  // Conditional branch is a special case.
  if (MI.isBranch() && !MI.isBarrier())
    return true;
  if (!MI.isPredicable())
    return true;
  return !isPredicated(MI);
}

/// Return true if MI is one of the TCRETURN pseudos, i.e. an unconditional
/// tail call.
bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
    return true;
  default:
    return false;
  }
}

/// Decide whether TailCall can be predicated with BranchCond (i.e. turned
/// into a TCRETURNdicc/TCRETURNdi64cc). Only direct calls with no stack
/// adjustment and a single representable condition qualify.
bool X86InstrInfo::canMakeTailCallConditional(
    SmallVectorImpl<MachineOperand> &BranchCond,
    const MachineInstr &TailCall) const {
  if (TailCall.getOpcode() != X86::TCRETURNdi &&
      TailCall.getOpcode() != X86::TCRETURNdi64) {
    // Only direct calls can be done with a conditional branch.
    return false;
  }

  const MachineFunction *MF = TailCall.getParent()->getParent();
  if (Subtarget.isTargetWin64() && MF->hasWinCFI()) {
    // Conditional tail calls confuse the Win64 unwinder.
    return false;
  }

  assert(BranchCond.size() == 1);
  if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
    // Can't make a conditional tail call with this condition.
    return false;
  }

  const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
  if (X86FI->getTCReturnAddrDelta() != 0 ||
      TailCall.getOperand(1).getImm() != 0) {
    // A conditional tail call cannot do any stack adjustment.
    return false;
  }

  return true;
}

/// Replace the conditional branch in MBB that matches BranchCond with a
/// conditional tail call equivalent to TailCall.
void X86InstrInfo::replaceBranchWithTailCall(
    MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
    const MachineInstr &TailCall) const {
  assert(canMakeTailCallConditional(BranchCond, TailCall));

  // Scan from the bottom for the branch carrying the expected condition.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugInstr())
      continue;
    if (!I->isBranch())
      assert(0 && "Can't find the branch to replace!");

    X86::CondCode CC = X86::getCondFromBranch(*I);
    assert(BranchCond.size() == 1);
    if (CC != BranchCond[0].getImm())
      continue;

    break;
  }

  unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
                                                         : X86::TCRETURNdi64cc;

  auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
  MIB->addOperand(TailCall.getOperand(0)); // Destination.
  MIB.addImm(0); // Stack offset (not used).
  MIB->addOperand(BranchCond[0]); // Condition.
  MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.

  // Add implicit uses and defs of all live regs potentially clobbered by the
  // call. This way they still appear live across the call.
  LivePhysRegs LiveRegs(getRegisterInfo());
  LiveRegs.addLiveOuts(MBB);
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
  LiveRegs.stepForward(*MIB, Clobbers);
  for (const auto &C : Clobbers) {
    MIB.addReg(C.first, RegState::Implicit);
    MIB.addReg(C.first, RegState::Implicit | RegState::Define);
  }

  I->eraseFromParent();
}

// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
// not be a fallthrough MBB now due to layout changes). Return nullptr if the
// fallthrough MBB cannot be identified.
static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
                                            MachineBasicBlock *TBB) {
  // Look for non-EHPad successors other than TBB. If we find exactly one, it
  // is the fallthrough MBB. If we find zero, then TBB is both the target MBB
  // and fallthrough MBB. If we find more than one, we cannot identify the
  // fallthrough MBB and should return nullptr.
  MachineBasicBlock *FallthroughBB = nullptr;
  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
    if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB))
      continue;
    // Return a nullptr if we found more than one fallthrough successor.
    if (FallthroughBB && FallthroughBB != TBB)
      return nullptr;
    FallthroughBB = *SI;
  }
  return FallthroughBB;
}

/// Shared implementation of analyzeBranch/analyzeBranchPredicate. Walks the
/// terminators bottom-up, filling TBB/FBB/Cond and collecting the actual
/// conditional branch instructions in CondBranches. Returns true if the
/// block's control flow cannot be understood (analysis failure).
bool X86InstrInfo::AnalyzeBranchImpl(
    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
    SmallVectorImpl<MachineOperand> &Cond,
    SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {

  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugInstr())
      continue;

    // Working from the bottom, when we see a non-terminator instruction, we're
    // done.
    if (!isUnpredicatedTerminator(*I))
      break;

    // A terminator that isn't a branch can't easily be handled by this
    // analysis.
    if (!I->isBranch())
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP_1) {
      UnCondBrIter = I;

      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (std::next(I) != MBB.end())
        std::next(I)->eraseFromParent();

      Cond.clear();
      FBB = nullptr;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = nullptr;
        I->eraseFromParent();
        I = MBB.end();
        UnCondBrIter = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    X86::CondCode BranchCode = X86::getCondFromBranch(*I);
    if (BranchCode == X86::COND_INVALID)
      return true; // Can't handle indirect branch.

    // In practice we should never have an undef eflags operand, if we do
    // abort here as we are not prepared to preserve the flag.
    if (I->findRegisterUseOperand(X86::EFLAGS)->isUndef())
      return true;

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
      if (AllowModify && UnCondBrIter != MBB.end() &&
          MBB.isLayoutSuccessor(TargetBB)) {
        // If we can modify the code and it ends in something like:
        //
        //   jCC L1
        //   jmp L2
        // L1:
        //   ...
        // L2:
        //
        // Then we can change this to:
        //
        //   jnCC L2
        // L1:
        //   ...
        // L2:
        //
        // Which is a bit more efficient.
        // We conditionally jump to the fall-through block.
        BranchCode = GetOppositeBranchCondition(BranchCode);
        MachineBasicBlock::iterator OldInst = I;

        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JCC_1))
            .addMBB(UnCondBrIter->getOperand(0).getMBB())
            .addImm(BranchCode);
        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
          .addMBB(TargetBB);

        OldInst->eraseFromParent();
        UnCondBrIter->eraseFromParent();

        // Restart the analysis.
        UnCondBrIter = MBB.end();
        I = MBB.end();
        continue;
      }

      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      CondBranches.push_back(&*I);
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination and their condition
    // opcodes fit one of the special multi-branch idioms.
    assert(Cond.size() == 1);
    assert(TBB);

    // If the conditions are the same, we can leave them alone.
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    auto NewTBB = I->getOperand(0).getMBB();
    if (OldBranchCode == BranchCode && TBB == NewTBB)
      continue;

    // If they differ, see if they fit one of the known patterns. Theoretically,
    // we could handle more patterns here, but we shouldn't expect to see them
    // if instruction selection has done a reasonable job.
    if (TBB == NewTBB &&
               ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
                (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
      BranchCode = X86::COND_NE_OR_P;
    } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
               (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
      if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB)))
        return true;

      // X86::COND_E_AND_NP usually has two different branch destinations.
      //
      // JP B1
      // JE B2
      // JMP B1
      // B1:
      // B2:
      //
      // Here this condition branches to B2 only if NP && E. It has another
      // equivalent form:
      //
      // JNE B1
      // JNP B2
      // JMP B1
      // B1:
      // B2:
      //
      // Similarly it branches to B2 only if E && NP. That is why this condition
      // is named with COND_E_AND_NP.
      BranchCode = X86::COND_E_AND_NP;
    } else
      return true;

    // Update the MachineOperand.
    Cond[0].setImm(BranchCode);
    CondBranches.push_back(&*I);
  }

  return false;
}

/// TargetInstrInfo branch analysis hook; thin wrapper over
/// AnalyzeBranchImpl that discards the collected branch instructions.
bool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  SmallVector<MachineInstr *, 4> CondBranches;
  return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
}

/// Try to express the block's terminator as a MachineBranchPredicate
/// (currently only the "test %reg,%reg; je/jne" idiom). Returns true on
/// failure, per the TargetInstrInfo convention.
bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                          MachineBranchPredicate &MBP,
                                          bool AllowModify) const {
  using namespace std::placeholders;

  SmallVector<MachineOperand, 4> Cond;
  SmallVector<MachineInstr *, 4> CondBranches;
  if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
                        AllowModify))
    return true;

  if (Cond.size() != 1)
    return true;

  assert(MBP.TrueDest && "expected!");

  if (!MBP.FalseDest)
    MBP.FalseDest = MBB.getNextNode();

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // Find the instruction that defines EFLAGS, noting whether anything between
  // it and the branch also reads the flags.
  MachineInstr *ConditionDef = nullptr;
  bool SingleUseCondition = true;

  for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
    if (I->modifiesRegister(X86::EFLAGS, TRI)) {
      ConditionDef = &*I;
      break;
    }

    if (I->readsRegister(X86::EFLAGS, TRI))
      SingleUseCondition = false;
  }

  if (!ConditionDef)
    return true;

  // Flags live into a successor also count as an extra use.
  if (SingleUseCondition) {
    for (auto *Succ : MBB.successors())
      if (Succ->isLiveIn(X86::EFLAGS))
        SingleUseCondition = false;
  }

  MBP.ConditionDef = ConditionDef;
  MBP.SingleUseCondition = SingleUseCondition;

  // Currently we only recognize the simple pattern:
  //
  //   test %reg, %reg
  //   je %label
  //
  const unsigned TestOpcode =
      Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;

  if (ConditionDef->getOpcode() == TestOpcode &&
      ConditionDef->getNumOperands() == 3 &&
      ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
      (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
    MBP.LHS = ConditionDef->getOperand(0);
    MBP.RHS = MachineOperand::CreateImm(0);
    MBP.Predicate = Cond[0].getImm() == X86::COND_NE
                        ? MachineBranchPredicate::PRED_NE
                        : MachineBranchPredicate::PRED_EQ;
    return false;
  }

  return true;
}

/// Remove the branch instructions at the end of MBB, returning how many were
/// deleted. Stops at the first non-branch terminator.
unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                    int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.end();
  unsigned Count = 0;

  while (I != MBB.begin()) {
    --I;
    if (I->isDebugInstr())
      continue;
    if (I->getOpcode() != X86::JMP_1 &&
        X86::getCondFromBranch(*I) == X86::COND_INVALID)
      break;
    // Remove the branch.
    I->eraseFromParent();
    I = MBB.end();
    ++Count;
  }

  return Count;
}

/// Insert branch code at the end of MBB implementing the (possibly
/// synthetic two-flag) condition in Cond, returning the number of
/// instructions added.
unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *TBB,
                                    MachineBasicBlock *FBB,
                                    ArrayRef<MachineOperand> Cond,
                                    const DebugLoc &DL,
                                    int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "X86 branch conditions have one component!");
  assert(!BytesAdded && "code size not handled");

  if (Cond.empty()) {
    // Unconditional branch?
    assert(!FBB && "Unconditional branch with multiple successors!");
    BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
    return 1;
  }

  // If FBB is null, it is implied to be a fall-through block.
  bool FallThru = FBB == nullptr;

  // Conditional branch.
  unsigned Count = 0;
  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
  switch (CC) {
  case X86::COND_NE_OR_P:
    // Synthesize NE_OR_P with two branches.
    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NE);
    ++Count;
    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_P);
    ++Count;
    break;
  case X86::COND_E_AND_NP:
    // Use the next block of MBB as FBB if it is null.
    if (FBB == nullptr) {
      FBB = getFallThroughMBB(&MBB, TBB);
      assert(FBB && "MBB cannot be the last block in function when the false "
                    "body is a fall-through.");
    }
    // Synthesize COND_E_AND_NP with two branches.
    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(FBB).addImm(X86::COND_NE);
    ++Count;
    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NP);
    ++Count;
    break;
  default: {
    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(CC);
    ++Count;
  }
  }
  if (!FallThru) {
    // Two-way Conditional branch. Insert the second branch.
    BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
    ++Count;
  }
  return Count;
}

/// Report whether a select of TrueReg/FalseReg under Cond can be lowered to
/// CMOV, and its estimated latencies if so.
bool X86InstrInfo::
canInsertSelect(const MachineBasicBlock &MBB,
                ArrayRef<MachineOperand> Cond,
                unsigned TrueReg, unsigned FalseReg,
                int &CondCycles, int &TrueCycles, int &FalseCycles) const {
  // Not all subtargets have cmov instructions.
  if (!Subtarget.hasCMov())
    return false;
  if (Cond.size() != 1)
    return false;
  // We cannot do the composite conditions, at least not in SSA form.
  if ((X86::CondCode)Cond[0].getImm() > X86::LAST_VALID_COND)
    return false;

  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
  if (X86::GR16RegClass.hasSubClassEq(RC) ||
      X86::GR32RegClass.hasSubClassEq(RC) ||
      X86::GR64RegClass.hasSubClassEq(RC)) {
    // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
    // Bridge. Probably Ivy Bridge as well.
    CondCycles = 2;
    TrueCycles = 2;
    FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

/// Materialize a select as a register-form CMOV: DstReg = Cond ? TrueReg
/// : FalseReg. Note CMOV takes the false value first.
void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, unsigned DstReg,
                               ArrayRef<MachineOperand> Cond, unsigned TrueReg,
                               unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
  assert(Cond.size() == 1 && "Invalid Cond array");
  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8,
                                    false /*HasMemoryOperand*/);
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(FalseReg)
      .addReg(TrueReg)
      .addImm(Cond[0].getImm());
}

/// Test if the given register is a physical h register.
2884193323Sedstatic bool isHReg(unsigned Reg) { 2885193323Sed return X86::GR8_ABCD_HRegClass.contains(Reg); 2886193323Sed} 2887193323Sed 2888212904Sdim// Try and copy between VR128/VR64 and GR64 registers. 2889341825Sdimstatic unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, 2890276479Sdim const X86Subtarget &Subtarget) { 2891314564Sdim bool HasAVX = Subtarget.hasAVX(); 2892314564Sdim bool HasAVX512 = Subtarget.hasAVX512(); 2893261991Sdim 2894314564Sdim // SrcReg(MaskReg) -> DestReg(GR64) 2895314564Sdim // SrcReg(MaskReg) -> DestReg(GR32) 2896314564Sdim 2897314564Sdim // All KMASK RegClasses hold the same k registers, can be tested against anyone. 2898314564Sdim if (X86::VK16RegClass.contains(SrcReg)) { 2899314564Sdim if (X86::GR64RegClass.contains(DestReg)) { 2900314564Sdim assert(Subtarget.hasBWI()); 2901314564Sdim return X86::KMOVQrk; 2902314564Sdim } 2903314564Sdim if (X86::GR32RegClass.contains(DestReg)) 2904314564Sdim return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk; 2905314564Sdim } 2906314564Sdim 2907314564Sdim // SrcReg(GR64) -> DestReg(MaskReg) 2908314564Sdim // SrcReg(GR32) -> DestReg(MaskReg) 2909314564Sdim 2910314564Sdim // All KMASK RegClasses hold the same k registers, can be tested against anyone. 2911314564Sdim if (X86::VK16RegClass.contains(DestReg)) { 2912314564Sdim if (X86::GR64RegClass.contains(SrcReg)) { 2913314564Sdim assert(Subtarget.hasBWI()); 2914314564Sdim return X86::KMOVQkr; 2915314564Sdim } 2916314564Sdim if (X86::GR32RegClass.contains(SrcReg)) 2917314564Sdim return Subtarget.hasBWI() ? 
X86::KMOVDkr : X86::KMOVWkr; 2918314564Sdim } 2919314564Sdim 2920314564Sdim 2921212904Sdim // SrcReg(VR128) -> DestReg(GR64) 2922212904Sdim // SrcReg(VR64) -> DestReg(GR64) 2923212904Sdim // SrcReg(GR64) -> DestReg(VR128) 2924212904Sdim // SrcReg(GR64) -> DestReg(VR64) 2925212904Sdim 2926212904Sdim if (X86::GR64RegClass.contains(DestReg)) { 2927261991Sdim if (X86::VR128XRegClass.contains(SrcReg)) 2928212904Sdim // Copy from a VR128 register to a GR64 register. 2929309124Sdim return HasAVX512 ? X86::VMOVPQIto64Zrr : 2930309124Sdim HasAVX ? X86::VMOVPQIto64rr : 2931309124Sdim X86::MOVPQIto64rr; 2932243830Sdim if (X86::VR64RegClass.contains(SrcReg)) 2933212904Sdim // Copy from a VR64 register to a GR64 register. 2934288943Sdim return X86::MMX_MOVD64from64rr; 2935212904Sdim } else if (X86::GR64RegClass.contains(SrcReg)) { 2936212904Sdim // Copy from a GR64 register to a VR128 register. 2937261991Sdim if (X86::VR128XRegClass.contains(DestReg)) 2938309124Sdim return HasAVX512 ? X86::VMOV64toPQIZrr : 2939309124Sdim HasAVX ? X86::VMOV64toPQIrr : 2940309124Sdim X86::MOV64toPQIrr; 2941212904Sdim // Copy from a GR64 register to a VR64 register. 2942243830Sdim if (X86::VR64RegClass.contains(DestReg)) 2943288943Sdim return X86::MMX_MOVD64to64rr; 2944212904Sdim } 2945212904Sdim 2946353358Sdim // SrcReg(VR128) -> DestReg(GR32) 2947353358Sdim // SrcReg(GR32) -> DestReg(VR128) 2948226633Sdim 2949309124Sdim if (X86::GR32RegClass.contains(DestReg) && 2950353358Sdim X86::VR128XRegClass.contains(SrcReg)) 2951353358Sdim // Copy from a VR128 register to a GR32 register. 2952353358Sdim return HasAVX512 ? X86::VMOVPDI2DIZrr : 2953353358Sdim HasAVX ? X86::VMOVPDI2DIrr : 2954353358Sdim X86::MOVPDI2DIrr; 2955226633Sdim 2956353358Sdim if (X86::VR128XRegClass.contains(DestReg) && 2957309124Sdim X86::GR32RegClass.contains(SrcReg)) 2958353358Sdim // Copy from a VR128 register to a VR128 register. 2959353358Sdim return HasAVX512 ? X86::VMOVDI2PDIZrr : 2960353358Sdim HasAVX ? 
X86::VMOVDI2PDIrr : 2961353358Sdim X86::MOVDI2PDIrr; 2962261991Sdim return 0; 2963261991Sdim} 2964226633Sdim 2965210299Sedvoid X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 2966309124Sdim MachineBasicBlock::iterator MI, 2967360784Sdim const DebugLoc &DL, MCRegister DestReg, 2968360784Sdim MCRegister SrcReg, bool KillSrc) const { 2969210299Sed // First deal with the normal symmetric copies. 2970276479Sdim bool HasAVX = Subtarget.hasAVX(); 2971314564Sdim bool HasVLX = Subtarget.hasVLX(); 2972261991Sdim unsigned Opc = 0; 2973210299Sed if (X86::GR64RegClass.contains(DestReg, SrcReg)) 2974210299Sed Opc = X86::MOV64rr; 2975210299Sed else if (X86::GR32RegClass.contains(DestReg, SrcReg)) 2976210299Sed Opc = X86::MOV32rr; 2977210299Sed else if (X86::GR16RegClass.contains(DestReg, SrcReg)) 2978210299Sed Opc = X86::MOV16rr; 2979210299Sed else if (X86::GR8RegClass.contains(DestReg, SrcReg)) { 2980210299Sed // Copying to or from a physical H register on x86-64 requires a NOREX 2981210299Sed // move. Otherwise use a normal move. 2982210299Sed if ((isHReg(DestReg) || isHReg(SrcReg)) && 2983276479Sdim Subtarget.is64Bit()) { 2984210299Sed Opc = X86::MOV8rr_NOREX; 2985226633Sdim // Both operands must be encodable without an REX prefix. 2986226633Sdim assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) && 2987226633Sdim "8-bit H register can not be copied outside GR8_NOREX"); 2988226633Sdim } else 2989210299Sed Opc = X86::MOV8rr; 2990261991Sdim } 2991261991Sdim else if (X86::VR64RegClass.contains(DestReg, SrcReg)) 2992261991Sdim Opc = X86::MMX_MOVQ64rr; 2993314564Sdim else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) { 2994314564Sdim if (HasVLX) 2995314564Sdim Opc = X86::VMOVAPSZ128rr; 2996314564Sdim else if (X86::VR128RegClass.contains(DestReg, SrcReg)) 2997314564Sdim Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr; 2998314564Sdim else { 2999314564Sdim // If this an extended register and we don't have VLX we need to use a 3000314564Sdim // 512-bit move. 
3001314564Sdim Opc = X86::VMOVAPSZrr; 3002314564Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 3003314564Sdim DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_xmm, 3004314564Sdim &X86::VR512RegClass); 3005314564Sdim SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, 3006314564Sdim &X86::VR512RegClass); 3007314564Sdim } 3008314564Sdim } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) { 3009314564Sdim if (HasVLX) 3010314564Sdim Opc = X86::VMOVAPSZ256rr; 3011314564Sdim else if (X86::VR256RegClass.contains(DestReg, SrcReg)) 3012314564Sdim Opc = X86::VMOVAPSYrr; 3013314564Sdim else { 3014314564Sdim // If this an extended register and we don't have VLX we need to use a 3015314564Sdim // 512-bit move. 3016314564Sdim Opc = X86::VMOVAPSZrr; 3017314564Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 3018314564Sdim DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_ymm, 3019314564Sdim &X86::VR512RegClass); 3020314564Sdim SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, 3021314564Sdim &X86::VR512RegClass); 3022314564Sdim } 3023314564Sdim } else if (X86::VR512RegClass.contains(DestReg, SrcReg)) 3024314564Sdim Opc = X86::VMOVAPSZrr; 3025314564Sdim // All KMASK RegClasses hold the same k registers, can be tested against anyone. 3026314564Sdim else if (X86::VK16RegClass.contains(DestReg, SrcReg)) 3027314564Sdim Opc = Subtarget.hasBWI() ? 
X86::KMOVQkk : X86::KMOVWkk; 3028261991Sdim if (!Opc) 3029276479Sdim Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget); 3030193323Sed 3031210299Sed if (Opc) { 3032210299Sed BuildMI(MBB, MI, DL, get(Opc), DestReg) 3033210299Sed .addReg(SrcReg, getKillRegState(KillSrc)); 3034210299Sed return; 3035193323Sed } 3036198090Srdivacky 3037332833Sdim if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) { 3038332833Sdim // FIXME: We use a fatal error here because historically LLVM has tried 3039332833Sdim // lower some of these physreg copies and we want to ensure we get 3040332833Sdim // reasonable bug reports if someone encounters a case no other testing 3041332833Sdim // found. This path should be removed after the LLVM 7 release. 3042332833Sdim report_fatal_error("Unable to copy EFLAGS physical register!"); 3043193323Sed } 3044193323Sed 3045341825Sdim LLVM_DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to " 3046341825Sdim << RI.getName(DestReg) << '\n'); 3047341825Sdim report_fatal_error("Cannot emit physreg copy instruction"); 3048193323Sed} 3049193323Sed 3050360784SdimOptional<DestSourcePair> 3051360784SdimX86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 3052360784Sdim if (MI.isMoveReg()) 3053360784Sdim return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 3054360784Sdim return None; 3055341825Sdim} 3056341825Sdim 3057210299Sedstatic unsigned getLoadStoreRegOpcode(unsigned Reg, 3058210299Sed const TargetRegisterClass *RC, 3059210299Sed bool isStackAligned, 3060276479Sdim const X86Subtarget &STI, 3061210299Sed bool load) { 3062314564Sdim bool HasAVX = STI.hasAVX(); 3063314564Sdim bool HasAVX512 = STI.hasAVX512(); 3064314564Sdim bool HasVLX = STI.hasVLX(); 3065261991Sdim 3066321369Sdim switch (STI.getRegisterInfo()->getSpillSize(*RC)) { 3067210299Sed default: 3068223017Sdim llvm_unreachable("Unknown spill size"); 3069223017Sdim case 1: 3070223017Sdim assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); 
3071276479Sdim if (STI.is64Bit()) 3072223017Sdim // Copying to or from a physical H register on x86-64 requires a NOREX 3073223017Sdim // move. Otherwise use a normal move. 3074223017Sdim if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC)) 3075223017Sdim return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; 3076223017Sdim return load ? X86::MOV8rm : X86::MOV8mr; 3077223017Sdim case 2: 3078314564Sdim if (X86::VK16RegClass.hasSubClassEq(RC)) 3079314564Sdim return load ? X86::KMOVWkm : X86::KMOVWmk; 3080223017Sdim assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass"); 3081210299Sed return load ? X86::MOV16rm : X86::MOV16mr; 3082223017Sdim case 4: 3083223017Sdim if (X86::GR32RegClass.hasSubClassEq(RC)) 3084223017Sdim return load ? X86::MOV32rm : X86::MOV32mr; 3085314564Sdim if (X86::FR32XRegClass.hasSubClassEq(RC)) 3086226633Sdim return load ? 3087353358Sdim (HasAVX512 ? X86::VMOVSSZrm_alt : 3088353358Sdim HasAVX ? X86::VMOVSSrm_alt : 3089353358Sdim X86::MOVSSrm_alt) : 3090353358Sdim (HasAVX512 ? X86::VMOVSSZmr : 3091353358Sdim HasAVX ? X86::VMOVSSmr : 3092353358Sdim X86::MOVSSmr); 3093223017Sdim if (X86::RFP32RegClass.hasSubClassEq(RC)) 3094223017Sdim return load ? X86::LD_Fp32m : X86::ST_Fp32m; 3095341825Sdim if (X86::VK32RegClass.hasSubClassEq(RC)) { 3096341825Sdim assert(STI.hasBWI() && "KMOVD requires BWI"); 3097314564Sdim return load ? X86::KMOVDkm : X86::KMOVDmk; 3098341825Sdim } 3099353358Sdim // All of these mask pair classes have the same spill size, the same kind 3100353358Sdim // of kmov instructions can be used with all of them. 3101353358Sdim if (X86::VK1PAIRRegClass.hasSubClassEq(RC) || 3102353358Sdim X86::VK2PAIRRegClass.hasSubClassEq(RC) || 3103353358Sdim X86::VK4PAIRRegClass.hasSubClassEq(RC) || 3104353358Sdim X86::VK8PAIRRegClass.hasSubClassEq(RC) || 3105353358Sdim X86::VK16PAIRRegClass.hasSubClassEq(RC)) 3106353358Sdim return load ? 
X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE; 3107223017Sdim llvm_unreachable("Unknown 4-byte regclass"); 3108223017Sdim case 8: 3109223017Sdim if (X86::GR64RegClass.hasSubClassEq(RC)) 3110223017Sdim return load ? X86::MOV64rm : X86::MOV64mr; 3111314564Sdim if (X86::FR64XRegClass.hasSubClassEq(RC)) 3112226633Sdim return load ? 3113353358Sdim (HasAVX512 ? X86::VMOVSDZrm_alt : 3114353358Sdim HasAVX ? X86::VMOVSDrm_alt : 3115353358Sdim X86::MOVSDrm_alt) : 3116353358Sdim (HasAVX512 ? X86::VMOVSDZmr : 3117353358Sdim HasAVX ? X86::VMOVSDmr : 3118353358Sdim X86::MOVSDmr); 3119223017Sdim if (X86::VR64RegClass.hasSubClassEq(RC)) 3120223017Sdim return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; 3121223017Sdim if (X86::RFP64RegClass.hasSubClassEq(RC)) 3122223017Sdim return load ? X86::LD_Fp64m : X86::ST_Fp64m; 3123341825Sdim if (X86::VK64RegClass.hasSubClassEq(RC)) { 3124341825Sdim assert(STI.hasBWI() && "KMOVQ requires BWI"); 3125314564Sdim return load ? X86::KMOVQkm : X86::KMOVQmk; 3126341825Sdim } 3127223017Sdim llvm_unreachable("Unknown 8-byte regclass"); 3128223017Sdim case 10: 3129223017Sdim assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); 3130210299Sed return load ? X86::LD_Fp80m : X86::ST_FpP80m; 3131226633Sdim case 16: { 3132321369Sdim if (X86::VR128XRegClass.hasSubClassEq(RC)) { 3133321369Sdim // If stack is realigned we can use aligned stores. 3134321369Sdim if (isStackAligned) 3135321369Sdim return load ? 3136321369Sdim (HasVLX ? X86::VMOVAPSZ128rm : 3137321369Sdim HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX : 3138321369Sdim HasAVX ? X86::VMOVAPSrm : 3139321369Sdim X86::MOVAPSrm): 3140321369Sdim (HasVLX ? X86::VMOVAPSZ128mr : 3141321369Sdim HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX : 3142321369Sdim HasAVX ? X86::VMOVAPSmr : 3143321369Sdim X86::MOVAPSmr); 3144321369Sdim else 3145321369Sdim return load ? 3146321369Sdim (HasVLX ? X86::VMOVUPSZ128rm : 3147321369Sdim HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX : 3148321369Sdim HasAVX ? 
X86::VMOVUPSrm : 3149321369Sdim X86::MOVUPSrm): 3150321369Sdim (HasVLX ? X86::VMOVUPSZ128mr : 3151321369Sdim HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : 3152321369Sdim HasAVX ? X86::VMOVUPSmr : 3153321369Sdim X86::MOVUPSmr); 3154321369Sdim } 3155321369Sdim if (X86::BNDRRegClass.hasSubClassEq(RC)) { 3156321369Sdim if (STI.is64Bit()) 3157341825Sdim return load ? X86::BNDMOV64rm : X86::BNDMOV64mr; 3158321369Sdim else 3159341825Sdim return load ? X86::BNDMOV32rm : X86::BNDMOV32mr; 3160321369Sdim } 3161321369Sdim llvm_unreachable("Unknown 16-byte regclass"); 3162226633Sdim } 3163224145Sdim case 32: 3164314564Sdim assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); 3165224145Sdim // If stack is realigned we can use aligned stores. 3166224145Sdim if (isStackAligned) 3167314564Sdim return load ? 3168314564Sdim (HasVLX ? X86::VMOVAPSZ256rm : 3169314564Sdim HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX : 3170314564Sdim X86::VMOVAPSYrm) : 3171314564Sdim (HasVLX ? X86::VMOVAPSZ256mr : 3172314564Sdim HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX : 3173314564Sdim X86::VMOVAPSYmr); 3174224145Sdim else 3175314564Sdim return load ? 3176314564Sdim (HasVLX ? X86::VMOVUPSZ256rm : 3177314564Sdim HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX : 3178314564Sdim X86::VMOVUPSYrm) : 3179314564Sdim (HasVLX ? X86::VMOVUPSZ256mr : 3180314564Sdim HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX : 3181314564Sdim X86::VMOVUPSYmr); 3182261991Sdim case 64: 3183261991Sdim assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass"); 3184314564Sdim assert(STI.hasAVX512() && "Using 512-bit register requires AVX512"); 3185261991Sdim if (isStackAligned) 3186261991Sdim return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; 3187261991Sdim else 3188261991Sdim return load ? 
X86::VMOVUPSZrm : X86::VMOVUPSZmr; 3189193323Sed } 3190210299Sed} 3191193323Sed 3192344779Sdimbool X86InstrInfo::getMemOperandWithOffset( 3193353358Sdim const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset, 3194344779Sdim const TargetRegisterInfo *TRI) const { 3195309124Sdim const MCInstrDesc &Desc = MemOp.getDesc(); 3196309124Sdim int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); 3197288943Sdim if (MemRefBegin < 0) 3198288943Sdim return false; 3199288943Sdim 3200288943Sdim MemRefBegin += X86II::getOperandBias(Desc); 3201288943Sdim 3202344779Sdim BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); 3203344779Sdim if (!BaseOp->isReg()) // Can be an MO_FrameIndex 3204288943Sdim return false; 3205288943Sdim 3206309124Sdim if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1) 3207309124Sdim return false; 3208309124Sdim 3209309124Sdim if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() != 3210288943Sdim X86::NoRegister) 3211288943Sdim return false; 3212288943Sdim 3213309124Sdim const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp); 3214288943Sdim 3215288943Sdim // Displacement can be symbolic 3216288943Sdim if (!DispMO.isImm()) 3217288943Sdim return false; 3218288943Sdim 3219288943Sdim Offset = DispMO.getImm(); 3220288943Sdim 3221360784Sdim if (!BaseOp->isReg()) 3222360784Sdim return false; 3223360784Sdim 3224314564Sdim return true; 3225288943Sdim} 3226288943Sdim 3227210299Sedstatic unsigned getStoreRegOpcode(unsigned SrcReg, 3228210299Sed const TargetRegisterClass *RC, 3229210299Sed bool isStackAligned, 3230276479Sdim const X86Subtarget &STI) { 3231276479Sdim return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false); 3232193323Sed} 3233193323Sed 3234210299Sed 3235210299Sedstatic unsigned getLoadRegOpcode(unsigned DestReg, 3236210299Sed const TargetRegisterClass *RC, 3237210299Sed bool isStackAligned, 3238276479Sdim const X86Subtarget &STI) { 3239276479Sdim return 
getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true); 3240210299Sed} 3241210299Sed 3242193323Sedvoid X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 3243193323Sed MachineBasicBlock::iterator MI, 3244193323Sed unsigned SrcReg, bool isKill, int FrameIdx, 3245208599Srdivacky const TargetRegisterClass *RC, 3246208599Srdivacky const TargetRegisterInfo *TRI) const { 3247193323Sed const MachineFunction &MF = *MBB.getParent(); 3248321369Sdim assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && 3249212904Sdim "Stack slot too small for store"); 3250321369Sdim unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); 3251288943Sdim bool isAligned = 3252288943Sdim (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || 3253288943Sdim RI.canRealignStack(MF); 3254276479Sdim unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); 3255344779Sdim addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx) 3256193323Sed .addReg(SrcReg, getKillRegState(isKill)); 3257193323Sed} 3258193323Sed 3259193323Sedvoid X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 3260193323Sed MachineBasicBlock::iterator MI, 3261193323Sed unsigned DestReg, int FrameIdx, 3262208599Srdivacky const TargetRegisterClass *RC, 3263208599Srdivacky const TargetRegisterInfo *TRI) const { 3264193323Sed const MachineFunction &MF = *MBB.getParent(); 3265321369Sdim unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); 3266288943Sdim bool isAligned = 3267288943Sdim (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || 3268288943Sdim RI.canRealignStack(MF); 3269276479Sdim unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); 3270344779Sdim addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx); 3271193323Sed} 3272193323Sed 3273309124Sdimbool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, 3274309124Sdim unsigned &SrcReg2, int &CmpMask, 
3275309124Sdim int &CmpValue) const { 3276309124Sdim switch (MI.getOpcode()) { 3277239462Sdim default: break; 3278239462Sdim case X86::CMP64ri32: 3279239462Sdim case X86::CMP64ri8: 3280239462Sdim case X86::CMP32ri: 3281239462Sdim case X86::CMP32ri8: 3282239462Sdim case X86::CMP16ri: 3283239462Sdim case X86::CMP16ri8: 3284239462Sdim case X86::CMP8ri: 3285309124Sdim SrcReg = MI.getOperand(0).getReg(); 3286239462Sdim SrcReg2 = 0; 3287321369Sdim if (MI.getOperand(1).isImm()) { 3288321369Sdim CmpMask = ~0; 3289321369Sdim CmpValue = MI.getOperand(1).getImm(); 3290321369Sdim } else { 3291321369Sdim CmpMask = CmpValue = 0; 3292321369Sdim } 3293239462Sdim return true; 3294239462Sdim // A SUB can be used to perform comparison. 3295239462Sdim case X86::SUB64rm: 3296239462Sdim case X86::SUB32rm: 3297239462Sdim case X86::SUB16rm: 3298239462Sdim case X86::SUB8rm: 3299309124Sdim SrcReg = MI.getOperand(1).getReg(); 3300239462Sdim SrcReg2 = 0; 3301321369Sdim CmpMask = 0; 3302239462Sdim CmpValue = 0; 3303239462Sdim return true; 3304239462Sdim case X86::SUB64rr: 3305239462Sdim case X86::SUB32rr: 3306239462Sdim case X86::SUB16rr: 3307239462Sdim case X86::SUB8rr: 3308309124Sdim SrcReg = MI.getOperand(1).getReg(); 3309309124Sdim SrcReg2 = MI.getOperand(2).getReg(); 3310321369Sdim CmpMask = 0; 3311239462Sdim CmpValue = 0; 3312239462Sdim return true; 3313239462Sdim case X86::SUB64ri32: 3314239462Sdim case X86::SUB64ri8: 3315239462Sdim case X86::SUB32ri: 3316239462Sdim case X86::SUB32ri8: 3317239462Sdim case X86::SUB16ri: 3318239462Sdim case X86::SUB16ri8: 3319239462Sdim case X86::SUB8ri: 3320309124Sdim SrcReg = MI.getOperand(1).getReg(); 3321239462Sdim SrcReg2 = 0; 3322321369Sdim if (MI.getOperand(2).isImm()) { 3323321369Sdim CmpMask = ~0; 3324321369Sdim CmpValue = MI.getOperand(2).getImm(); 3325321369Sdim } else { 3326321369Sdim CmpMask = CmpValue = 0; 3327321369Sdim } 3328239462Sdim return true; 3329239462Sdim case X86::CMP64rr: 3330239462Sdim case X86::CMP32rr: 3331239462Sdim case 
X86::CMP16rr: 3332239462Sdim case X86::CMP8rr: 3333309124Sdim SrcReg = MI.getOperand(0).getReg(); 3334309124Sdim SrcReg2 = MI.getOperand(1).getReg(); 3335321369Sdim CmpMask = 0; 3336239462Sdim CmpValue = 0; 3337239462Sdim return true; 3338239462Sdim case X86::TEST8rr: 3339239462Sdim case X86::TEST16rr: 3340239462Sdim case X86::TEST32rr: 3341239462Sdim case X86::TEST64rr: 3342309124Sdim SrcReg = MI.getOperand(0).getReg(); 3343309124Sdim if (MI.getOperand(1).getReg() != SrcReg) 3344309124Sdim return false; 3345239462Sdim // Compare against zero. 3346239462Sdim SrcReg2 = 0; 3347239462Sdim CmpMask = ~0; 3348239462Sdim CmpValue = 0; 3349239462Sdim return true; 3350239462Sdim } 3351239462Sdim return false; 3352239462Sdim} 3353239462Sdim 3354288943Sdim/// Check whether the first instruction, whose only 3355239462Sdim/// purpose is to update flags, can be made redundant. 3356239462Sdim/// CMPrr can be made redundant by SUBrr if the operands are the same. 3357239462Sdim/// This function can be extended later on. 3358239462Sdim/// SrcReg, SrcRegs: register operands for FlagI. 3359239462Sdim/// ImmValue: immediate for FlagI if it takes an immediate. 
3360344779Sdiminline static bool isRedundantFlagInstr(const MachineInstr &FlagI, 3361344779Sdim unsigned SrcReg, unsigned SrcReg2, 3362344779Sdim int ImmMask, int ImmValue, 3363344779Sdim const MachineInstr &OI) { 3364309124Sdim if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) || 3365309124Sdim (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) || 3366309124Sdim (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) || 3367309124Sdim (FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) && 3368309124Sdim ((OI.getOperand(1).getReg() == SrcReg && 3369309124Sdim OI.getOperand(2).getReg() == SrcReg2) || 3370309124Sdim (OI.getOperand(1).getReg() == SrcReg2 && 3371309124Sdim OI.getOperand(2).getReg() == SrcReg))) 3372239462Sdim return true; 3373239462Sdim 3374321369Sdim if (ImmMask != 0 && 3375321369Sdim ((FlagI.getOpcode() == X86::CMP64ri32 && 3376309124Sdim OI.getOpcode() == X86::SUB64ri32) || 3377309124Sdim (FlagI.getOpcode() == X86::CMP64ri8 && 3378309124Sdim OI.getOpcode() == X86::SUB64ri8) || 3379309124Sdim (FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) || 3380309124Sdim (FlagI.getOpcode() == X86::CMP32ri8 && 3381309124Sdim OI.getOpcode() == X86::SUB32ri8) || 3382309124Sdim (FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) || 3383309124Sdim (FlagI.getOpcode() == X86::CMP16ri8 && 3384309124Sdim OI.getOpcode() == X86::SUB16ri8) || 3385309124Sdim (FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) && 3386309124Sdim OI.getOperand(1).getReg() == SrcReg && 3387309124Sdim OI.getOperand(2).getImm() == ImmValue) 3388239462Sdim return true; 3389239462Sdim return false; 3390239462Sdim} 3391239462Sdim 3392288943Sdim/// Check whether the definition can be converted 3393239462Sdim/// to remove a comparison against zero. 
3394344779Sdiminline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) { 3395344779Sdim NoSignFlag = false; 3396344779Sdim 3397309124Sdim switch (MI.getOpcode()) { 3398239462Sdim default: return false; 3399261991Sdim 3400261991Sdim // The shift instructions only modify ZF if their shift count is non-zero. 3401261991Sdim // N.B.: The processor truncates the shift count depending on the encoding. 3402261991Sdim case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri: 3403261991Sdim case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri: 3404261991Sdim return getTruncatedShiftCount(MI, 2) != 0; 3405261991Sdim 3406261991Sdim // Some left shift instructions can be turned into LEA instructions but only 3407261991Sdim // if their flags aren't used. Avoid transforming such instructions. 3408261991Sdim case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{ 3409261991Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 3410261991Sdim if (isTruncatedShiftCountForLEA(ShAmt)) return false; 3411261991Sdim return ShAmt != 0; 3412261991Sdim } 3413261991Sdim 3414261991Sdim case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8: 3415261991Sdim case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8: 3416261991Sdim return getTruncatedShiftCount(MI, 3) != 0; 3417261991Sdim 3418239462Sdim case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri: 3419239462Sdim case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8: 3420239462Sdim case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr: 3421239462Sdim case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm: 3422239462Sdim case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm: 3423249423Sdim case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r: 3424239462Sdim case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri: 3425239462Sdim case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8: 3426239462Sdim case X86::ADD8ri: case 
X86::ADD64rr: case X86::ADD32rr: 3427239462Sdim case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: 3428239462Sdim case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: 3429249423Sdim case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: 3430239462Sdim case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: 3431239462Sdim case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: 3432239462Sdim case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: 3433239462Sdim case X86::AND16rr: case X86::AND8rr: case X86::AND64rm: 3434239462Sdim case X86::AND32rm: case X86::AND16rm: case X86::AND8rm: 3435239462Sdim case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri: 3436239462Sdim case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8: 3437239462Sdim case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr: 3438239462Sdim case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm: 3439239462Sdim case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm: 3440239462Sdim case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri: 3441239462Sdim case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8: 3442239462Sdim case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: 3443239462Sdim case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: 3444239462Sdim case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: 3445327952Sdim case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri: 3446327952Sdim case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8: 3447327952Sdim case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr: 3448327952Sdim case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm: 3449327952Sdim case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm: 3450327952Sdim case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri: 3451327952Sdim case X86::SBB32ri8: case X86::SBB16ri: case X86::SBB16ri8: 3452327952Sdim case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr: 3453327952Sdim case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm: 3454327952Sdim 
case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm: 3455261991Sdim case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r: 3456261991Sdim case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1: 3457261991Sdim case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1: 3458261991Sdim case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1: 3459249423Sdim case X86::ANDN32rr: case X86::ANDN32rm: 3460249423Sdim case X86::ANDN64rr: case X86::ANDN64rm: 3461261991Sdim case X86::BLSI32rr: case X86::BLSI32rm: 3462261991Sdim case X86::BLSI64rr: case X86::BLSI64rm: 3463261991Sdim case X86::BLSMSK32rr:case X86::BLSMSK32rm: 3464261991Sdim case X86::BLSMSK64rr:case X86::BLSMSK64rm: 3465261991Sdim case X86::BLSR32rr: case X86::BLSR32rm: 3466261991Sdim case X86::BLSR64rr: case X86::BLSR64rm: 3467261991Sdim case X86::BZHI32rr: case X86::BZHI32rm: 3468261991Sdim case X86::BZHI64rr: case X86::BZHI64rm: 3469261991Sdim case X86::LZCNT16rr: case X86::LZCNT16rm: 3470261991Sdim case X86::LZCNT32rr: case X86::LZCNT32rm: 3471261991Sdim case X86::LZCNT64rr: case X86::LZCNT64rm: 3472261991Sdim case X86::POPCNT16rr:case X86::POPCNT16rm: 3473261991Sdim case X86::POPCNT32rr:case X86::POPCNT32rm: 3474261991Sdim case X86::POPCNT64rr:case X86::POPCNT64rm: 3475261991Sdim case X86::TZCNT16rr: case X86::TZCNT16rm: 3476261991Sdim case X86::TZCNT32rr: case X86::TZCNT32rm: 3477261991Sdim case X86::TZCNT64rr: case X86::TZCNT64rm: 3478327952Sdim case X86::BLCFILL32rr: case X86::BLCFILL32rm: 3479327952Sdim case X86::BLCFILL64rr: case X86::BLCFILL64rm: 3480327952Sdim case X86::BLCI32rr: case X86::BLCI32rm: 3481327952Sdim case X86::BLCI64rr: case X86::BLCI64rm: 3482327952Sdim case X86::BLCIC32rr: case X86::BLCIC32rm: 3483327952Sdim case X86::BLCIC64rr: case X86::BLCIC64rm: 3484327952Sdim case X86::BLCMSK32rr: case X86::BLCMSK32rm: 3485327952Sdim case X86::BLCMSK64rr: case X86::BLCMSK64rm: 3486327952Sdim case X86::BLCS32rr: case X86::BLCS32rm: 
3487327952Sdim case X86::BLCS64rr: case X86::BLCS64rm: 3488327952Sdim case X86::BLSFILL32rr: case X86::BLSFILL32rm: 3489327952Sdim case X86::BLSFILL64rr: case X86::BLSFILL64rm: 3490327952Sdim case X86::BLSIC32rr: case X86::BLSIC32rm: 3491327952Sdim case X86::BLSIC64rr: case X86::BLSIC64rm: 3492344779Sdim case X86::T1MSKC32rr: case X86::T1MSKC32rm: 3493344779Sdim case X86::T1MSKC64rr: case X86::T1MSKC64rm: 3494344779Sdim case X86::TZMSK32rr: case X86::TZMSK32rm: 3495344779Sdim case X86::TZMSK64rr: case X86::TZMSK64rm: 3496239462Sdim return true; 3497344779Sdim case X86::BEXTR32rr: case X86::BEXTR64rr: 3498344779Sdim case X86::BEXTR32rm: case X86::BEXTR64rm: 3499344779Sdim case X86::BEXTRI32ri: case X86::BEXTRI32mi: 3500344779Sdim case X86::BEXTRI64ri: case X86::BEXTRI64mi: 3501344779Sdim // BEXTR doesn't update the sign flag so we can't use it. 3502344779Sdim NoSignFlag = true; 3503344779Sdim return true; 3504239462Sdim } 3505239462Sdim} 3506239462Sdim 3507288943Sdim/// Check whether the use can be converted to remove a comparison against zero. 
3508344779Sdimstatic X86::CondCode isUseDefConvertible(const MachineInstr &MI) { 3509309124Sdim switch (MI.getOpcode()) { 3510276479Sdim default: return X86::COND_INVALID; 3511353358Sdim case X86::NEG8r: 3512353358Sdim case X86::NEG16r: 3513353358Sdim case X86::NEG32r: 3514353358Sdim case X86::NEG64r: 3515353358Sdim return X86::COND_AE; 3516353358Sdim case X86::LZCNT16rr: 3517353358Sdim case X86::LZCNT32rr: 3518353358Sdim case X86::LZCNT64rr: 3519276479Sdim return X86::COND_B; 3520353358Sdim case X86::POPCNT16rr: 3521353358Sdim case X86::POPCNT32rr: 3522353358Sdim case X86::POPCNT64rr: 3523276479Sdim return X86::COND_E; 3524353358Sdim case X86::TZCNT16rr: 3525353358Sdim case X86::TZCNT32rr: 3526353358Sdim case X86::TZCNT64rr: 3527276479Sdim return X86::COND_B; 3528353358Sdim case X86::BSF16rr: 3529353358Sdim case X86::BSF32rr: 3530353358Sdim case X86::BSF64rr: 3531353358Sdim case X86::BSR16rr: 3532353358Sdim case X86::BSR32rr: 3533353358Sdim case X86::BSR64rr: 3534341825Sdim return X86::COND_E; 3535353358Sdim case X86::BLSI32rr: 3536353358Sdim case X86::BLSI64rr: 3537353358Sdim return X86::COND_AE; 3538353358Sdim case X86::BLSR32rr: 3539353358Sdim case X86::BLSR64rr: 3540353358Sdim case X86::BLSMSK32rr: 3541353358Sdim case X86::BLSMSK64rr: 3542353358Sdim return X86::COND_B; 3543353358Sdim // TODO: TBM instructions. 3544276479Sdim } 3545276479Sdim} 3546276479Sdim 3547288943Sdim/// Check if there exists an earlier instruction that 3548239462Sdim/// operates on the same source operands and sets flags in the same way as 3549239462Sdim/// Compare; remove Compare if possible. 3550309124Sdimbool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, 3551309124Sdim unsigned SrcReg2, int CmpMask, 3552309124Sdim int CmpValue, 3553309124Sdim const MachineRegisterInfo *MRI) const { 3554239462Sdim // Check whether we can replace SUB with CMP. 
3555309124Sdim switch (CmpInstr.getOpcode()) { 3556239462Sdim default: break; 3557239462Sdim case X86::SUB64ri32: 3558239462Sdim case X86::SUB64ri8: 3559239462Sdim case X86::SUB32ri: 3560239462Sdim case X86::SUB32ri8: 3561239462Sdim case X86::SUB16ri: 3562239462Sdim case X86::SUB16ri8: 3563239462Sdim case X86::SUB8ri: 3564239462Sdim case X86::SUB64rm: 3565239462Sdim case X86::SUB32rm: 3566239462Sdim case X86::SUB16rm: 3567239462Sdim case X86::SUB8rm: 3568239462Sdim case X86::SUB64rr: 3569239462Sdim case X86::SUB32rr: 3570239462Sdim case X86::SUB16rr: 3571239462Sdim case X86::SUB8rr: { 3572309124Sdim if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) 3573239462Sdim return false; 3574239462Sdim // There is no use of the destination register, we can replace SUB with CMP. 3575353358Sdim unsigned NewOpcode = 0; 3576309124Sdim switch (CmpInstr.getOpcode()) { 3577243830Sdim default: llvm_unreachable("Unreachable!"); 3578239462Sdim case X86::SUB64rm: NewOpcode = X86::CMP64rm; break; 3579239462Sdim case X86::SUB32rm: NewOpcode = X86::CMP32rm; break; 3580239462Sdim case X86::SUB16rm: NewOpcode = X86::CMP16rm; break; 3581239462Sdim case X86::SUB8rm: NewOpcode = X86::CMP8rm; break; 3582239462Sdim case X86::SUB64rr: NewOpcode = X86::CMP64rr; break; 3583239462Sdim case X86::SUB32rr: NewOpcode = X86::CMP32rr; break; 3584239462Sdim case X86::SUB16rr: NewOpcode = X86::CMP16rr; break; 3585239462Sdim case X86::SUB8rr: NewOpcode = X86::CMP8rr; break; 3586239462Sdim case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break; 3587239462Sdim case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break; 3588239462Sdim case X86::SUB32ri: NewOpcode = X86::CMP32ri; break; 3589239462Sdim case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break; 3590239462Sdim case X86::SUB16ri: NewOpcode = X86::CMP16ri; break; 3591239462Sdim case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break; 3592239462Sdim case X86::SUB8ri: NewOpcode = X86::CMP8ri; break; 3593239462Sdim } 3594309124Sdim 
CmpInstr.setDesc(get(NewOpcode)); 3595309124Sdim CmpInstr.RemoveOperand(0); 3596239462Sdim // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. 3597239462Sdim if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || 3598239462Sdim NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) 3599239462Sdim return false; 3600239462Sdim } 3601239462Sdim } 3602239462Sdim 3603239462Sdim // Get the unique definition of SrcReg. 3604239462Sdim MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 3605239462Sdim if (!MI) return false; 3606239462Sdim 3607239462Sdim // CmpInstr is the first instruction of the BB. 3608239462Sdim MachineBasicBlock::iterator I = CmpInstr, Def = MI; 3609239462Sdim 3610239462Sdim // If we are comparing against zero, check whether we can use MI to update 3611239462Sdim // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize. 3612321369Sdim bool IsCmpZero = (CmpMask != 0 && CmpValue == 0); 3613309124Sdim if (IsCmpZero && MI->getParent() != CmpInstr.getParent()) 3614239462Sdim return false; 3615239462Sdim 3616276479Sdim // If we have a use of the source register between the def and our compare 3617276479Sdim // instruction we can eliminate the compare iff the use sets EFLAGS in the 3618276479Sdim // right way. 3619276479Sdim bool ShouldUpdateCC = false; 3620344779Sdim bool NoSignFlag = false; 3621276479Sdim X86::CondCode NewCC = X86::COND_INVALID; 3622344779Sdim if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) { 3623276479Sdim // Scan forward from the use until we hit the use we're looking for or the 3624276479Sdim // compare instruction. 3625276479Sdim for (MachineBasicBlock::iterator J = MI;; ++J) { 3626276479Sdim // Do we have a convertible instruction? 
3627309124Sdim NewCC = isUseDefConvertible(*J); 3628276479Sdim if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() && 3629276479Sdim J->getOperand(1).getReg() == SrcReg) { 3630276479Sdim assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!"); 3631276479Sdim ShouldUpdateCC = true; // Update CC later on. 3632276479Sdim // This is not a def of SrcReg, but still a def of EFLAGS. Keep going 3633276479Sdim // with the new def. 3634309124Sdim Def = J; 3635309124Sdim MI = &*Def; 3636276479Sdim break; 3637276479Sdim } 3638276479Sdim 3639276479Sdim if (J == I) 3640276479Sdim return false; 3641276479Sdim } 3642276479Sdim } 3643276479Sdim 3644239462Sdim // We are searching for an earlier instruction that can make CmpInstr 3645239462Sdim // redundant and that instruction will be saved in Sub. 3646276479Sdim MachineInstr *Sub = nullptr; 3647239462Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 3648239462Sdim 3649239462Sdim // We iterate backward, starting from the instruction before CmpInstr and 3650239462Sdim // stop when reaching the definition of a source register or done with the BB. 3651239462Sdim // RI points to the instruction before CmpInstr. 3652239462Sdim // If the definition is in this basic block, RE points to the definition; 3653239462Sdim // otherwise, RE is the rend of the basic block. 3654239462Sdim MachineBasicBlock::reverse_iterator 3655314564Sdim RI = ++I.getReverse(), 3656309124Sdim RE = CmpInstr.getParent() == MI->getParent() 3657314564Sdim ? Def.getReverse() /* points to MI */ 3658309124Sdim : CmpInstr.getParent()->rend(); 3659276479Sdim MachineInstr *Movr0Inst = nullptr; 3660239462Sdim for (; RI != RE; ++RI) { 3661309124Sdim MachineInstr &Instr = *RI; 3662239462Sdim // Check whether CmpInstr can be made redundant by the current instruction. 
3663321369Sdim if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, 3664321369Sdim CmpValue, Instr)) { 3665309124Sdim Sub = &Instr; 3666239462Sdim break; 3667239462Sdim } 3668239462Sdim 3669309124Sdim if (Instr.modifiesRegister(X86::EFLAGS, TRI) || 3670309124Sdim Instr.readsRegister(X86::EFLAGS, TRI)) { 3671239462Sdim // This instruction modifies or uses EFLAGS. 3672239462Sdim 3673239462Sdim // MOV32r0 etc. are implemented with xor which clobbers condition code. 3674239462Sdim // They are safe to move up, if the definition to EFLAGS is dead and 3675239462Sdim // earlier instructions do not read or write EFLAGS. 3676309124Sdim if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 && 3677309124Sdim Instr.registerDefIsDead(X86::EFLAGS, TRI)) { 3678309124Sdim Movr0Inst = &Instr; 3679239462Sdim continue; 3680239462Sdim } 3681239462Sdim 3682239462Sdim // We can't remove CmpInstr. 3683239462Sdim return false; 3684239462Sdim } 3685239462Sdim } 3686239462Sdim 3687239462Sdim // Return false if no candidates exist. 3688239462Sdim if (!IsCmpZero && !Sub) 3689239462Sdim return false; 3690239462Sdim 3691360784Sdim bool IsSwapped = 3692360784Sdim (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 && 3693360784Sdim Sub->getOperand(2).getReg() == SrcReg); 3694239462Sdim 3695239462Sdim // Scan forward from the instruction after CmpInstr for uses of EFLAGS. 3696239462Sdim // It is safe to remove CmpInstr if EFLAGS is redefined or killed. 3697239462Sdim // If we are done with the basic block, we need to check whether EFLAGS is 3698239462Sdim // live-out. 
3699239462Sdim bool IsSafe = false; 3700353358Sdim SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate; 3701309124Sdim MachineBasicBlock::iterator E = CmpInstr.getParent()->end(); 3702239462Sdim for (++I; I != E; ++I) { 3703239462Sdim const MachineInstr &Instr = *I; 3704239462Sdim bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI); 3705239462Sdim bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI); 3706239462Sdim // We should check the usage if this instruction uses and updates EFLAGS. 3707239462Sdim if (!UseEFLAGS && ModifyEFLAGS) { 3708239462Sdim // It is safe to remove CmpInstr if EFLAGS is updated again. 3709239462Sdim IsSafe = true; 3710239462Sdim break; 3711239462Sdim } 3712239462Sdim if (!UseEFLAGS && !ModifyEFLAGS) 3713239462Sdim continue; 3714239462Sdim 3715239462Sdim // EFLAGS is used by this instruction. 3716276479Sdim X86::CondCode OldCC = X86::COND_INVALID; 3717239462Sdim if (IsCmpZero || IsSwapped) { 3718239462Sdim // We decode the condition code from opcode. 3719239462Sdim if (Instr.isBranch()) 3720353358Sdim OldCC = X86::getCondFromBranch(Instr); 3721239462Sdim else { 3722353358Sdim OldCC = X86::getCondFromSETCC(Instr); 3723353358Sdim if (OldCC == X86::COND_INVALID) 3724353358Sdim OldCC = X86::getCondFromCMov(Instr); 3725239462Sdim } 3726239462Sdim if (OldCC == X86::COND_INVALID) return false; 3727239462Sdim } 3728327952Sdim X86::CondCode ReplacementCC = X86::COND_INVALID; 3729239462Sdim if (IsCmpZero) { 3730239462Sdim switch (OldCC) { 3731239462Sdim default: break; 3732239462Sdim case X86::COND_A: case X86::COND_AE: 3733239462Sdim case X86::COND_B: case X86::COND_BE: 3734239462Sdim case X86::COND_G: case X86::COND_GE: 3735239462Sdim case X86::COND_L: case X86::COND_LE: 3736239462Sdim case X86::COND_O: case X86::COND_NO: 3737239462Sdim // CF and OF are used, we can't perform this optimization. 
3738239462Sdim return false; 3739344779Sdim case X86::COND_S: case X86::COND_NS: 3740344779Sdim // If SF is used, but the instruction doesn't update the SF, then we 3741344779Sdim // can't do the optimization. 3742344779Sdim if (NoSignFlag) 3743344779Sdim return false; 3744344779Sdim break; 3745239462Sdim } 3746276479Sdim 3747276479Sdim // If we're updating the condition code check if we have to reverse the 3748276479Sdim // condition. 3749276479Sdim if (ShouldUpdateCC) 3750276479Sdim switch (OldCC) { 3751276479Sdim default: 3752276479Sdim return false; 3753276479Sdim case X86::COND_E: 3754327952Sdim ReplacementCC = NewCC; 3755276479Sdim break; 3756276479Sdim case X86::COND_NE: 3757327952Sdim ReplacementCC = GetOppositeBranchCondition(NewCC); 3758276479Sdim break; 3759276479Sdim } 3760239462Sdim } else if (IsSwapped) { 3761239462Sdim // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs 3762239462Sdim // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 3763239462Sdim // We swap the condition code and synthesize the new opcode. 3764327952Sdim ReplacementCC = getSwappedCondition(OldCC); 3765327952Sdim if (ReplacementCC == X86::COND_INVALID) return false; 3766276479Sdim } 3767239462Sdim 3768327952Sdim if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) { 3769239462Sdim // Push the MachineInstr to OpsToUpdate. 3770239462Sdim // If it is safe to remove CmpInstr, the condition code of these 3771239462Sdim // instructions will be modified. 3772353358Sdim OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC)); 3773239462Sdim } 3774239462Sdim if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { 3775239462Sdim // It is safe to remove CmpInstr if EFLAGS is updated again or killed. 3776239462Sdim IsSafe = true; 3777239462Sdim break; 3778239462Sdim } 3779239462Sdim } 3780239462Sdim 3781239462Sdim // If EFLAGS is not killed nor re-defined, we should check whether it is 3782239462Sdim // live-out. 
If it is live-out, do not optimize. 3783239462Sdim if ((IsCmpZero || IsSwapped) && !IsSafe) { 3784309124Sdim MachineBasicBlock *MBB = CmpInstr.getParent(); 3785296417Sdim for (MachineBasicBlock *Successor : MBB->successors()) 3786296417Sdim if (Successor->isLiveIn(X86::EFLAGS)) 3787239462Sdim return false; 3788239462Sdim } 3789239462Sdim 3790239462Sdim // The instruction to be updated is either Sub or MI. 3791239462Sdim Sub = IsCmpZero ? MI : Sub; 3792261991Sdim // Move Movr0Inst to the appropriate place before Sub. 3793239462Sdim if (Movr0Inst) { 3794261991Sdim // Look backwards until we find a def that doesn't use the current EFLAGS. 3795261991Sdim Def = Sub; 3796314564Sdim MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(), 3797314564Sdim InsertE = Sub->getParent()->rend(); 3798261991Sdim for (; InsertI != InsertE; ++InsertI) { 3799261991Sdim MachineInstr *Instr = &*InsertI; 3800261991Sdim if (!Instr->readsRegister(X86::EFLAGS, TRI) && 3801261991Sdim Instr->modifiesRegister(X86::EFLAGS, TRI)) { 3802261991Sdim Sub->getParent()->remove(Movr0Inst); 3803261991Sdim Instr->getParent()->insert(MachineBasicBlock::iterator(Instr), 3804261991Sdim Movr0Inst); 3805261991Sdim break; 3806261991Sdim } 3807261991Sdim } 3808261991Sdim if (InsertI == InsertE) 3809261991Sdim return false; 3810239462Sdim } 3811239462Sdim 3812243830Sdim // Make sure Sub instruction defines EFLAGS and mark the def live. 3813353358Sdim MachineOperand *FlagDef = Sub->findRegisterDefOperand(X86::EFLAGS); 3814353358Sdim assert(FlagDef && "Unable to locate a def EFLAGS operand"); 3815353358Sdim FlagDef->setIsDead(false); 3816261991Sdim 3817309124Sdim CmpInstr.eraseFromParent(); 3818239462Sdim 3819239462Sdim // Modify the condition code of instructions in OpsToUpdate. 
3820353358Sdim for (auto &Op : OpsToUpdate) { 3821353358Sdim Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1) 3822353358Sdim .setImm(Op.second); 3823353358Sdim } 3824239462Sdim return true; 3825239462Sdim} 3826239462Sdim 3827288943Sdim/// Try to remove the load by folding it to a register 3828239462Sdim/// operand at the use. We fold the load instructions if load defines a virtual 3829239462Sdim/// register, the virtual register is used once in the same BB, and the 3830239462Sdim/// instructions in-between do not load or store, and have no side effects. 3831309124SdimMachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, 3832280031Sdim const MachineRegisterInfo *MRI, 3833280031Sdim unsigned &FoldAsLoadDefReg, 3834280031Sdim MachineInstr *&DefMI) const { 3835239462Sdim // Check whether we can move DefMI here. 3836239462Sdim DefMI = MRI->getVRegDef(FoldAsLoadDefReg); 3837239462Sdim assert(DefMI); 3838239462Sdim bool SawStore = false; 3839288943Sdim if (!DefMI->isSafeToMove(nullptr, SawStore)) 3840276479Sdim return nullptr; 3841239462Sdim 3842280031Sdim // Collect information about virtual register operands of MI. 3843314564Sdim SmallVector<unsigned, 1> SrcOperandIds; 3844314564Sdim for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 3845309124Sdim MachineOperand &MO = MI.getOperand(i); 3846280031Sdim if (!MO.isReg()) 3847280031Sdim continue; 3848360784Sdim Register Reg = MO.getReg(); 3849280031Sdim if (Reg != FoldAsLoadDefReg) 3850280031Sdim continue; 3851314564Sdim // Do not fold if we have a subreg use or a def. 3852314564Sdim if (MO.getSubReg() || MO.isDef()) 3853280031Sdim return nullptr; 3854314564Sdim SrcOperandIds.push_back(i); 3855280031Sdim } 3856314564Sdim if (SrcOperandIds.empty()) 3857280031Sdim return nullptr; 3858239462Sdim 3859280031Sdim // Check whether we can fold the def into SrcOperandId. 
3860314564Sdim if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) { 3861280031Sdim FoldAsLoadDefReg = 0; 3862280031Sdim return FoldMI; 3863280031Sdim } 3864239462Sdim 3865276479Sdim return nullptr; 3866239462Sdim} 3867239462Sdim 3868288943Sdim/// Expand a single-def pseudo instruction to a two-addr 3869288943Sdim/// instruction with two undef reads of the register being defined. 3870288943Sdim/// This is used for mapping: 3871226633Sdim/// %xmm4 = V_SET0 3872226633Sdim/// to: 3873327952Sdim/// %xmm4 = PXORrr undef %xmm4, undef %xmm4 3874226633Sdim/// 3875249423Sdimstatic bool Expand2AddrUndef(MachineInstrBuilder &MIB, 3876249423Sdim const MCInstrDesc &Desc) { 3877226633Sdim assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); 3878360784Sdim Register Reg = MIB->getOperand(0).getReg(); 3879249423Sdim MIB->setDesc(Desc); 3880226633Sdim 3881226633Sdim // MachineInstr::addOperand() will insert explicit operands before any 3882226633Sdim // implicit operands. 3883249423Sdim MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); 3884226633Sdim // But we don't trust that. 3885249423Sdim assert(MIB->getOperand(1).getReg() == Reg && 3886249423Sdim MIB->getOperand(2).getReg() == Reg && "Misplaced operand"); 3887226633Sdim return true; 3888226633Sdim} 3889226633Sdim 3890296417Sdim/// Expand a single-def pseudo instruction to a two-addr 3891296417Sdim/// instruction with two %k0 reads. 
3892296417Sdim/// This is used for mapping: 3893296417Sdim/// %k4 = K_SET1 3894296417Sdim/// to: 3895296417Sdim/// %k4 = KXNORrr %k0, %k0 3896296417Sdimstatic bool Expand2AddrKreg(MachineInstrBuilder &MIB, 3897296417Sdim const MCInstrDesc &Desc, unsigned Reg) { 3898296417Sdim assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); 3899296417Sdim MIB->setDesc(Desc); 3900296417Sdim MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); 3901296417Sdim return true; 3902296417Sdim} 3903296417Sdim 3904296417Sdimstatic bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, 3905296417Sdim bool MinusOne) { 3906296417Sdim MachineBasicBlock &MBB = *MIB->getParent(); 3907296417Sdim DebugLoc DL = MIB->getDebugLoc(); 3908360784Sdim Register Reg = MIB->getOperand(0).getReg(); 3909296417Sdim 3910296417Sdim // Insert the XOR. 3911296417Sdim BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg) 3912296417Sdim .addReg(Reg, RegState::Undef) 3913296417Sdim .addReg(Reg, RegState::Undef); 3914296417Sdim 3915296417Sdim // Turn the pseudo into an INC or DEC. 3916296417Sdim MIB->setDesc(TII.get(MinusOne ? 
X86::DEC32r : X86::INC32r)); 3917296417Sdim MIB.addReg(Reg); 3918296417Sdim 3919296417Sdim return true; 3920296417Sdim} 3921296417Sdim 3922314564Sdimstatic bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, 3923314564Sdim const TargetInstrInfo &TII, 3924314564Sdim const X86Subtarget &Subtarget) { 3925309124Sdim MachineBasicBlock &MBB = *MIB->getParent(); 3926309124Sdim DebugLoc DL = MIB->getDebugLoc(); 3927309124Sdim int64_t Imm = MIB->getOperand(1).getImm(); 3928309124Sdim assert(Imm != 0 && "Using push/pop for 0 is not efficient."); 3929309124Sdim MachineBasicBlock::iterator I = MIB.getInstr(); 3930309124Sdim 3931309124Sdim int StackAdjustment; 3932309124Sdim 3933309124Sdim if (Subtarget.is64Bit()) { 3934309124Sdim assert(MIB->getOpcode() == X86::MOV64ImmSExti8 || 3935309124Sdim MIB->getOpcode() == X86::MOV32ImmSExti8); 3936309124Sdim 3937309124Sdim // Can't use push/pop lowering if the function might write to the red zone. 3938309124Sdim X86MachineFunctionInfo *X86FI = 3939309124Sdim MBB.getParent()->getInfo<X86MachineFunctionInfo>(); 3940309124Sdim if (X86FI->getUsesRedZone()) { 3941314564Sdim MIB->setDesc(TII.get(MIB->getOpcode() == 3942314564Sdim X86::MOV32ImmSExti8 ? X86::MOV32ri : X86::MOV64ri)); 3943309124Sdim return true; 3944309124Sdim } 3945309124Sdim 3946309124Sdim // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and 3947309124Sdim // widen the register if necessary. 
3948309124Sdim StackAdjustment = 8; 3949314564Sdim BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm); 3950314564Sdim MIB->setDesc(TII.get(X86::POP64r)); 3951309124Sdim MIB->getOperand(0) 3952309124Sdim .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64)); 3953309124Sdim } else { 3954309124Sdim assert(MIB->getOpcode() == X86::MOV32ImmSExti8); 3955309124Sdim StackAdjustment = 4; 3956314564Sdim BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm); 3957314564Sdim MIB->setDesc(TII.get(X86::POP32r)); 3958309124Sdim } 3959363496Sdim MIB->RemoveOperand(1); 3960363496Sdim MIB->addImplicitDefUseOperands(*MBB.getParent()); 3961309124Sdim 3962309124Sdim // Build CFI if necessary. 3963309124Sdim MachineFunction &MF = *MBB.getParent(); 3964309124Sdim const X86FrameLowering *TFL = Subtarget.getFrameLowering(); 3965309124Sdim bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 3966360784Sdim bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves(); 3967309124Sdim bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI; 3968309124Sdim if (EmitCFI) { 3969309124Sdim TFL->BuildCFI(MBB, I, DL, 3970309124Sdim MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment)); 3971309124Sdim TFL->BuildCFI(MBB, std::next(I), DL, 3972309124Sdim MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment)); 3973309124Sdim } 3974309124Sdim 3975309124Sdim return true; 3976309124Sdim} 3977309124Sdim 3978280031Sdim// LoadStackGuard has so far only been implemented for 64-bit MachO. Different 3979280031Sdim// code sequence is needed for other targets. 
3980280031Sdimstatic void expandLoadStackGuard(MachineInstrBuilder &MIB, 3981280031Sdim const TargetInstrInfo &TII) { 3982280031Sdim MachineBasicBlock &MBB = *MIB->getParent(); 3983280031Sdim DebugLoc DL = MIB->getDebugLoc(); 3984360784Sdim Register Reg = MIB->getOperand(0).getReg(); 3985280031Sdim const GlobalValue *GV = 3986280031Sdim cast<GlobalValue>((*MIB->memoperands_begin())->getValue()); 3987314564Sdim auto Flags = MachineMemOperand::MOLoad | 3988314564Sdim MachineMemOperand::MODereferenceable | 3989314564Sdim MachineMemOperand::MOInvariant; 3990296417Sdim MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( 3991309124Sdim MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, 8); 3992280031Sdim MachineBasicBlock::iterator I = MIB.getInstr(); 3993280031Sdim 3994280031Sdim BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1) 3995280031Sdim .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0) 3996280031Sdim .addMemOperand(MMO); 3997280031Sdim MIB->setDebugLoc(DL); 3998280031Sdim MIB->setDesc(TII.get(X86::MOV64rm)); 3999280031Sdim MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0); 4000280031Sdim} 4001280031Sdim 4002327952Sdimstatic bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) { 4003327952Sdim MachineBasicBlock &MBB = *MIB->getParent(); 4004327952Sdim MachineFunction &MF = *MBB.getParent(); 4005327952Sdim const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); 4006327952Sdim const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); 4007327952Sdim unsigned XorOp = 4008327952Sdim MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr; 4009327952Sdim MIB->setDesc(TII.get(XorOp)); 4010327952Sdim MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef); 4011327952Sdim return true; 4012327952Sdim} 4013327952Sdim 4014314564Sdim// This is used to handle spills for 128/256-bit registers when we have AVX512, 4015314564Sdim// but not VLX. 
If it uses an extended register we need to use an instruction 4016314564Sdim// that loads the lower 128/256-bit, but is available with only AVX512F. 4017314564Sdimstatic bool expandNOVLXLoad(MachineInstrBuilder &MIB, 4018314564Sdim const TargetRegisterInfo *TRI, 4019314564Sdim const MCInstrDesc &LoadDesc, 4020314564Sdim const MCInstrDesc &BroadcastDesc, 4021314564Sdim unsigned SubIdx) { 4022360784Sdim Register DestReg = MIB->getOperand(0).getReg(); 4023314564Sdim // Check if DestReg is XMM16-31 or YMM16-31. 4024314564Sdim if (TRI->getEncodingValue(DestReg) < 16) { 4025314564Sdim // We can use a normal VEX encoded load. 4026314564Sdim MIB->setDesc(LoadDesc); 4027314564Sdim } else { 4028314564Sdim // Use a 128/256-bit VBROADCAST instruction. 4029314564Sdim MIB->setDesc(BroadcastDesc); 4030314564Sdim // Change the destination to a 512-bit register. 4031314564Sdim DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass); 4032314564Sdim MIB->getOperand(0).setReg(DestReg); 4033314564Sdim } 4034314564Sdim return true; 4035314564Sdim} 4036314564Sdim 4037314564Sdim// This is used to handle spills for 128/256-bit registers when we have AVX512, 4038314564Sdim// but not VLX. If it uses an extended register we need to use an instruction 4039314564Sdim// that stores the lower 128/256-bit, but is available with only AVX512F. 4040314564Sdimstatic bool expandNOVLXStore(MachineInstrBuilder &MIB, 4041314564Sdim const TargetRegisterInfo *TRI, 4042314564Sdim const MCInstrDesc &StoreDesc, 4043314564Sdim const MCInstrDesc &ExtractDesc, 4044314564Sdim unsigned SubIdx) { 4045360784Sdim Register SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg(); 4046314564Sdim // Check if DestReg is XMM16-31 or YMM16-31. 4047314564Sdim if (TRI->getEncodingValue(SrcReg) < 16) { 4048314564Sdim // We can use a normal VEX encoded store. 4049314564Sdim MIB->setDesc(StoreDesc); 4050314564Sdim } else { 4051314564Sdim // Use a VEXTRACTF instruction. 
4052314564Sdim MIB->setDesc(ExtractDesc); 4053314564Sdim // Change the destination to a 512-bit register. 4054314564Sdim SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass); 4055314564Sdim MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg); 4056314564Sdim MIB.addImm(0x0); // Append immediate to extract from the lower bits. 4057314564Sdim } 4058314564Sdim 4059314564Sdim return true; 4060314564Sdim} 4061353358Sdim 4062353358Sdimstatic bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) { 4063353358Sdim MIB->setDesc(Desc); 4064353358Sdim int64_t ShiftAmt = MIB->getOperand(2).getImm(); 4065353358Sdim // Temporarily remove the immediate so we can add another source register. 4066353358Sdim MIB->RemoveOperand(2); 4067353358Sdim // Add the register. Don't copy the kill flag if there is one. 4068353358Sdim MIB.addReg(MIB->getOperand(1).getReg(), 4069353358Sdim getUndefRegState(MIB->getOperand(1).isUndef())); 4070353358Sdim // Add back the immediate. 4071353358Sdim MIB.addImm(ShiftAmt); 4072353358Sdim return true; 4073353358Sdim} 4074353358Sdim 4075309124Sdimbool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 4076276479Sdim bool HasAVX = Subtarget.hasAVX(); 4077309124Sdim MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); 4078309124Sdim switch (MI.getOpcode()) { 4079276479Sdim case X86::MOV32r0: 4080276479Sdim return Expand2AddrUndef(MIB, get(X86::XOR32rr)); 4081296417Sdim case X86::MOV32r1: 4082296417Sdim return expandMOV32r1(MIB, *this, /*MinusOne=*/ false); 4083296417Sdim case X86::MOV32r_1: 4084296417Sdim return expandMOV32r1(MIB, *this, /*MinusOne=*/ true); 4085309124Sdim case X86::MOV32ImmSExti8: 4086309124Sdim case X86::MOV64ImmSExti8: 4087314564Sdim return ExpandMOVImmSExti8(MIB, *this, Subtarget); 4088243830Sdim case X86::SETB_C8r: 4089249423Sdim return Expand2AddrUndef(MIB, get(X86::SBB8rr)); 4090243830Sdim case X86::SETB_C16r: 4091249423Sdim return Expand2AddrUndef(MIB, get(X86::SBB16rr)); 4092243830Sdim 
case X86::SETB_C32r: 4093249423Sdim return Expand2AddrUndef(MIB, get(X86::SBB32rr)); 4094243830Sdim case X86::SETB_C64r: 4095249423Sdim return Expand2AddrUndef(MIB, get(X86::SBB64rr)); 4096341825Sdim case X86::MMX_SET0: 4097341825Sdim return Expand2AddrUndef(MIB, get(X86::MMX_PXORirr)); 4098226633Sdim case X86::V_SET0: 4099234353Sdim case X86::FsFLD0SS: 4100234353Sdim case X86::FsFLD0SD: 4101360784Sdim case X86::FsFLD0F128: 4102249423Sdim return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); 4103327952Sdim case X86::AVX_SET0: { 4104243830Sdim assert(HasAVX && "AVX not supported"); 4105327952Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 4106360784Sdim Register SrcReg = MIB->getOperand(0).getReg(); 4107360784Sdim Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); 4108327952Sdim MIB->getOperand(0).setReg(XReg); 4109327952Sdim Expand2AddrUndef(MIB, get(X86::VXORPSrr)); 4110327952Sdim MIB.addReg(SrcReg, RegState::ImplicitDefine); 4111327952Sdim return true; 4112327952Sdim } 4113309124Sdim case X86::AVX512_128_SET0: 4114321369Sdim case X86::AVX512_FsFLD0SS: 4115360784Sdim case X86::AVX512_FsFLD0SD: 4116360784Sdim case X86::AVX512_FsFLD0F128: { 4117321369Sdim bool HasVLX = Subtarget.hasVLX(); 4118360784Sdim Register SrcReg = MIB->getOperand(0).getReg(); 4119321369Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 4120321369Sdim if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) 4121321369Sdim return Expand2AddrUndef(MIB, 4122321369Sdim get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr)); 4123321369Sdim // Extended register without VLX. Use a larger XOR. 
4124327952Sdim SrcReg = 4125327952Sdim TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass); 4126321369Sdim MIB->getOperand(0).setReg(SrcReg); 4127321369Sdim return Expand2AddrUndef(MIB, get(X86::VPXORDZrr)); 4128321369Sdim } 4129327952Sdim case X86::AVX512_256_SET0: 4130327952Sdim case X86::AVX512_512_SET0: { 4131321369Sdim bool HasVLX = Subtarget.hasVLX(); 4132360784Sdim Register SrcReg = MIB->getOperand(0).getReg(); 4133321369Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 4134327952Sdim if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) { 4135360784Sdim Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); 4136327952Sdim MIB->getOperand(0).setReg(XReg); 4137327952Sdim Expand2AddrUndef(MIB, 4138327952Sdim get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr)); 4139327952Sdim MIB.addReg(SrcReg, RegState::ImplicitDefine); 4140327952Sdim return true; 4141327952Sdim } 4142353358Sdim if (MI.getOpcode() == X86::AVX512_256_SET0) { 4143353358Sdim // No VLX so we must reference a zmm. 4144353358Sdim unsigned ZReg = 4145353358Sdim TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass); 4146353358Sdim MIB->getOperand(0).setReg(ZReg); 4147353358Sdim } 4148321369Sdim return Expand2AddrUndef(MIB, get(X86::VPXORDZrr)); 4149321369Sdim } 4150243830Sdim case X86::V_SETALLONES: 4151249423Sdim return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); 4152243830Sdim case X86::AVX2_SETALLONES: 4153249423Sdim return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); 4154321369Sdim case X86::AVX1_SETALLONES: { 4155360784Sdim Register Reg = MIB->getOperand(0).getReg(); 4156321369Sdim // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS. 
4157321369Sdim MIB->setDesc(get(X86::VCMPPSYrri)); 4158321369Sdim MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf); 4159321369Sdim return true; 4160321369Sdim } 4161309124Sdim case X86::AVX512_512_SETALLONES: { 4162360784Sdim Register Reg = MIB->getOperand(0).getReg(); 4163309124Sdim MIB->setDesc(get(X86::VPTERNLOGDZrri)); 4164309124Sdim // VPTERNLOGD needs 3 register inputs and an immediate. 4165309124Sdim // 0xff will return 1s for any input. 4166309124Sdim MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef) 4167309124Sdim .addReg(Reg, RegState::Undef).addImm(0xff); 4168309124Sdim return true; 4169309124Sdim } 4170314564Sdim case X86::AVX512_512_SEXT_MASK_32: 4171314564Sdim case X86::AVX512_512_SEXT_MASK_64: { 4172360784Sdim Register Reg = MIB->getOperand(0).getReg(); 4173360784Sdim Register MaskReg = MIB->getOperand(1).getReg(); 4174314564Sdim unsigned MaskState = getRegState(MIB->getOperand(1)); 4175314564Sdim unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ? 4176314564Sdim X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz; 4177314564Sdim MI.RemoveOperand(1); 4178314564Sdim MIB->setDesc(get(Opc)); 4179314564Sdim // VPTERNLOG needs 3 register inputs and an immediate. 4180314564Sdim // 0xff will return 1s for any input. 
4181314564Sdim MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState) 4182314564Sdim .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff); 4183314564Sdim return true; 4184314564Sdim } 4185314564Sdim case X86::VMOVAPSZ128rm_NOVLX: 4186314564Sdim return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm), 4187314564Sdim get(X86::VBROADCASTF32X4rm), X86::sub_xmm); 4188314564Sdim case X86::VMOVUPSZ128rm_NOVLX: 4189314564Sdim return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm), 4190314564Sdim get(X86::VBROADCASTF32X4rm), X86::sub_xmm); 4191314564Sdim case X86::VMOVAPSZ256rm_NOVLX: 4192314564Sdim return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm), 4193314564Sdim get(X86::VBROADCASTF64X4rm), X86::sub_ymm); 4194314564Sdim case X86::VMOVUPSZ256rm_NOVLX: 4195314564Sdim return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm), 4196314564Sdim get(X86::VBROADCASTF64X4rm), X86::sub_ymm); 4197314564Sdim case X86::VMOVAPSZ128mr_NOVLX: 4198314564Sdim return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr), 4199314564Sdim get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm); 4200314564Sdim case X86::VMOVUPSZ128mr_NOVLX: 4201314564Sdim return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr), 4202314564Sdim get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm); 4203314564Sdim case X86::VMOVAPSZ256mr_NOVLX: 4204314564Sdim return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr), 4205314564Sdim get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm); 4206314564Sdim case X86::VMOVUPSZ256mr_NOVLX: 4207314564Sdim return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr), 4208314564Sdim get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm); 4209344779Sdim case X86::MOV32ri64: { 4210360784Sdim Register Reg = MIB->getOperand(0).getReg(); 4211360784Sdim Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit); 4212309124Sdim MI.setDesc(get(X86::MOV32ri)); 4213344779Sdim MIB->getOperand(0).setReg(Reg32); 4214344779Sdim 
MIB.addReg(Reg, RegState::ImplicitDefine); 4215296417Sdim return true; 4216344779Sdim } 4217296417Sdim 4218296417Sdim // KNL does not recognize dependency-breaking idioms for mask registers, 4219296417Sdim // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1. 4220296417Sdim // Using %k0 as the undef input register is a performance heuristic based 4221296417Sdim // on the assumption that %k0 is used less frequently than the other mask 4222296417Sdim // registers, since it is not usable as a write mask. 4223296417Sdim // FIXME: A more advanced approach would be to choose the best input mask 4224296417Sdim // register based on context. 4225296417Sdim case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0); 4226296417Sdim case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0); 4227296417Sdim case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0); 4228296417Sdim case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0); 4229296417Sdim case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0); 4230296417Sdim case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0); 4231280031Sdim case TargetOpcode::LOAD_STACK_GUARD: 4232280031Sdim expandLoadStackGuard(MIB, *this); 4233280031Sdim return true; 4234327952Sdim case X86::XOR64_FP: 4235327952Sdim case X86::XOR32_FP: 4236327952Sdim return expandXorFP(MIB, *this); 4237353358Sdim case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8)); 4238353358Sdim case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8)); 4239353358Sdim case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8)); 4240353358Sdim case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8)); 4241353358Sdim case X86::ADD8rr_DB: MIB->setDesc(get(X86::OR8rr)); break; 4242353358Sdim case X86::ADD16rr_DB: MIB->setDesc(get(X86::OR16rr)); break; 4243353358Sdim case X86::ADD32rr_DB: MIB->setDesc(get(X86::OR32rr)); 
break; 4244353358Sdim case X86::ADD64rr_DB: MIB->setDesc(get(X86::OR64rr)); break; 4245353358Sdim case X86::ADD8ri_DB: MIB->setDesc(get(X86::OR8ri)); break; 4246353358Sdim case X86::ADD16ri_DB: MIB->setDesc(get(X86::OR16ri)); break; 4247353358Sdim case X86::ADD32ri_DB: MIB->setDesc(get(X86::OR32ri)); break; 4248353358Sdim case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break; 4249353358Sdim case X86::ADD16ri8_DB: MIB->setDesc(get(X86::OR16ri8)); break; 4250353358Sdim case X86::ADD32ri8_DB: MIB->setDesc(get(X86::OR32ri8)); break; 4251353358Sdim case X86::ADD64ri8_DB: MIB->setDesc(get(X86::OR64ri8)); break; 4252226633Sdim } 4253226633Sdim return false; 4254226633Sdim} 4255226633Sdim 4256327952Sdim/// Return true for all instructions that only update 4257327952Sdim/// the first 32 or 64-bits of the destination register and leave the rest 4258327952Sdim/// unmodified. This can be used to avoid folding loads if the instructions 4259327952Sdim/// only update part of the destination register, and the non-updated part is 4260327952Sdim/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these 4261327952Sdim/// instructions breaks the partial register dependency and it can improve 4262327952Sdim/// performance. e.g.: 4263327952Sdim/// 4264327952Sdim/// movss (%rdi), %xmm0 4265327952Sdim/// cvtss2sd %xmm0, %xmm0 4266327952Sdim/// 4267327952Sdim/// Instead of 4268327952Sdim/// cvtss2sd (%rdi), %xmm0 4269327952Sdim/// 4270327952Sdim/// FIXME: This should be turned into a TSFlags. 
///
/// \p Subtarget gates the GPR cases (POPCNT/LZCNT/TZCNT), which only count as
/// partial updates on subtargets with the corresponding false-dependency
/// quirk. \p ForLoadFold is set when the caller is deciding whether a load can
/// be folded into the instruction; the CVTSI2Sx family is excluded then
/// because its source is a GPR, so folding the load cannot create an XMM
/// partial-register stall.
static bool hasPartialRegUpdate(unsigned Opcode,
                                const X86Subtarget &Subtarget,
                                bool ForLoadFold = false) {
  switch (Opcode) {
  case X86::CVTSI2SSrr:
  case X86::CVTSI2SSrm:
  case X86::CVTSI642SSrr:
  case X86::CVTSI642SSrm:
  case X86::CVTSI2SDrr:
  case X86::CVTSI2SDrm:
  case X86::CVTSI642SDrr:
  case X86::CVTSI642SDrm:
    // Load folding won't affect the undef register update since the input is
    // a GPR.
    return !ForLoadFold;
  case X86::CVTSD2SSrr:
  case X86::CVTSD2SSrm:
  case X86::CVTSS2SDrr:
  case X86::CVTSS2SDrm:
  case X86::MOVHPDrm:
  case X86::MOVHPSrm:
  case X86::MOVLPDrm:
  case X86::MOVLPSrm:
  case X86::RCPSSr:
  case X86::RCPSSm:
  case X86::RCPSSr_Int:
  case X86::RCPSSm_Int:
  case X86::ROUNDSDr:
  case X86::ROUNDSDm:
  case X86::ROUNDSSr:
  case X86::ROUNDSSm:
  case X86::RSQRTSSr:
  case X86::RSQRTSSm:
  case X86::RSQRTSSr_Int:
  case X86::RSQRTSSm_Int:
  case X86::SQRTSSr:
  case X86::SQRTSSm:
  case X86::SQRTSSr_Int:
  case X86::SQRTSSm_Int:
  case X86::SQRTSDr:
  case X86::SQRTSDm:
  case X86::SQRTSDr_Int:
  case X86::SQRTSDm_Int:
    return true;
  // GPR
  case X86::POPCNT32rm:
  case X86::POPCNT32rr:
  case X86::POPCNT64rm:
  case X86::POPCNT64rr:
    return Subtarget.hasPOPCNTFalseDeps();
  case X86::LZCNT32rm:
  case X86::LZCNT32rr:
  case X86::LZCNT64rm:
  case X86::LZCNT64rr:
  case X86::TZCNT32rm:
  case X86::TZCNT32rr:
  case X86::TZCNT64rm:
  case X86::TZCNT64rr:
    return Subtarget.hasLZCNTFalseDeps();
  }

  return false;
}

/// Inform the BreakFalseDeps pass how many idle
/// instructions we would like before a partial register update.
unsigned X86InstrInfo::getPartialRegUpdateClearance(
    const MachineInstr &MI, unsigned OpNum,
    const TargetRegisterInfo *TRI) const {
  // Only the destination (operand 0) of a partial-update instruction carries
  // the false dependency.
  if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode(), Subtarget))
    return 0;

  // If MI is marked as reading Reg, the partial register update is wanted.
  const MachineOperand &MO = MI.getOperand(0);
  Register Reg = MO.getReg();
  if (Register::isVirtualRegister(Reg)) {
    if (MO.readsReg() || MI.readsVirtualRegister(Reg))
      return 0;
  } else {
    if (MI.readsRegister(Reg, TRI))
      return 0;
  }

  // If any instructions in the clearance range are reading Reg, insert a
  // dependency breaking instruction, which is inexpensive and is likely to
  // be hidden in other instruction's cycles.
  return PartialRegUpdateClearance;
}

// Return true for any instruction that copies the high bits of the first
// source operand into the unused high bits of the destination operand.
// On a true return, OpNum is set to the index of the pass-through operand
// (it differs from 1 only for the masked VMOVSS/VMOVSD forms below).
static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
                              bool ForLoadFold = false) {
  // Set the OpNum parameter to the first source operand.
  OpNum = 1;
  switch (Opcode) {
  case X86::VCVTSI2SSrr:
  case X86::VCVTSI2SSrm:
  case X86::VCVTSI2SSrr_Int:
  case X86::VCVTSI2SSrm_Int:
  case X86::VCVTSI642SSrr:
  case X86::VCVTSI642SSrm:
  case X86::VCVTSI642SSrr_Int:
  case X86::VCVTSI642SSrm_Int:
  case X86::VCVTSI2SDrr:
  case X86::VCVTSI2SDrm:
  case X86::VCVTSI2SDrr_Int:
  case X86::VCVTSI2SDrm_Int:
  case X86::VCVTSI642SDrr:
  case X86::VCVTSI642SDrm:
  case X86::VCVTSI642SDrr_Int:
  case X86::VCVTSI642SDrm_Int:
  // AVX-512
  case X86::VCVTSI2SSZrr:
  case X86::VCVTSI2SSZrm:
  case X86::VCVTSI2SSZrr_Int:
  case X86::VCVTSI2SSZrrb_Int:
  case X86::VCVTSI2SSZrm_Int:
  case X86::VCVTSI642SSZrr:
  case X86::VCVTSI642SSZrm:
  case X86::VCVTSI642SSZrr_Int:
  case X86::VCVTSI642SSZrrb_Int:
  case X86::VCVTSI642SSZrm_Int:
  case X86::VCVTSI2SDZrr:
  case X86::VCVTSI2SDZrm:
  case X86::VCVTSI2SDZrr_Int:
  case X86::VCVTSI2SDZrm_Int:
  case X86::VCVTSI642SDZrr:
  case X86::VCVTSI642SDZrm:
  case X86::VCVTSI642SDZrr_Int:
  case X86::VCVTSI642SDZrrb_Int:
  case X86::VCVTSI642SDZrm_Int:
  case X86::VCVTUSI2SSZrr:
  case X86::VCVTUSI2SSZrm:
  case X86::VCVTUSI2SSZrr_Int:
  case X86::VCVTUSI2SSZrrb_Int:
  case X86::VCVTUSI2SSZrm_Int:
  case X86::VCVTUSI642SSZrr:
  case X86::VCVTUSI642SSZrm:
  case X86::VCVTUSI642SSZrr_Int:
  case X86::VCVTUSI642SSZrrb_Int:
  case X86::VCVTUSI642SSZrm_Int:
  case X86::VCVTUSI2SDZrr:
  case X86::VCVTUSI2SDZrm:
  case X86::VCVTUSI2SDZrr_Int:
  case X86::VCVTUSI2SDZrm_Int:
  case X86::VCVTUSI642SDZrr:
  case X86::VCVTUSI642SDZrm:
  case X86::VCVTUSI642SDZrr_Int:
  case X86::VCVTUSI642SDZrrb_Int:
  case X86::VCVTUSI642SDZrm_Int:
    // Load folding won't affect the undef register update since the input is
    // a GPR.
    return !ForLoadFold;
  case X86::VCVTSD2SSrr:
  case X86::VCVTSD2SSrm:
  case X86::VCVTSD2SSrr_Int:
  case X86::VCVTSD2SSrm_Int:
  case X86::VCVTSS2SDrr:
  case X86::VCVTSS2SDrm:
  case X86::VCVTSS2SDrr_Int:
  case X86::VCVTSS2SDrm_Int:
  case X86::VRCPSSr:
  case X86::VRCPSSr_Int:
  case X86::VRCPSSm:
  case X86::VRCPSSm_Int:
  case X86::VROUNDSDr:
  case X86::VROUNDSDm:
  case X86::VROUNDSDr_Int:
  case X86::VROUNDSDm_Int:
  case X86::VROUNDSSr:
  case X86::VROUNDSSm:
  case X86::VROUNDSSr_Int:
  case X86::VROUNDSSm_Int:
  case X86::VRSQRTSSr:
  case X86::VRSQRTSSr_Int:
  case X86::VRSQRTSSm:
  case X86::VRSQRTSSm_Int:
  case X86::VSQRTSSr:
  case X86::VSQRTSSr_Int:
  case X86::VSQRTSSm:
  case X86::VSQRTSSm_Int:
  case X86::VSQRTSDr:
  case X86::VSQRTSDr_Int:
  case X86::VSQRTSDm:
  case X86::VSQRTSDm_Int:
  // AVX-512
  case X86::VCVTSD2SSZrr:
  case X86::VCVTSD2SSZrr_Int:
  case X86::VCVTSD2SSZrrb_Int:
  case X86::VCVTSD2SSZrm:
  case X86::VCVTSD2SSZrm_Int:
  case X86::VCVTSS2SDZrr:
  case X86::VCVTSS2SDZrr_Int:
  case X86::VCVTSS2SDZrrb_Int:
  case X86::VCVTSS2SDZrm:
  case X86::VCVTSS2SDZrm_Int:
  case X86::VGETEXPSDZr:
  case X86::VGETEXPSDZrb:
  case X86::VGETEXPSDZm:
  case X86::VGETEXPSSZr:
  case X86::VGETEXPSSZrb:
  case X86::VGETEXPSSZm:
  case X86::VGETMANTSDZrri:
  case X86::VGETMANTSDZrrib:
  case X86::VGETMANTSDZrmi:
  case X86::VGETMANTSSZrri:
  case X86::VGETMANTSSZrrib:
  case X86::VGETMANTSSZrmi:
  case X86::VRNDSCALESDZr:
  case X86::VRNDSCALESDZr_Int:
  case X86::VRNDSCALESDZrb_Int:
  case X86::VRNDSCALESDZm:
  case X86::VRNDSCALESDZm_Int:
  case X86::VRNDSCALESSZr:
  case X86::VRNDSCALESSZr_Int:
  case X86::VRNDSCALESSZrb_Int:
  case X86::VRNDSCALESSZm:
  case X86::VRNDSCALESSZm_Int:
  case X86::VRCP14SDZrr:
  case X86::VRCP14SDZrm:
  case X86::VRCP14SSZrr:
  case X86::VRCP14SSZrm:
  case X86::VRCP28SDZr:
  case X86::VRCP28SDZrb:
  case X86::VRCP28SDZm:
  case X86::VRCP28SSZr:
  case X86::VRCP28SSZrb:
  case X86::VRCP28SSZm:
  case X86::VREDUCESSZrmi:
  case X86::VREDUCESSZrri:
  case X86::VREDUCESSZrrib:
  case X86::VRSQRT14SDZrr:
  case X86::VRSQRT14SDZrm:
  case X86::VRSQRT14SSZrr:
  case X86::VRSQRT14SSZrm:
  case X86::VRSQRT28SDZr:
  case X86::VRSQRT28SDZrb:
  case X86::VRSQRT28SDZm:
  case X86::VRSQRT28SSZr:
  case X86::VRSQRT28SSZrb:
  case X86::VRSQRT28SSZm:
  case X86::VSQRTSSZr:
  case X86::VSQRTSSZr_Int:
  case X86::VSQRTSSZrb_Int:
  case X86::VSQRTSSZm:
  case X86::VSQRTSSZm_Int:
  case X86::VSQRTSDZr:
  case X86::VSQRTSDZr_Int:
  case X86::VSQRTSDZrb_Int:
  case X86::VSQRTSDZm:
  case X86::VSQRTSDZm_Int:
    return true;
  // Masked move forms: the pass-through operand sits at a different index.
  case X86::VMOVSSZrrk:
  case X86::VMOVSDZrrk:
    OpNum = 3;
    return true;
  case X86::VMOVSSZrrkz:
  case X86::VMOVSDZrrkz:
    OpNum = 2;
    return true;
  }

  return false;
}

/// Inform the BreakFalseDeps pass how many idle instructions we would like
/// before certain undef register reads.
///
/// This catches the VCVTSI2SD family of instructions:
///
/// vcvtsi2sdq %rax, undef %xmm0, %xmm14
///
/// We should be careful *not* to catch VXOR idioms which are presumably
/// handled specially in the pipeline:
///
/// vxorps undef %xmm1, undef %xmm1, %xmm1
///
/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
/// high bits that are passed-through are not live.
unsigned
X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
                                   const TargetRegisterInfo *TRI) const {
  // hasUndefRegUpdate also sets OpNum to the pass-through operand index.
  if (!hasUndefRegUpdate(MI.getOpcode(), OpNum))
    return 0;

  // Only request clearance when the pass-through read is an undef physical
  // register; a real (defined) read is a true dependency, not a false one.
  const MachineOperand &MO = MI.getOperand(OpNum);
  if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) {
    return UndefRegClearance;
  }
  return 0;
}

/// Insert a dependency-breaking zeroing idiom (xor of the register with
/// itself) before MI for the register used at operand OpNum, so the read of
/// that register does not stall on its previous writer.
void X86InstrInfo::breakPartialRegDependency(
    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
  Register Reg = MI.getOperand(OpNum).getReg();
  // If MI kills this register, the false dependence is already broken.
  if (MI.killsRegister(Reg, TRI))
    return;

  if (X86::VR128RegClass.contains(Reg)) {
    // These instructions are all floating point domain, so xorps is the best
    // choice.
    unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::VR256RegClass.contains(Reg)) {
    // Use vxorps to clear the full ymm register.
    // It wants to read and write the xmm sub-register.
    Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
        .addReg(XReg, RegState::Undef)
        .addReg(XReg, RegState::Undef)
        .addReg(Reg, RegState::ImplicitDefine);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::GR64RegClass.contains(Reg)) {
    // Using XOR32rr because it has shorter encoding and zeros up the upper bits
    // as well.
    Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
        .addReg(XReg, RegState::Undef)
        .addReg(XReg, RegState::Undef)
        .addReg(Reg, RegState::ImplicitDefine);
    MI.addRegisterKilled(Reg, TRI, true);
  } else if (X86::GR32RegClass.contains(Reg)) {
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg)
        .addReg(Reg, RegState::Undef)
        .addReg(Reg, RegState::Undef);
    MI.addRegisterKilled(Reg, TRI, true);
  }
}

/// Append the memory reference operands MOs to MIB. MOs is either a bare
/// frame index (fewer than 4 operands) or a full 5-operand x86 address;
/// PtrOffset is added to the displacement in either case.
static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
                        int PtrOffset = 0) {
  unsigned NumAddrOps = MOs.size();

  if (NumAddrOps < 4) {
    // FrameIndex only - add an immediate offset (whether it's zero or not).
    for (unsigned i = 0; i != NumAddrOps; ++i)
      MIB.add(MOs[i]);
    addOffset(MIB, PtrOffset);
  } else {
    // General Memory Addressing - we need to add any offset to an existing
    // offset.
    assert(MOs.size() == 5 && "Unexpected memory operand list length");
    for (unsigned i = 0; i != NumAddrOps; ++i) {
      const MachineOperand &MO = MOs[i];
      // Operand 3 is the displacement; fold the extra pointer offset into it.
      if (i == 3 && PtrOffset != 0) {
        MIB.addDisp(MO, PtrOffset);
      } else {
        MIB.add(MO);
      }
    }
  }
}

/// Re-constrain the register classes of NewMI's virtual register operands to
/// the classes required by its (possibly new) opcode.
static void updateOperandRegConstraints(MachineFunction &MF,
                                        MachineInstr &NewMI,
                                        const TargetInstrInfo &TII) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) {
    MachineOperand &MO = NewMI.getOperand(Idx);
    // We only need to update constraints on virtual register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Register::isVirtualRegister(Reg))
      continue;

    auto *NewRC = MRI.constrainRegClass(
        Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF));
    if (!NewRC) {
      LLVM_DEBUG(
          dbgs() << "WARNING: Unable to update register constraint for operand "
                 << Idx << " of instruction:\n";
          NewMI.dump(); dbgs() << "\n");
    }
  }
}

/// Build a memory-folded replacement for the two-address instruction MI:
/// the new instruction gets Opcode, the memory operands MOs in place of the
/// tied def/use pair, then MI's remaining operands, and is inserted before
/// InsertPt.
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
                                     ArrayRef<MachineOperand> MOs,
                                     MachineBasicBlock::iterator InsertPt,
                                     MachineInstr &MI,
                                     const TargetInstrInfo &TII) {
  // Create the base instruction with the memory operand as the first part.
  // Omit the implicit operands, something BuildMI can't do.
  MachineInstr *NewMI =
      MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
  MachineInstrBuilder MIB(MF, NewMI);
  addOperands(MIB, MOs);

  // Loop over the rest of the ri operands, converting them over.
  unsigned NumOps = MI.getDesc().getNumOperands() - 2;
  for (unsigned i = 0; i != NumOps; ++i) {
    MachineOperand &MO = MI.getOperand(i + 2);
    MIB.add(MO);
  }
  // Copy any trailing operands beyond the declared ones as well.
  for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    MIB.add(MO);
  }

  updateOperandRegConstraints(MF, *NewMI, TII);

  MachineBasicBlock *MBB = InsertPt->getParent();
  MBB->insert(InsertPt, NewMI);

  return MIB;
}

/// Build a memory-folded replacement for MI in which operand OpNo is
/// replaced by the memory operands MOs (displaced by PtrOffset).
static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
                              unsigned OpNo, ArrayRef<MachineOperand> MOs,
                              MachineBasicBlock::iterator InsertPt,
                              MachineInstr &MI, const TargetInstrInfo &TII,
                              int PtrOffset = 0) {
  // Omit the implicit operands, something BuildMI can't do.
4693309124Sdim MachineInstr *NewMI = 4694309124Sdim MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true); 4695249423Sdim MachineInstrBuilder MIB(MF, NewMI); 4696218893Sdim 4697309124Sdim for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 4698309124Sdim MachineOperand &MO = MI.getOperand(i); 4699193323Sed if (i == OpNo) { 4700193323Sed assert(MO.isReg() && "Expected to fold into reg operand!"); 4701296417Sdim addOperands(MIB, MOs, PtrOffset); 4702193323Sed } else { 4703321369Sdim MIB.add(MO); 4704193323Sed } 4705193323Sed } 4706288943Sdim 4707341825Sdim updateOperandRegConstraints(MF, *NewMI, TII); 4708341825Sdim 4709360784Sdim // Copy the NoFPExcept flag from the instruction we're fusing. 4710360784Sdim if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 4711360784Sdim NewMI->setFlag(MachineInstr::MIFlag::NoFPExcept); 4712360784Sdim 4713288943Sdim MachineBasicBlock *MBB = InsertPt->getParent(); 4714288943Sdim MBB->insert(InsertPt, NewMI); 4715288943Sdim 4716193323Sed return MIB; 4717193323Sed} 4718193323Sed 4719193323Sedstatic MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 4720288943Sdim ArrayRef<MachineOperand> MOs, 4721288943Sdim MachineBasicBlock::iterator InsertPt, 4722309124Sdim MachineInstr &MI) { 4723288943Sdim MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, 4724309124Sdim MI.getDebugLoc(), TII.get(Opcode)); 4725288943Sdim addOperands(MIB, MOs); 4726193323Sed return MIB.addImm(0); 4727193323Sed} 4728193323Sed 4729296417SdimMachineInstr *X86InstrInfo::foldMemoryOperandCustom( 4730309124Sdim MachineFunction &MF, MachineInstr &MI, unsigned OpNum, 4731296417Sdim ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt, 4732296417Sdim unsigned Size, unsigned Align) const { 4733309124Sdim switch (MI.getOpcode()) { 4734296417Sdim case X86::INSERTPSrr: 4735296417Sdim case X86::VINSERTPSrr: 4736314564Sdim case X86::VINSERTPSZrr: 4737296417Sdim // Attempt to convert the load of inserted vector into 
a fold load 4738296417Sdim // of a single float. 4739296417Sdim if (OpNum == 2) { 4740309124Sdim unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm(); 4741296417Sdim unsigned ZMask = Imm & 15; 4742296417Sdim unsigned DstIdx = (Imm >> 4) & 3; 4743296417Sdim unsigned SrcIdx = (Imm >> 6) & 3; 4744296417Sdim 4745321369Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4746321369Sdim const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); 4747321369Sdim unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; 4748353358Sdim if ((Size == 0 || Size >= 16) && RCSize >= 16 && 4 <= Align) { 4749296417Sdim int PtrOffset = SrcIdx * 4; 4750296417Sdim unsigned NewImm = (DstIdx << 4) | ZMask; 4751296417Sdim unsigned NewOpCode = 4752314564Sdim (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm : 4753314564Sdim (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm : 4754314564Sdim X86::INSERTPSrm; 4755296417Sdim MachineInstr *NewMI = 4756296417Sdim FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset); 4757296417Sdim NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm); 4758296417Sdim return NewMI; 4759296417Sdim } 4760296417Sdim } 4761296417Sdim break; 4762309124Sdim case X86::MOVHLPSrr: 4763309124Sdim case X86::VMOVHLPSrr: 4764314564Sdim case X86::VMOVHLPSZrr: 4765309124Sdim // Move the upper 64-bits of the second operand to the lower 64-bits. 4766309124Sdim // To fold the load, adjust the pointer to the upper and use (V)MOVLPS. 4767309124Sdim // TODO: In most cases AVX doesn't have a 8-byte alignment requirement. 
4768309124Sdim if (OpNum == 2) { 4769321369Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4770321369Sdim const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); 4771321369Sdim unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; 4772353358Sdim if ((Size == 0 || Size >= 16) && RCSize >= 16 && 8 <= Align) { 4773309124Sdim unsigned NewOpCode = 4774314564Sdim (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm : 4775314564Sdim (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm : 4776314564Sdim X86::MOVLPSrm; 4777309124Sdim MachineInstr *NewMI = 4778309124Sdim FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8); 4779309124Sdim return NewMI; 4780309124Sdim } 4781309124Sdim } 4782309124Sdim break; 4783353358Sdim case X86::UNPCKLPDrr: 4784353358Sdim // If we won't be able to fold this to the memory form of UNPCKL, use 4785353358Sdim // MOVHPD instead. Done as custom because we can't have this in the load 4786353358Sdim // table twice. 4787353358Sdim if (OpNum == 2) { 4788353358Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4789353358Sdim const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); 4790353358Sdim unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; 4791353358Sdim if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) { 4792353358Sdim MachineInstr *NewMI = 4793353358Sdim FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this); 4794353358Sdim return NewMI; 4795353358Sdim } 4796353358Sdim } 4797353358Sdim break; 4798353358Sdim } 4799296417Sdim 4800296417Sdim return nullptr; 4801296417Sdim} 4802296417Sdim 4803353358Sdimstatic bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, 4804353358Sdim MachineInstr &MI) { 4805360784Sdim unsigned Ignored; 4806360784Sdim if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) || 4807341825Sdim !MI.getOperand(1).isReg()) 4808341825Sdim return false; 4809341825Sdim 4810341825Sdim // The are two 
cases we need to handle depending on where in the pipeline 4811341825Sdim // the folding attempt is being made. 4812341825Sdim // -Register has the undef flag set. 4813341825Sdim // -Register is produced by the IMPLICIT_DEF instruction. 4814341825Sdim 4815341825Sdim if (MI.getOperand(1).isUndef()) 4816341825Sdim return true; 4817341825Sdim 4818341825Sdim MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4819341825Sdim MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg()); 4820341825Sdim return VRegDef && VRegDef->isImplicitDef(); 4821341825Sdim} 4822341825Sdim 4823341825Sdim 4824288943SdimMachineInstr *X86InstrInfo::foldMemoryOperandImpl( 4825309124Sdim MachineFunction &MF, MachineInstr &MI, unsigned OpNum, 4826288943Sdim ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt, 4827288943Sdim unsigned Size, unsigned Align, bool AllowCommute) const { 4828327952Sdim bool isSlowTwoMemOps = Subtarget.slowTwoMemOps(); 4829193323Sed bool isTwoAddrFold = false; 4830249423Sdim 4831296417Sdim // For CPUs that favor the register form of a call or push, 4832296417Sdim // do not fold loads into calls or pushes, unless optimizing for size 4833296417Sdim // aggressively. 4834353358Sdim if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() && 4835309124Sdim (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r || 4836309124Sdim MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r || 4837309124Sdim MI.getOpcode() == X86::PUSH64r)) 4838276479Sdim return nullptr; 4839249423Sdim 4840341825Sdim // Avoid partial and undef register update stalls unless optimizing for size. 
4841353358Sdim if (!MF.getFunction().hasOptSize() && 4842353358Sdim (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) || 4843341825Sdim shouldPreventUndefRegUpdateMemFold(MF, MI))) 4844327952Sdim return nullptr; 4845327952Sdim 4846309124Sdim unsigned NumOps = MI.getDesc().getNumOperands(); 4847309124Sdim bool isTwoAddr = 4848309124Sdim NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; 4849193323Sed 4850221345Sdim // FIXME: AsmPrinter doesn't know how to handle 4851221345Sdim // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. 4852309124Sdim if (MI.getOpcode() == X86::ADD32ri && 4853309124Sdim MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) 4854276479Sdim return nullptr; 4855221345Sdim 4856341825Sdim // GOTTPOFF relocation loads can only be folded into add instructions. 4857341825Sdim // FIXME: Need to exclude other relocations that only support specific 4858341825Sdim // instructions. 4859341825Sdim if (MOs.size() == X86::AddrNumOperands && 4860341825Sdim MOs[X86::AddrDisp].getTargetFlags() == X86II::MO_GOTTPOFF && 4861341825Sdim MI.getOpcode() != X86::ADD64rr) 4862341825Sdim return nullptr; 4863341825Sdim 4864276479Sdim MachineInstr *NewMI = nullptr; 4865296417Sdim 4866296417Sdim // Attempt to fold any custom cases we have. 4867296417Sdim if (MachineInstr *CustomMI = 4868296417Sdim foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align)) 4869296417Sdim return CustomMI; 4870296417Sdim 4871341825Sdim const X86MemoryFoldTableEntry *I = nullptr; 4872341825Sdim 4873193323Sed // Folding a memory location into the two-address part of a two-address 4874193323Sed // instruction is different than folding it other places. It requires 4875193323Sed // replacing the *two* registers with the memory location. 
4876309124Sdim if (isTwoAddr && NumOps >= 2 && OpNum < 2 && MI.getOperand(0).isReg() && 4877309124Sdim MI.getOperand(1).isReg() && 4878309124Sdim MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { 4879341825Sdim I = lookupTwoAddrFoldTable(MI.getOpcode()); 4880193323Sed isTwoAddrFold = true; 4881341825Sdim } else { 4882341825Sdim if (OpNum == 0) { 4883341825Sdim if (MI.getOpcode() == X86::MOV32r0) { 4884341825Sdim NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI); 4885341825Sdim if (NewMI) 4886341825Sdim return NewMI; 4887341825Sdim } 4888243830Sdim } 4889218893Sdim 4890341825Sdim I = lookupFoldTable(MI.getOpcode(), OpNum); 4891193323Sed } 4892218893Sdim 4893341825Sdim if (I != nullptr) { 4894341825Sdim unsigned Opcode = I->DstOp; 4895341825Sdim unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; 4896360784Sdim MinAlign = MinAlign ? 1 << (MinAlign - 1) : 0; 4897341825Sdim if (Align < MinAlign) 4898341825Sdim return nullptr; 4899341825Sdim bool NarrowToMOV32rm = false; 4900341825Sdim if (Size) { 4901341825Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4902341825Sdim const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, 4903341825Sdim &RI, MF); 4904341825Sdim unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; 4905341825Sdim if (Size < RCSize) { 4906353358Sdim // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int. 4907341825Sdim // Check if it's safe to fold the load. If the size of the object is 4908341825Sdim // narrower than the load width, then it's not. 4909341825Sdim if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) 4910341825Sdim return nullptr; 4911341825Sdim // If this is a 64-bit load, but the spill slot is 32, then we can do 4912341825Sdim // a 32-bit load which is implicitly zero-extended. This likely is 4913341825Sdim // due to live interval analysis remat'ing a load from stack slot. 
4914341825Sdim if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) 4915341825Sdim return nullptr; 4916341825Sdim Opcode = X86::MOV32rm; 4917341825Sdim NarrowToMOV32rm = true; 4918198090Srdivacky } 4919341825Sdim } 4920198090Srdivacky 4921341825Sdim if (isTwoAddrFold) 4922341825Sdim NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this); 4923341825Sdim else 4924341825Sdim NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this); 4925341825Sdim 4926341825Sdim if (NarrowToMOV32rm) { 4927341825Sdim // If this is the special case where we use a MOV32rm to load a 32-bit 4928341825Sdim // value and zero-extend the top bits. Change the destination register 4929341825Sdim // to a 32-bit one. 4930360784Sdim Register DstReg = NewMI->getOperand(0).getReg(); 4931360784Sdim if (Register::isPhysicalRegister(DstReg)) 4932341825Sdim NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit)); 4933193323Sed else 4934341825Sdim NewMI->getOperand(0).setSubReg(X86::sub_32bit); 4935193323Sed } 4936341825Sdim return NewMI; 4937193323Sed } 4938218893Sdim 4939280031Sdim // If the instruction and target operand are commutable, commute the 4940280031Sdim // instruction and try again. 4941280031Sdim if (AllowCommute) { 4942296417Sdim unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex; 4943280031Sdim if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) { 4944309124Sdim bool HasDef = MI.getDesc().getNumDefs(); 4945353358Sdim Register Reg0 = HasDef ? 
MI.getOperand(0).getReg() : Register(); 4946353358Sdim Register Reg1 = MI.getOperand(CommuteOpIdx1).getReg(); 4947353358Sdim Register Reg2 = MI.getOperand(CommuteOpIdx2).getReg(); 4948296417Sdim bool Tied1 = 4949309124Sdim 0 == MI.getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO); 4950296417Sdim bool Tied2 = 4951309124Sdim 0 == MI.getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO); 4952280031Sdim 4953280031Sdim // If either of the commutable operands are tied to the destination 4954280031Sdim // then we can not commute + fold. 4955296417Sdim if ((HasDef && Reg0 == Reg1 && Tied1) || 4956296417Sdim (HasDef && Reg0 == Reg2 && Tied2)) 4957280031Sdim return nullptr; 4958280031Sdim 4959296417Sdim MachineInstr *CommutedMI = 4960296417Sdim commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); 4961296417Sdim if (!CommutedMI) { 4962296417Sdim // Unable to commute. 4963296417Sdim return nullptr; 4964296417Sdim } 4965309124Sdim if (CommutedMI != &MI) { 4966296417Sdim // New instruction. We can't fold from this. 4967296417Sdim CommutedMI->eraseFromParent(); 4968296417Sdim return nullptr; 4969296417Sdim } 4970280031Sdim 4971296417Sdim // Attempt to fold with the commuted version of the instruction. 4972296417Sdim NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, 4973296417Sdim Size, Align, /*AllowCommute=*/false); 4974296417Sdim if (NewMI) 4975296417Sdim return NewMI; 4976280031Sdim 4977296417Sdim // Folding failed again - undo the commute before returning. 4978296417Sdim MachineInstr *UncommutedMI = 4979296417Sdim commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); 4980296417Sdim if (!UncommutedMI) { 4981296417Sdim // Unable to commute. 4982280031Sdim return nullptr; 4983280031Sdim } 4984309124Sdim if (UncommutedMI != &MI) { 4985296417Sdim // New instruction. It doesn't need to be kept. 
4986296417Sdim UncommutedMI->eraseFromParent(); 4987296417Sdim return nullptr; 4988296417Sdim } 4989296417Sdim 4990296417Sdim // Return here to prevent duplicate fuse failure report. 4991296417Sdim return nullptr; 4992280031Sdim } 4993280031Sdim } 4994280031Sdim 4995218893Sdim // No fusion 4996309124Sdim if (PrintFailedFusing && !MI.isCopy()) 4997309124Sdim dbgs() << "We failed to fuse operand " << OpNum << " in " << MI; 4998276479Sdim return nullptr; 4999193323Sed} 5000193323Sed 5001309124SdimMachineInstr * 5002309124SdimX86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, 5003309124Sdim ArrayRef<unsigned> Ops, 5004309124Sdim MachineBasicBlock::iterator InsertPt, 5005353358Sdim int FrameIndex, LiveIntervals *LIS, 5006353358Sdim VirtRegMap *VRM) const { 5007218893Sdim // Check switch flag 5008296417Sdim if (NoFusing) 5009296417Sdim return nullptr; 5010193323Sed 5011341825Sdim // Avoid partial and undef register update stalls unless optimizing for size. 5012353358Sdim if (!MF.getFunction().hasOptSize() && 5013353358Sdim (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) || 5014341825Sdim shouldPreventUndefRegUpdateMemFold(MF, MI))) 5015276479Sdim return nullptr; 5016201360Srdivacky 5017314564Sdim // Don't fold subreg spills, or reloads that use a high subreg. 5018314564Sdim for (auto Op : Ops) { 5019314564Sdim MachineOperand &MO = MI.getOperand(Op); 5020314564Sdim auto SubReg = MO.getSubReg(); 5021314564Sdim if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi)) 5022314564Sdim return nullptr; 5023314564Sdim } 5024314564Sdim 5025314564Sdim const MachineFrameInfo &MFI = MF.getFrameInfo(); 5026314564Sdim unsigned Size = MFI.getObjectSize(FrameIndex); 5027314564Sdim unsigned Alignment = MFI.getObjectAlignment(FrameIndex); 5028256090Sdim // If the function stack isn't realigned we don't want to fold instructions 5029256090Sdim // that need increased alignment. 
5030256090Sdim if (!RI.needsStackRealignment(MF)) 5031288943Sdim Alignment = 5032288943Sdim std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment()); 5033193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 5034193323Sed unsigned NewOpc = 0; 5035198090Srdivacky unsigned RCSize = 0; 5036309124Sdim switch (MI.getOpcode()) { 5037276479Sdim default: return nullptr; 5038198090Srdivacky case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; 5039208599Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; 5040208599Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; 5041208599Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break; 5042193323Sed } 5043198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 5044198090Srdivacky // narrower than the load width, then it's not. 5045198090Srdivacky if (Size < RCSize) 5046276479Sdim return nullptr; 5047193323Sed // Change to CMPXXri r, 0 first. 5048309124Sdim MI.setDesc(get(NewOpc)); 5049309124Sdim MI.getOperand(1).ChangeToImmediate(0); 5050193323Sed } else if (Ops.size() != 1) 5051276479Sdim return nullptr; 5052193323Sed 5053288943Sdim return foldMemoryOperandImpl(MF, MI, Ops[0], 5054288943Sdim MachineOperand::CreateFI(FrameIndex), InsertPt, 5055280031Sdim Size, Alignment, /*AllowCommute=*/true); 5056193323Sed} 5057193323Sed 5058288943Sdim/// Check if \p LoadMI is a partial register load that we can't fold into \p MI 5059288943Sdim/// because the latter uses contents that wouldn't be defined in the folded 5060288943Sdim/// version. 
For instance, this transformation isn't legal: 5061288943Sdim/// movss (%rdi), %xmm0 5062288943Sdim/// addps %xmm0, %xmm0 5063288943Sdim/// -> 5064288943Sdim/// addps (%rdi), %xmm0 5065288943Sdim/// 5066288943Sdim/// But this one is: 5067288943Sdim/// movss (%rdi), %xmm0 5068288943Sdim/// addss %xmm0, %xmm0 5069288943Sdim/// -> 5070288943Sdim/// addss (%rdi), %xmm0 5071288943Sdim/// 5072288943Sdimstatic bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, 5073288943Sdim const MachineInstr &UserMI, 5074288943Sdim const MachineFunction &MF) { 5075280031Sdim unsigned Opc = LoadMI.getOpcode(); 5076288943Sdim unsigned UserOpc = UserMI.getOpcode(); 5077321369Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 5078321369Sdim const TargetRegisterClass *RC = 5079321369Sdim MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg()); 5080321369Sdim unsigned RegSize = TRI.getRegSizeInBits(*RC); 5081280031Sdim 5082353358Sdim if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm || 5083353358Sdim Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt || 5084353358Sdim Opc == X86::VMOVSSZrm_alt) && 5085321369Sdim RegSize > 32) { 5086280031Sdim // These instructions only load 32 bits, we can't fold them if the 5087288943Sdim // destination register is wider than 32 bits (4 bytes), and its user 5088288943Sdim // instruction isn't scalar (SS). 
5089288943Sdim switch (UserOpc) { 5090309124Sdim case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int: 5091327952Sdim case X86::CMPSSrr_Int: case X86::VCMPSSrr_Int: case X86::VCMPSSZrr_Int: 5092309124Sdim case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int: 5093314564Sdim case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int: 5094314564Sdim case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int: 5095309124Sdim case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int: 5096309124Sdim case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int: 5097321369Sdim case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz: 5098353358Sdim case X86::VCMPSSZrr_Intk: 5099321369Sdim case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz: 5100321369Sdim case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz: 5101321369Sdim case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz: 5102321369Sdim case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz: 5103321369Sdim case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz: 5104314564Sdim case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int: 5105314564Sdim case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int: 5106314564Sdim case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int: 5107314564Sdim case X86::VFMADD213SSr_Int: case X86::VFNMADD213SSr_Int: 5108314564Sdim case X86::VFMADD231SSr_Int: case X86::VFNMADD231SSr_Int: 5109314564Sdim case X86::VFMSUB132SSr_Int: case X86::VFNMSUB132SSr_Int: 5110314564Sdim case X86::VFMSUB213SSr_Int: case X86::VFNMSUB213SSr_Int: 5111314564Sdim case X86::VFMSUB231SSr_Int: case X86::VFNMSUB231SSr_Int: 5112314564Sdim case X86::VFMADD132SSZr_Int: case X86::VFNMADD132SSZr_Int: 5113314564Sdim case X86::VFMADD213SSZr_Int: case X86::VFNMADD213SSZr_Int: 5114314564Sdim case X86::VFMADD231SSZr_Int: case X86::VFNMADD231SSZr_Int: 5115314564Sdim case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int: 5116314564Sdim case 
X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int: 5117314564Sdim case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int: 5118321369Sdim case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk: 5119321369Sdim case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk: 5120321369Sdim case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk: 5121321369Sdim case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk: 5122321369Sdim case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk: 5123321369Sdim case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk: 5124321369Sdim case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz: 5125321369Sdim case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz: 5126321369Sdim case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz: 5127321369Sdim case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz: 5128321369Sdim case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz: 5129321369Sdim case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz: 5130288943Sdim return false; 5131288943Sdim default: 5132288943Sdim return true; 5133288943Sdim } 5134288943Sdim } 5135280031Sdim 5136353358Sdim if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm || 5137353358Sdim Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt || 5138353358Sdim Opc == X86::VMOVSDZrm_alt) && 5139321369Sdim RegSize > 64) { 5140280031Sdim // These instructions only load 64 bits, we can't fold them if the 5141288943Sdim // destination register is wider than 64 bits (8 bytes), and its user 5142288943Sdim // instruction isn't scalar (SD). 
5143288943Sdim switch (UserOpc) { 5144309124Sdim case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int: 5145327952Sdim case X86::CMPSDrr_Int: case X86::VCMPSDrr_Int: case X86::VCMPSDZrr_Int: 5146309124Sdim case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int: 5147314564Sdim case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int: 5148314564Sdim case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int: 5149309124Sdim case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int: 5150309124Sdim case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int: 5151321369Sdim case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz: 5152353358Sdim case X86::VCMPSDZrr_Intk: 5153321369Sdim case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz: 5154321369Sdim case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz: 5155321369Sdim case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz: 5156321369Sdim case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz: 5157321369Sdim case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz: 5158314564Sdim case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int: 5159314564Sdim case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int: 5160314564Sdim case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int: 5161314564Sdim case X86::VFMADD213SDr_Int: case X86::VFNMADD213SDr_Int: 5162314564Sdim case X86::VFMADD231SDr_Int: case X86::VFNMADD231SDr_Int: 5163314564Sdim case X86::VFMSUB132SDr_Int: case X86::VFNMSUB132SDr_Int: 5164314564Sdim case X86::VFMSUB213SDr_Int: case X86::VFNMSUB213SDr_Int: 5165314564Sdim case X86::VFMSUB231SDr_Int: case X86::VFNMSUB231SDr_Int: 5166314564Sdim case X86::VFMADD132SDZr_Int: case X86::VFNMADD132SDZr_Int: 5167314564Sdim case X86::VFMADD213SDZr_Int: case X86::VFNMADD213SDZr_Int: 5168314564Sdim case X86::VFMADD231SDZr_Int: case X86::VFNMADD231SDZr_Int: 5169314564Sdim case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int: 5170314564Sdim case 
X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int: 5171314564Sdim case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int: 5172321369Sdim case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk: 5173321369Sdim case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk: 5174321369Sdim case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk: 5175321369Sdim case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk: 5176321369Sdim case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk: 5177321369Sdim case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk: 5178321369Sdim case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz: 5179321369Sdim case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz: 5180321369Sdim case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz: 5181321369Sdim case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz: 5182321369Sdim case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz: 5183321369Sdim case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz: 5184288943Sdim return false; 5185288943Sdim default: 5186288943Sdim return true; 5187288943Sdim } 5188288943Sdim } 5189280031Sdim 5190280031Sdim return false; 5191280031Sdim} 5192280031Sdim 5193288943SdimMachineInstr *X86InstrInfo::foldMemoryOperandImpl( 5194309124Sdim MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 5195309124Sdim MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, 5196309124Sdim LiveIntervals *LIS) const { 5197314564Sdim 5198314564Sdim // TODO: Support the case where LoadMI loads a wide register, but MI 5199314564Sdim // only uses a subreg. 5200314564Sdim for (auto Op : Ops) { 5201314564Sdim if (MI.getOperand(Op).getSubReg()) 5202314564Sdim return nullptr; 5203314564Sdim } 5204314564Sdim 5205261991Sdim // If loading from a FrameIndex, fold directly from the FrameIndex. 
5206309124Sdim unsigned NumOps = LoadMI.getDesc().getNumOperands(); 5207261991Sdim int FrameIndex; 5208280031Sdim if (isLoadFromStackSlot(LoadMI, FrameIndex)) { 5209309124Sdim if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF)) 5210280031Sdim return nullptr; 5211309124Sdim return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS); 5212280031Sdim } 5213261991Sdim 5214218893Sdim // Check switch flag 5215276479Sdim if (NoFusing) return nullptr; 5216193323Sed 5217341825Sdim // Avoid partial and undef register update stalls unless optimizing for size. 5218353358Sdim if (!MF.getFunction().hasOptSize() && 5219353358Sdim (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) || 5220341825Sdim shouldPreventUndefRegUpdateMemFold(MF, MI))) 5221276479Sdim return nullptr; 5222201360Srdivacky 5223193323Sed // Determine the alignment of the load. 5224193323Sed unsigned Alignment = 0; 5225309124Sdim if (LoadMI.hasOneMemOperand()) 5226309124Sdim Alignment = (*LoadMI.memoperands_begin())->getAlignment(); 5227198090Srdivacky else 5228309124Sdim switch (LoadMI.getOpcode()) { 5229309124Sdim case X86::AVX512_512_SET0: 5230309124Sdim case X86::AVX512_512_SETALLONES: 5231309124Sdim Alignment = 64; 5232309124Sdim break; 5233234353Sdim case X86::AVX2_SETALLONES: 5234321369Sdim case X86::AVX1_SETALLONES: 5235243830Sdim case X86::AVX_SET0: 5236309124Sdim case X86::AVX512_256_SET0: 5237212904Sdim Alignment = 32; 5238212904Sdim break; 5239226633Sdim case X86::V_SET0: 5240198090Srdivacky case X86::V_SETALLONES: 5241309124Sdim case X86::AVX512_128_SET0: 5242360784Sdim case X86::FsFLD0F128: 5243360784Sdim case X86::AVX512_FsFLD0F128: 5244198090Srdivacky Alignment = 16; 5245198090Srdivacky break; 5246341825Sdim case X86::MMX_SET0: 5247198090Srdivacky case X86::FsFLD0SD: 5248314564Sdim case X86::AVX512_FsFLD0SD: 5249198090Srdivacky Alignment = 8; 5250198090Srdivacky break; 5251198090Srdivacky case X86::FsFLD0SS: 5252314564Sdim case X86::AVX512_FsFLD0SS: 5253198090Srdivacky 
Alignment = 4; 5254198090Srdivacky break; 5255198090Srdivacky default: 5256276479Sdim return nullptr; 5257193323Sed } 5258193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 5259193323Sed unsigned NewOpc = 0; 5260309124Sdim switch (MI.getOpcode()) { 5261276479Sdim default: return nullptr; 5262193323Sed case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 5263208599Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; 5264208599Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; 5265208599Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri8; break; 5266193323Sed } 5267193323Sed // Change to CMPXXri r, 0 first. 5268309124Sdim MI.setDesc(get(NewOpc)); 5269309124Sdim MI.getOperand(1).ChangeToImmediate(0); 5270193323Sed } else if (Ops.size() != 1) 5271276479Sdim return nullptr; 5272193323Sed 5273212904Sdim // Make sure the subregisters match. 5274212904Sdim // Otherwise we risk changing the size of the load. 5275309124Sdim if (LoadMI.getOperand(0).getSubReg() != MI.getOperand(Ops[0]).getSubReg()) 5276276479Sdim return nullptr; 5277212904Sdim 5278210299Sed SmallVector<MachineOperand,X86::AddrNumOperands> MOs; 5279309124Sdim switch (LoadMI.getOpcode()) { 5280341825Sdim case X86::MMX_SET0: 5281226633Sdim case X86::V_SET0: 5282198090Srdivacky case X86::V_SETALLONES: 5283234353Sdim case X86::AVX2_SETALLONES: 5284321369Sdim case X86::AVX1_SETALLONES: 5285243830Sdim case X86::AVX_SET0: 5286309124Sdim case X86::AVX512_128_SET0: 5287309124Sdim case X86::AVX512_256_SET0: 5288309124Sdim case X86::AVX512_512_SET0: 5289309124Sdim case X86::AVX512_512_SETALLONES: 5290198090Srdivacky case X86::FsFLD0SD: 5291314564Sdim case X86::AVX512_FsFLD0SD: 5292314564Sdim case X86::FsFLD0SS: 5293360784Sdim case X86::AVX512_FsFLD0SS: 5294360784Sdim case X86::FsFLD0F128: 5295360784Sdim case X86::AVX512_FsFLD0F128: { 5296226633Sdim // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. 5297193323Sed // Create a constant-pool entry and operands to load from it. 
5298193323Sed 5299204961Srdivacky // Medium and large mode can't fold loads this way. 5300276479Sdim if (MF.getTarget().getCodeModel() != CodeModel::Small && 5301276479Sdim MF.getTarget().getCodeModel() != CodeModel::Kernel) 5302276479Sdim return nullptr; 5303204961Srdivacky 5304193323Sed // x86-32 PIC requires a PIC base register for constant pools. 5305193323Sed unsigned PICBase = 0; 5306309124Sdim if (MF.getTarget().isPositionIndependent()) { 5307276479Sdim if (Subtarget.is64Bit()) 5308198090Srdivacky PICBase = X86::RIP; 5309198090Srdivacky else 5310210299Sed // FIXME: PICBase = getGlobalBaseReg(&MF); 5311198090Srdivacky // This doesn't work for several reasons. 5312198090Srdivacky // 1. GlobalBaseReg may have been spilled. 5313198090Srdivacky // 2. It may not be live at MI. 5314276479Sdim return nullptr; 5315198090Srdivacky } 5316193323Sed 5317198090Srdivacky // Create a constant-pool entry. 5318193323Sed MachineConstantPool &MCP = *MF.getConstantPool(); 5319226633Sdim Type *Ty; 5320309124Sdim unsigned Opc = LoadMI.getOpcode(); 5321314564Sdim if (Opc == X86::FsFLD0SS || Opc == X86::AVX512_FsFLD0SS) 5322327952Sdim Ty = Type::getFloatTy(MF.getFunction().getContext()); 5323314564Sdim else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD) 5324327952Sdim Ty = Type::getDoubleTy(MF.getFunction().getContext()); 5325360784Sdim else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128) 5326360784Sdim Ty = Type::getFP128Ty(MF.getFunction().getContext()); 5327309124Sdim else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES) 5328327952Sdim Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16); 5329309124Sdim else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 || 5330321369Sdim Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES) 5331327952Sdim Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8); 5332341825Sdim else if (Opc == X86::MMX_SET0) 5333341825Sdim Ty = 
VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2); 5334198090Srdivacky else 5335327952Sdim Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4); 5336226633Sdim 5337309124Sdim bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES || 5338321369Sdim Opc == X86::AVX512_512_SETALLONES || 5339321369Sdim Opc == X86::AVX1_SETALLONES); 5340226633Sdim const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : 5341226633Sdim Constant::getNullValue(Ty); 5342198090Srdivacky unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); 5343193323Sed 5344193323Sed // Create operands to load from the constant pool entry. 5345193323Sed MOs.push_back(MachineOperand::CreateReg(PICBase, false)); 5346193323Sed MOs.push_back(MachineOperand::CreateImm(1)); 5347193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 5348193323Sed MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); 5349193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 5350198090Srdivacky break; 5351198090Srdivacky } 5352198090Srdivacky default: { 5353309124Sdim if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF)) 5354276479Sdim return nullptr; 5355249423Sdim 5356193323Sed // Folding a normal load. Just copy the load's address operands. 
5357309124Sdim MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands, 5358309124Sdim LoadMI.operands_begin() + NumOps); 5359198090Srdivacky break; 5360193323Sed } 5361198090Srdivacky } 5362288943Sdim return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt, 5363280031Sdim /*Size=*/0, Alignment, /*AllowCommute=*/true); 5364193323Sed} 5365193323Sed 5366344779Sdimstatic SmallVector<MachineMemOperand *, 2> 5367344779SdimextractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) { 5368344779Sdim SmallVector<MachineMemOperand *, 2> LoadMMOs; 5369344779Sdim 5370344779Sdim for (MachineMemOperand *MMO : MMOs) { 5371344779Sdim if (!MMO->isLoad()) 5372344779Sdim continue; 5373344779Sdim 5374344779Sdim if (!MMO->isStore()) { 5375344779Sdim // Reuse the MMO. 5376344779Sdim LoadMMOs.push_back(MMO); 5377344779Sdim } else { 5378344779Sdim // Clone the MMO and unset the store flag. 5379344779Sdim LoadMMOs.push_back(MF.getMachineMemOperand( 5380353358Sdim MMO, MMO->getFlags() & ~MachineMemOperand::MOStore)); 5381344779Sdim } 5382344779Sdim } 5383344779Sdim 5384344779Sdim return LoadMMOs; 5385344779Sdim} 5386344779Sdim 5387344779Sdimstatic SmallVector<MachineMemOperand *, 2> 5388344779SdimextractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) { 5389344779Sdim SmallVector<MachineMemOperand *, 2> StoreMMOs; 5390344779Sdim 5391344779Sdim for (MachineMemOperand *MMO : MMOs) { 5392344779Sdim if (!MMO->isStore()) 5393344779Sdim continue; 5394344779Sdim 5395344779Sdim if (!MMO->isLoad()) { 5396344779Sdim // Reuse the MMO. 5397344779Sdim StoreMMOs.push_back(MMO); 5398344779Sdim } else { 5399344779Sdim // Clone the MMO and unset the load flag. 
5400344779Sdim StoreMMOs.push_back(MF.getMachineMemOperand( 5401353358Sdim MMO, MMO->getFlags() & ~MachineMemOperand::MOLoad)); 5402344779Sdim } 5403344779Sdim } 5404344779Sdim 5405344779Sdim return StoreMMOs; 5406344779Sdim} 5407344779Sdim 5408360784Sdimstatic unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I, 5409360784Sdim const TargetRegisterClass *RC, 5410360784Sdim const X86Subtarget &STI) { 5411360784Sdim assert(STI.hasAVX512() && "Expected at least AVX512!"); 5412360784Sdim unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC); 5413360784Sdim assert((SpillSize == 64 || STI.hasVLX()) && 5414360784Sdim "Can't broadcast less than 64 bytes without AVX512VL!"); 5415360784Sdim 5416360784Sdim switch (I->Flags & TB_BCAST_MASK) { 5417360784Sdim default: llvm_unreachable("Unexpected broadcast type!"); 5418360784Sdim case TB_BCAST_D: 5419360784Sdim switch (SpillSize) { 5420360784Sdim default: llvm_unreachable("Unknown spill size"); 5421360784Sdim case 16: return X86::VPBROADCASTDZ128m; 5422360784Sdim case 32: return X86::VPBROADCASTDZ256m; 5423360784Sdim case 64: return X86::VPBROADCASTDZm; 5424360784Sdim } 5425360784Sdim break; 5426360784Sdim case TB_BCAST_Q: 5427360784Sdim switch (SpillSize) { 5428360784Sdim default: llvm_unreachable("Unknown spill size"); 5429360784Sdim case 16: return X86::VPBROADCASTQZ128m; 5430360784Sdim case 32: return X86::VPBROADCASTQZ256m; 5431360784Sdim case 64: return X86::VPBROADCASTQZm; 5432360784Sdim } 5433360784Sdim break; 5434360784Sdim case TB_BCAST_SS: 5435360784Sdim switch (SpillSize) { 5436360784Sdim default: llvm_unreachable("Unknown spill size"); 5437360784Sdim case 16: return X86::VBROADCASTSSZ128m; 5438360784Sdim case 32: return X86::VBROADCASTSSZ256m; 5439360784Sdim case 64: return X86::VBROADCASTSSZm; 5440360784Sdim } 5441360784Sdim break; 5442360784Sdim case TB_BCAST_SD: 5443360784Sdim switch (SpillSize) { 5444360784Sdim default: llvm_unreachable("Unknown spill size"); 5445360784Sdim case 16: return 
X86::VMOVDDUPZ128rm; 5446360784Sdim case 32: return X86::VBROADCASTSDZ256m; 5447360784Sdim case 64: return X86::VBROADCASTSDZm; 5448360784Sdim } 5449360784Sdim break; 5450360784Sdim } 5451360784Sdim} 5452360784Sdim 5453309124Sdimbool X86InstrInfo::unfoldMemoryOperand( 5454309124Sdim MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, 5455309124Sdim bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const { 5456341825Sdim const X86MemoryFoldTableEntry *I = lookupUnfoldTable(MI.getOpcode()); 5457341825Sdim if (I == nullptr) 5458193323Sed return false; 5459341825Sdim unsigned Opc = I->DstOp; 5460341825Sdim unsigned Index = I->Flags & TB_INDEX_MASK; 5461341825Sdim bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; 5462341825Sdim bool FoldedStore = I->Flags & TB_FOLDED_STORE; 5463360784Sdim bool FoldedBCast = I->Flags & TB_FOLDED_BCAST; 5464193323Sed if (UnfoldLoad && !FoldedLoad) 5465193323Sed return false; 5466193323Sed UnfoldLoad &= FoldedLoad; 5467193323Sed if (UnfoldStore && !FoldedStore) 5468193323Sed return false; 5469193323Sed UnfoldStore &= FoldedStore; 5470193323Sed 5471224145Sdim const MCInstrDesc &MCID = get(Opc); 5472360784Sdim 5473239462Sdim const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); 5474360784Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 5475296417Sdim // TODO: Check if 32-byte or greater accesses are slow too? 5476309124Sdim if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass && 5477296417Sdim Subtarget.isUnalignedMem16Slow()) 5478210299Sed // Without memoperands, loadRegFromAddr and storeRegToStackSlot will 5479210299Sed // conservatively assume the address is unaligned. That's bad for 5480210299Sed // performance. 
5481210299Sed return false; 5482210299Sed SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps; 5483193323Sed SmallVector<MachineOperand,2> BeforeOps; 5484193323Sed SmallVector<MachineOperand,2> AfterOps; 5485193323Sed SmallVector<MachineOperand,4> ImpOps; 5486309124Sdim for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 5487309124Sdim MachineOperand &Op = MI.getOperand(i); 5488210299Sed if (i >= Index && i < Index + X86::AddrNumOperands) 5489193323Sed AddrOps.push_back(Op); 5490193323Sed else if (Op.isReg() && Op.isImplicit()) 5491193323Sed ImpOps.push_back(Op); 5492193323Sed else if (i < Index) 5493193323Sed BeforeOps.push_back(Op); 5494193323Sed else if (i > Index) 5495193323Sed AfterOps.push_back(Op); 5496193323Sed } 5497193323Sed 5498360784Sdim // Emit the load or broadcast instruction. 5499193323Sed if (UnfoldLoad) { 5500344779Sdim auto MMOs = extractLoadMMOs(MI.memoperands(), MF); 5501360784Sdim 5502360784Sdim unsigned Opc; 5503360784Sdim if (FoldedBCast) { 5504360784Sdim Opc = getBroadcastOpcode(I, RC, Subtarget); 5505360784Sdim } else { 5506360784Sdim unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); 5507360784Sdim bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; 5508360784Sdim Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget); 5509360784Sdim } 5510360784Sdim 5511360784Sdim DebugLoc DL; 5512360784Sdim MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg); 5513360784Sdim for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) 5514360784Sdim MIB.add(AddrOps[i]); 5515360784Sdim MIB.setMemRefs(MMOs); 5516360784Sdim NewMIs.push_back(MIB); 5517360784Sdim 5518193323Sed if (UnfoldStore) { 5519193323Sed // Address operands cannot be marked isKill. 
5520210299Sed for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) { 5521193323Sed MachineOperand &MO = NewMIs[0]->getOperand(i); 5522193323Sed if (MO.isReg()) 5523193323Sed MO.setIsKill(false); 5524193323Sed } 5525193323Sed } 5526193323Sed } 5527193323Sed 5528193323Sed // Emit the data processing instruction. 5529309124Sdim MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI.getDebugLoc(), true); 5530249423Sdim MachineInstrBuilder MIB(MF, DataMI); 5531218893Sdim 5532193323Sed if (FoldedStore) 5533193323Sed MIB.addReg(Reg, RegState::Define); 5534296417Sdim for (MachineOperand &BeforeOp : BeforeOps) 5535321369Sdim MIB.add(BeforeOp); 5536193323Sed if (FoldedLoad) 5537193323Sed MIB.addReg(Reg); 5538296417Sdim for (MachineOperand &AfterOp : AfterOps) 5539321369Sdim MIB.add(AfterOp); 5540296417Sdim for (MachineOperand &ImpOp : ImpOps) { 5541296417Sdim MIB.addReg(ImpOp.getReg(), 5542296417Sdim getDefRegState(ImpOp.isDef()) | 5543193323Sed RegState::Implicit | 5544296417Sdim getKillRegState(ImpOp.isKill()) | 5545296417Sdim getDeadRegState(ImpOp.isDead()) | 5546296417Sdim getUndefRegState(ImpOp.isUndef())); 5547193323Sed } 5548193323Sed // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 
5549193323Sed switch (DataMI->getOpcode()) { 5550193323Sed default: break; 5551193323Sed case X86::CMP64ri32: 5552208599Srdivacky case X86::CMP64ri8: 5553193323Sed case X86::CMP32ri: 5554208599Srdivacky case X86::CMP32ri8: 5555193323Sed case X86::CMP16ri: 5556208599Srdivacky case X86::CMP16ri8: 5557193323Sed case X86::CMP8ri: { 5558193323Sed MachineOperand &MO0 = DataMI->getOperand(0); 5559193323Sed MachineOperand &MO1 = DataMI->getOperand(1); 5560193323Sed if (MO1.getImm() == 0) { 5561243830Sdim unsigned NewOpc; 5562193323Sed switch (DataMI->getOpcode()) { 5563243830Sdim default: llvm_unreachable("Unreachable!"); 5564208599Srdivacky case X86::CMP64ri8: 5565193323Sed case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; 5566208599Srdivacky case X86::CMP32ri8: 5567193323Sed case X86::CMP32ri: NewOpc = X86::TEST32rr; break; 5568208599Srdivacky case X86::CMP16ri8: 5569193323Sed case X86::CMP16ri: NewOpc = X86::TEST16rr; break; 5570193323Sed case X86::CMP8ri: NewOpc = X86::TEST8rr; break; 5571193323Sed } 5572193323Sed DataMI->setDesc(get(NewOpc)); 5573193323Sed MO1.ChangeToRegister(MO0.getReg(), false); 5574193323Sed } 5575193323Sed } 5576193323Sed } 5577193323Sed NewMIs.push_back(DataMI); 5578193323Sed 5579193323Sed // Emit the store instruction. 
5580193323Sed if (UnfoldStore) { 5581239462Sdim const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF); 5582344779Sdim auto MMOs = extractStoreMMOs(MI.memoperands(), MF); 5583360784Sdim unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16); 5584360784Sdim bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; 5585360784Sdim unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget); 5586360784Sdim DebugLoc DL; 5587360784Sdim MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 5588360784Sdim for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) 5589360784Sdim MIB.add(AddrOps[i]); 5590360784Sdim MIB.addReg(Reg, RegState::Kill); 5591360784Sdim MIB.setMemRefs(MMOs); 5592360784Sdim NewMIs.push_back(MIB); 5593193323Sed } 5594193323Sed 5595193323Sed return true; 5596193323Sed} 5597193323Sed 5598193323Sedbool 5599193323SedX86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 5600193323Sed SmallVectorImpl<SDNode*> &NewNodes) const { 5601193323Sed if (!N->isMachineOpcode()) 5602193323Sed return false; 5603193323Sed 5604341825Sdim const X86MemoryFoldTableEntry *I = lookupUnfoldTable(N->getMachineOpcode()); 5605341825Sdim if (I == nullptr) 5606193323Sed return false; 5607341825Sdim unsigned Opc = I->DstOp; 5608341825Sdim unsigned Index = I->Flags & TB_INDEX_MASK; 5609341825Sdim bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; 5610341825Sdim bool FoldedStore = I->Flags & TB_FOLDED_STORE; 5611360784Sdim bool FoldedBCast = I->Flags & TB_FOLDED_BCAST; 5612224145Sdim const MCInstrDesc &MCID = get(Opc); 5613239462Sdim MachineFunction &MF = DAG.getMachineFunction(); 5614321369Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 5615239462Sdim const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); 5616224145Sdim unsigned NumDefs = MCID.NumDefs; 5617193323Sed std::vector<SDValue> AddrOps; 5618193323Sed std::vector<SDValue> BeforeOps; 5619193323Sed std::vector<SDValue> AfterOps; 
5620261991Sdim SDLoc dl(N); 5621193323Sed unsigned NumOps = N->getNumOperands(); 5622193323Sed for (unsigned i = 0; i != NumOps-1; ++i) { 5623193323Sed SDValue Op = N->getOperand(i); 5624210299Sed if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands) 5625193323Sed AddrOps.push_back(Op); 5626193323Sed else if (i < Index-NumDefs) 5627193323Sed BeforeOps.push_back(Op); 5628193323Sed else if (i > Index-NumDefs) 5629193323Sed AfterOps.push_back(Op); 5630193323Sed } 5631193323Sed SDValue Chain = N->getOperand(NumOps-1); 5632193323Sed AddrOps.push_back(Chain); 5633193323Sed 5634193323Sed // Emit the load instruction. 5635276479Sdim SDNode *Load = nullptr; 5636193323Sed if (FoldedLoad) { 5637321369Sdim EVT VT = *TRI.legalclasstypes_begin(*RC); 5638344779Sdim auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF); 5639344779Sdim if (MMOs.empty() && RC == &X86::VR128RegClass && 5640296417Sdim Subtarget.isUnalignedMem16Slow()) 5641210299Sed // Do not introduce a slow unaligned load. 5642210299Sed return false; 5643296417Sdim // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte 5644296417Sdim // memory access is slow above. 5645360784Sdim 5646360784Sdim unsigned Opc; 5647360784Sdim if (FoldedBCast) { 5648360784Sdim Opc = getBroadcastOpcode(I, RC, Subtarget); 5649360784Sdim } else { 5650360784Sdim unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); 5651360784Sdim bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; 5652360784Sdim Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget); 5653360784Sdim } 5654360784Sdim 5655360784Sdim Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps); 5656193323Sed NewNodes.push_back(Load); 5657198090Srdivacky 5658198090Srdivacky // Preserve memory reference information. 5659344779Sdim DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs); 5660193323Sed } 5661193323Sed 5662193323Sed // Emit the data processing instruction. 
5663198090Srdivacky std::vector<EVT> VTs; 5664276479Sdim const TargetRegisterClass *DstRC = nullptr; 5665224145Sdim if (MCID.getNumDefs() > 0) { 5666239462Sdim DstRC = getRegClass(MCID, 0, &RI, MF); 5667321369Sdim VTs.push_back(*TRI.legalclasstypes_begin(*DstRC)); 5668193323Sed } 5669193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 5670198090Srdivacky EVT VT = N->getValueType(i); 5671224145Sdim if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs()) 5672193323Sed VTs.push_back(VT); 5673193323Sed } 5674193323Sed if (Load) 5675193323Sed BeforeOps.push_back(SDValue(Load, 0)); 5676288943Sdim BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end()); 5677341825Sdim // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 5678341825Sdim switch (Opc) { 5679341825Sdim default: break; 5680341825Sdim case X86::CMP64ri32: 5681341825Sdim case X86::CMP64ri8: 5682341825Sdim case X86::CMP32ri: 5683341825Sdim case X86::CMP32ri8: 5684341825Sdim case X86::CMP16ri: 5685341825Sdim case X86::CMP16ri8: 5686341825Sdim case X86::CMP8ri: 5687341825Sdim if (isNullConstant(BeforeOps[1])) { 5688341825Sdim switch (Opc) { 5689341825Sdim default: llvm_unreachable("Unreachable!"); 5690341825Sdim case X86::CMP64ri8: 5691341825Sdim case X86::CMP64ri32: Opc = X86::TEST64rr; break; 5692341825Sdim case X86::CMP32ri8: 5693341825Sdim case X86::CMP32ri: Opc = X86::TEST32rr; break; 5694341825Sdim case X86::CMP16ri8: 5695341825Sdim case X86::CMP16ri: Opc = X86::TEST16rr; break; 5696341825Sdim case X86::CMP8ri: Opc = X86::TEST8rr; break; 5697341825Sdim } 5698341825Sdim BeforeOps[1] = BeforeOps[0]; 5699341825Sdim } 5700341825Sdim } 5701251662Sdim SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps); 5702193323Sed NewNodes.push_back(NewNode); 5703193323Sed 5704193323Sed // Emit the store instruction. 
5705193323Sed if (FoldedStore) { 5706193323Sed AddrOps.pop_back(); 5707193323Sed AddrOps.push_back(SDValue(NewNode, 0)); 5708193323Sed AddrOps.push_back(Chain); 5709344779Sdim auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF); 5710344779Sdim if (MMOs.empty() && RC == &X86::VR128RegClass && 5711296417Sdim Subtarget.isUnalignedMem16Slow()) 5712210299Sed // Do not introduce a slow unaligned store. 5713210299Sed return false; 5714296417Sdim // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte 5715296417Sdim // memory access is slow above. 5716321369Sdim unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16); 5717344779Sdim bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; 5718276479Sdim SDNode *Store = 5719276479Sdim DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget), 5720276479Sdim dl, MVT::Other, AddrOps); 5721193323Sed NewNodes.push_back(Store); 5722198090Srdivacky 5723198090Srdivacky // Preserve memory reference information. 
5724344779Sdim DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs); 5725193323Sed } 5726193323Sed 5727193323Sed return true; 5728193323Sed} 5729193323Sed 5730193323Sedunsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, 5731198892Srdivacky bool UnfoldLoad, bool UnfoldStore, 5732198892Srdivacky unsigned *LoadRegIndex) const { 5733341825Sdim const X86MemoryFoldTableEntry *I = lookupUnfoldTable(Opc); 5734341825Sdim if (I == nullptr) 5735193323Sed return 0; 5736341825Sdim bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; 5737341825Sdim bool FoldedStore = I->Flags & TB_FOLDED_STORE; 5738193323Sed if (UnfoldLoad && !FoldedLoad) 5739193323Sed return 0; 5740193323Sed if (UnfoldStore && !FoldedStore) 5741193323Sed return 0; 5742198892Srdivacky if (LoadRegIndex) 5743341825Sdim *LoadRegIndex = I->Flags & TB_INDEX_MASK; 5744341825Sdim return I->DstOp; 5745193323Sed} 5746193323Sed 5747202878Srdivackybool 5748202878SrdivackyX86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 5749202878Srdivacky int64_t &Offset1, int64_t &Offset2) const { 5750202878Srdivacky if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) 5751202878Srdivacky return false; 5752202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 5753202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 5754202878Srdivacky switch (Opc1) { 5755202878Srdivacky default: return false; 5756202878Srdivacky case X86::MOV8rm: 5757202878Srdivacky case X86::MOV16rm: 5758202878Srdivacky case X86::MOV32rm: 5759202878Srdivacky case X86::MOV64rm: 5760202878Srdivacky case X86::LD_Fp32m: 5761202878Srdivacky case X86::LD_Fp64m: 5762202878Srdivacky case X86::LD_Fp80m: 5763202878Srdivacky case X86::MOVSSrm: 5764353358Sdim case X86::MOVSSrm_alt: 5765202878Srdivacky case X86::MOVSDrm: 5766353358Sdim case X86::MOVSDrm_alt: 5767202878Srdivacky case X86::MMX_MOVD64rm: 5768202878Srdivacky case X86::MMX_MOVQ64rm: 5769202878Srdivacky case X86::MOVAPSrm: 5770202878Srdivacky case X86::MOVUPSrm: 5771202878Srdivacky 
case X86::MOVAPDrm: 5772309124Sdim case X86::MOVUPDrm: 5773202878Srdivacky case X86::MOVDQArm: 5774202878Srdivacky case X86::MOVDQUrm: 5775226633Sdim // AVX load instructions 5776226633Sdim case X86::VMOVSSrm: 5777353358Sdim case X86::VMOVSSrm_alt: 5778226633Sdim case X86::VMOVSDrm: 5779353358Sdim case X86::VMOVSDrm_alt: 5780226633Sdim case X86::VMOVAPSrm: 5781226633Sdim case X86::VMOVUPSrm: 5782226633Sdim case X86::VMOVAPDrm: 5783309124Sdim case X86::VMOVUPDrm: 5784226633Sdim case X86::VMOVDQArm: 5785226633Sdim case X86::VMOVDQUrm: 5786224145Sdim case X86::VMOVAPSYrm: 5787224145Sdim case X86::VMOVUPSYrm: 5788224145Sdim case X86::VMOVAPDYrm: 5789309124Sdim case X86::VMOVUPDYrm: 5790224145Sdim case X86::VMOVDQAYrm: 5791224145Sdim case X86::VMOVDQUYrm: 5792309124Sdim // AVX512 load instructions 5793309124Sdim case X86::VMOVSSZrm: 5794353358Sdim case X86::VMOVSSZrm_alt: 5795309124Sdim case X86::VMOVSDZrm: 5796353358Sdim case X86::VMOVSDZrm_alt: 5797309124Sdim case X86::VMOVAPSZ128rm: 5798309124Sdim case X86::VMOVUPSZ128rm: 5799314564Sdim case X86::VMOVAPSZ128rm_NOVLX: 5800314564Sdim case X86::VMOVUPSZ128rm_NOVLX: 5801309124Sdim case X86::VMOVAPDZ128rm: 5802309124Sdim case X86::VMOVUPDZ128rm: 5803309124Sdim case X86::VMOVDQU8Z128rm: 5804309124Sdim case X86::VMOVDQU16Z128rm: 5805309124Sdim case X86::VMOVDQA32Z128rm: 5806309124Sdim case X86::VMOVDQU32Z128rm: 5807309124Sdim case X86::VMOVDQA64Z128rm: 5808309124Sdim case X86::VMOVDQU64Z128rm: 5809309124Sdim case X86::VMOVAPSZ256rm: 5810309124Sdim case X86::VMOVUPSZ256rm: 5811314564Sdim case X86::VMOVAPSZ256rm_NOVLX: 5812314564Sdim case X86::VMOVUPSZ256rm_NOVLX: 5813309124Sdim case X86::VMOVAPDZ256rm: 5814309124Sdim case X86::VMOVUPDZ256rm: 5815309124Sdim case X86::VMOVDQU8Z256rm: 5816309124Sdim case X86::VMOVDQU16Z256rm: 5817309124Sdim case X86::VMOVDQA32Z256rm: 5818309124Sdim case X86::VMOVDQU32Z256rm: 5819309124Sdim case X86::VMOVDQA64Z256rm: 5820309124Sdim case X86::VMOVDQU64Z256rm: 5821309124Sdim case X86::VMOVAPSZrm: 
5822309124Sdim case X86::VMOVUPSZrm: 5823309124Sdim case X86::VMOVAPDZrm: 5824309124Sdim case X86::VMOVUPDZrm: 5825309124Sdim case X86::VMOVDQU8Zrm: 5826309124Sdim case X86::VMOVDQU16Zrm: 5827309124Sdim case X86::VMOVDQA32Zrm: 5828309124Sdim case X86::VMOVDQU32Zrm: 5829309124Sdim case X86::VMOVDQA64Zrm: 5830309124Sdim case X86::VMOVDQU64Zrm: 5831309124Sdim case X86::KMOVBkm: 5832309124Sdim case X86::KMOVWkm: 5833309124Sdim case X86::KMOVDkm: 5834309124Sdim case X86::KMOVQkm: 5835202878Srdivacky break; 5836202878Srdivacky } 5837202878Srdivacky switch (Opc2) { 5838202878Srdivacky default: return false; 5839202878Srdivacky case X86::MOV8rm: 5840202878Srdivacky case X86::MOV16rm: 5841202878Srdivacky case X86::MOV32rm: 5842202878Srdivacky case X86::MOV64rm: 5843202878Srdivacky case X86::LD_Fp32m: 5844202878Srdivacky case X86::LD_Fp64m: 5845202878Srdivacky case X86::LD_Fp80m: 5846202878Srdivacky case X86::MOVSSrm: 5847353358Sdim case X86::MOVSSrm_alt: 5848202878Srdivacky case X86::MOVSDrm: 5849353358Sdim case X86::MOVSDrm_alt: 5850202878Srdivacky case X86::MMX_MOVD64rm: 5851202878Srdivacky case X86::MMX_MOVQ64rm: 5852202878Srdivacky case X86::MOVAPSrm: 5853202878Srdivacky case X86::MOVUPSrm: 5854202878Srdivacky case X86::MOVAPDrm: 5855309124Sdim case X86::MOVUPDrm: 5856202878Srdivacky case X86::MOVDQArm: 5857202878Srdivacky case X86::MOVDQUrm: 5858226633Sdim // AVX load instructions 5859226633Sdim case X86::VMOVSSrm: 5860353358Sdim case X86::VMOVSSrm_alt: 5861226633Sdim case X86::VMOVSDrm: 5862353358Sdim case X86::VMOVSDrm_alt: 5863226633Sdim case X86::VMOVAPSrm: 5864226633Sdim case X86::VMOVUPSrm: 5865226633Sdim case X86::VMOVAPDrm: 5866309124Sdim case X86::VMOVUPDrm: 5867226633Sdim case X86::VMOVDQArm: 5868226633Sdim case X86::VMOVDQUrm: 5869224145Sdim case X86::VMOVAPSYrm: 5870224145Sdim case X86::VMOVUPSYrm: 5871224145Sdim case X86::VMOVAPDYrm: 5872309124Sdim case X86::VMOVUPDYrm: 5873224145Sdim case X86::VMOVDQAYrm: 5874224145Sdim case X86::VMOVDQUYrm: 
5875309124Sdim // AVX512 load instructions 5876309124Sdim case X86::VMOVSSZrm: 5877353358Sdim case X86::VMOVSSZrm_alt: 5878309124Sdim case X86::VMOVSDZrm: 5879353358Sdim case X86::VMOVSDZrm_alt: 5880309124Sdim case X86::VMOVAPSZ128rm: 5881309124Sdim case X86::VMOVUPSZ128rm: 5882314564Sdim case X86::VMOVAPSZ128rm_NOVLX: 5883314564Sdim case X86::VMOVUPSZ128rm_NOVLX: 5884309124Sdim case X86::VMOVAPDZ128rm: 5885309124Sdim case X86::VMOVUPDZ128rm: 5886309124Sdim case X86::VMOVDQU8Z128rm: 5887309124Sdim case X86::VMOVDQU16Z128rm: 5888309124Sdim case X86::VMOVDQA32Z128rm: 5889309124Sdim case X86::VMOVDQU32Z128rm: 5890309124Sdim case X86::VMOVDQA64Z128rm: 5891309124Sdim case X86::VMOVDQU64Z128rm: 5892309124Sdim case X86::VMOVAPSZ256rm: 5893309124Sdim case X86::VMOVUPSZ256rm: 5894314564Sdim case X86::VMOVAPSZ256rm_NOVLX: 5895314564Sdim case X86::VMOVUPSZ256rm_NOVLX: 5896309124Sdim case X86::VMOVAPDZ256rm: 5897309124Sdim case X86::VMOVUPDZ256rm: 5898309124Sdim case X86::VMOVDQU8Z256rm: 5899309124Sdim case X86::VMOVDQU16Z256rm: 5900309124Sdim case X86::VMOVDQA32Z256rm: 5901309124Sdim case X86::VMOVDQU32Z256rm: 5902309124Sdim case X86::VMOVDQA64Z256rm: 5903309124Sdim case X86::VMOVDQU64Z256rm: 5904309124Sdim case X86::VMOVAPSZrm: 5905309124Sdim case X86::VMOVUPSZrm: 5906309124Sdim case X86::VMOVAPDZrm: 5907309124Sdim case X86::VMOVUPDZrm: 5908309124Sdim case X86::VMOVDQU8Zrm: 5909309124Sdim case X86::VMOVDQU16Zrm: 5910309124Sdim case X86::VMOVDQA32Zrm: 5911309124Sdim case X86::VMOVDQU32Zrm: 5912309124Sdim case X86::VMOVDQA64Zrm: 5913309124Sdim case X86::VMOVDQU64Zrm: 5914309124Sdim case X86::KMOVBkm: 5915309124Sdim case X86::KMOVWkm: 5916309124Sdim case X86::KMOVDkm: 5917309124Sdim case X86::KMOVQkm: 5918202878Srdivacky break; 5919202878Srdivacky } 5920202878Srdivacky 5921321369Sdim // Lambda to check if both the loads have the same value for an operand index. 
5922321369Sdim auto HasSameOp = [&](int I) { 5923321369Sdim return Load1->getOperand(I) == Load2->getOperand(I); 5924321369Sdim }; 5925321369Sdim 5926321369Sdim // All operands except the displacement should match. 5927321369Sdim if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) || 5928321369Sdim !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg)) 5929202878Srdivacky return false; 5930321369Sdim 5931321369Sdim // Chain Operand must be the same. 5932321369Sdim if (!HasSameOp(5)) 5933202878Srdivacky return false; 5934202878Srdivacky 5935321369Sdim // Now let's examine if the displacements are constants. 5936321369Sdim auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp)); 5937321369Sdim auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp)); 5938321369Sdim if (!Disp1 || !Disp2) 5939321369Sdim return false; 5940321369Sdim 5941321369Sdim Offset1 = Disp1->getSExtValue(); 5942321369Sdim Offset2 = Disp2->getSExtValue(); 5943321369Sdim return true; 5944202878Srdivacky} 5945202878Srdivacky 5946202878Srdivackybool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, 5947202878Srdivacky int64_t Offset1, int64_t Offset2, 5948202878Srdivacky unsigned NumLoads) const { 5949202878Srdivacky assert(Offset2 > Offset1); 5950202878Srdivacky if ((Offset2 - Offset1) / 8 > 64) 5951202878Srdivacky return false; 5952202878Srdivacky 5953202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 5954202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 5955202878Srdivacky if (Opc1 != Opc2) 5956202878Srdivacky return false; // FIXME: overly conservative? 
5957202878Srdivacky 5958202878Srdivacky switch (Opc1) { 5959202878Srdivacky default: break; 5960202878Srdivacky case X86::LD_Fp32m: 5961202878Srdivacky case X86::LD_Fp64m: 5962202878Srdivacky case X86::LD_Fp80m: 5963202878Srdivacky case X86::MMX_MOVD64rm: 5964202878Srdivacky case X86::MMX_MOVQ64rm: 5965202878Srdivacky return false; 5966202878Srdivacky } 5967202878Srdivacky 5968202878Srdivacky EVT VT = Load1->getValueType(0); 5969202878Srdivacky switch (VT.getSimpleVT().SimpleTy) { 5970210299Sed default: 5971202878Srdivacky // XMM registers. In 64-bit mode we can be a bit more aggressive since we 5972202878Srdivacky // have 16 of them to play with. 5973276479Sdim if (Subtarget.is64Bit()) { 5974202878Srdivacky if (NumLoads >= 3) 5975202878Srdivacky return false; 5976210299Sed } else if (NumLoads) { 5977202878Srdivacky return false; 5978210299Sed } 5979202878Srdivacky break; 5980202878Srdivacky case MVT::i8: 5981202878Srdivacky case MVT::i16: 5982202878Srdivacky case MVT::i32: 5983202878Srdivacky case MVT::i64: 5984202878Srdivacky case MVT::f32: 5985202878Srdivacky case MVT::f64: 5986202878Srdivacky if (NumLoads) 5987202878Srdivacky return false; 5988210299Sed break; 5989202878Srdivacky } 5990202878Srdivacky 5991202878Srdivacky return true; 5992202878Srdivacky} 5993202878Srdivacky 5994193323Sedbool X86InstrInfo:: 5995314564SdimreverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 5996193323Sed assert(Cond.size() == 1 && "Invalid X86 branch condition!"); 5997193323Sed X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); 5998193323Sed Cond[0].setImm(GetOppositeBranchCondition(CC)); 5999193323Sed return false; 6000193323Sed} 6001193323Sed 6002193323Sedbool X86InstrInfo:: 6003193323SedisSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 6004193323Sed // FIXME: Return false for x87 stack register classes for now. We can't 6005193323Sed // allow any loads of these registers before FpGet_ST0_80. 
6006332833Sdim return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass || 6007332833Sdim RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass || 6008332833Sdim RC == &X86::RFP80RegClass); 6009193323Sed} 6010193323Sed 6011288943Sdim/// Return a virtual register initialized with the 6012193323Sed/// the global base register value. Output instructions required to 6013193323Sed/// initialize the register in the function entry block, if necessary. 6014193323Sed/// 6015210299Sed/// TODO: Eliminate this and move the code to X86MachineFunctionInfo. 6016210299Sed/// 6017193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 6018341825Sdim assert((!Subtarget.is64Bit() || 6019341825Sdim MF->getTarget().getCodeModel() == CodeModel::Medium || 6020341825Sdim MF->getTarget().getCodeModel() == CodeModel::Large) && 6021193323Sed "X86-64 PIC uses RIP relative addressing"); 6022193323Sed 6023193323Sed X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 6024193323Sed unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 6025193323Sed if (GlobalBaseReg != 0) 6026193323Sed return GlobalBaseReg; 6027193323Sed 6028210299Sed // Create the register. The code to initialize it is inserted 6029210299Sed // later, by the CGBR pass (below). 6030193323Sed MachineRegisterInfo &RegInfo = MF->getRegInfo(); 6031341825Sdim GlobalBaseReg = RegInfo.createVirtualRegister( 6032341825Sdim Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass); 6033193323Sed X86FI->setGlobalBaseReg(GlobalBaseReg); 6034193323Sed return GlobalBaseReg; 6035193323Sed} 6036206083Srdivacky 6037206083Srdivacky// These are the replaceable SSE instructions. Some of these have Int variants 6038206083Srdivacky// that we don't include here. We don't want to replace instructions selected 6039206083Srdivacky// by intrinsics. 
6040234353Sdimstatic const uint16_t ReplaceableInstrs[][3] = { 6041212904Sdim //PackedSingle PackedDouble PackedInt 6042206083Srdivacky { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, 6043206083Srdivacky { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, 6044206083Srdivacky { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, 6045206083Srdivacky { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, 6046206083Srdivacky { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, 6047314564Sdim { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr }, 6048321369Sdim { X86::MOVSDmr, X86::MOVSDmr, X86::MOVPQI2QImr }, 6049314564Sdim { X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr }, 6050314564Sdim { X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm }, 6051353358Sdim { X86::MOVSDrm_alt,X86::MOVSDrm_alt,X86::MOVQI2PQIrm }, 6052314564Sdim { X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm }, 6053353358Sdim { X86::MOVSSrm_alt,X86::MOVSSrm_alt,X86::MOVDI2PDIrm }, 6054206083Srdivacky { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, 6055206083Srdivacky { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, 6056206083Srdivacky { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, 6057206083Srdivacky { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm }, 6058206083Srdivacky { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, 6059206083Srdivacky { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, 6060206083Srdivacky { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, 6061206083Srdivacky { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, 6062206083Srdivacky { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, 6063327952Sdim { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm }, 6064327952Sdim { X86::MOVLHPSrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr }, 6065327952Sdim { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm }, 6066327952Sdim { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr }, 6067327952Sdim { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm }, 6068327952Sdim { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr }, 6069327952Sdim { X86::UNPCKHPSrm, 
X86::UNPCKHPSrm, X86::PUNPCKHDQrm }, 6070327952Sdim { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr }, 6071327952Sdim { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr }, 6072327952Sdim { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr }, 6073212904Sdim // AVX 128-bit support 6074212904Sdim { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, 6075212904Sdim { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, 6076212904Sdim { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, 6077212904Sdim { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, 6078212904Sdim { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, 6079314564Sdim { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr }, 6080321369Sdim { X86::VMOVSDmr, X86::VMOVSDmr, X86::VMOVPQI2QImr }, 6081314564Sdim { X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr }, 6082314564Sdim { X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm }, 6083353358Sdim { X86::VMOVSDrm_alt,X86::VMOVSDrm_alt,X86::VMOVQI2PQIrm }, 6084314564Sdim { X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm }, 6085353358Sdim { X86::VMOVSSrm_alt,X86::VMOVSSrm_alt,X86::VMOVDI2PDIrm }, 6086212904Sdim { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, 6087212904Sdim { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, 6088212904Sdim { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, 6089212904Sdim { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, 6090212904Sdim { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, 6091212904Sdim { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, 6092212904Sdim { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, 6093212904Sdim { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, 6094212904Sdim { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, 6095327952Sdim { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm }, 6096327952Sdim { X86::VMOVLHPSrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr }, 6097327952Sdim { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm }, 6098327952Sdim { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr }, 6099327952Sdim 
{ X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm }, 6100327952Sdim { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr }, 6101327952Sdim { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm }, 6102327952Sdim { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr }, 6103327952Sdim { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr }, 6104327952Sdim { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr }, 6105224145Sdim // AVX 256-bit support 6106224145Sdim { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, 6107224145Sdim { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, 6108224145Sdim { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr }, 6109224145Sdim { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr }, 6110224145Sdim { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, 6111314564Sdim { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }, 6112327952Sdim { X86::VPERMPSYrm, X86::VPERMPSYrm, X86::VPERMDYrm }, 6113327952Sdim { X86::VPERMPSYrr, X86::VPERMPSYrr, X86::VPERMDYrr }, 6114327952Sdim { X86::VPERMPDYmi, X86::VPERMPDYmi, X86::VPERMQYmi }, 6115327952Sdim { X86::VPERMPDYri, X86::VPERMPDYri, X86::VPERMQYri }, 6116314564Sdim // AVX512 support 6117314564Sdim { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr }, 6118314564Sdim { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr }, 6119321369Sdim { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr }, 6120314564Sdim { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr }, 6121314564Sdim { X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr }, 6122314564Sdim { X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr }, 6123314564Sdim { X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm }, 6124353358Sdim { X86::VMOVSDZrm_alt, X86::VMOVSDZrm_alt, X86::VMOVQI2PQIZrm }, 6125314564Sdim { X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm }, 6126353358Sdim { X86::VMOVSSZrm_alt, X86::VMOVSSZrm_alt, X86::VMOVDI2PDIZrm }, 6127314564Sdim { X86::VBROADCASTSSZ128r, 
X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r }, 6128314564Sdim { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m }, 6129314564Sdim { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r }, 6130314564Sdim { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m }, 6131314564Sdim { X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr }, 6132314564Sdim { X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm }, 6133353358Sdim { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128r }, 6134353358Sdim { X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128m }, 6135314564Sdim { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r }, 6136314564Sdim { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m }, 6137314564Sdim { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr }, 6138314564Sdim { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm }, 6139327952Sdim { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrr, X86::VINSERTI32x4Zrr }, 6140327952Sdim { X86::VINSERTF32x4Zrm, X86::VINSERTF32x4Zrm, X86::VINSERTI32x4Zrm }, 6141327952Sdim { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrr, X86::VINSERTI32x8Zrr }, 6142327952Sdim { X86::VINSERTF32x8Zrm, X86::VINSERTF32x8Zrm, X86::VINSERTI32x8Zrm }, 6143327952Sdim { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrr, X86::VINSERTI64x2Zrr }, 6144327952Sdim { X86::VINSERTF64x2Zrm, X86::VINSERTF64x2Zrm, X86::VINSERTI64x2Zrm }, 6145327952Sdim { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrr, X86::VINSERTI64x4Zrr }, 6146327952Sdim { X86::VINSERTF64x4Zrm, X86::VINSERTF64x4Zrm, X86::VINSERTI64x4Zrm }, 6147327952Sdim { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rr,X86::VINSERTI32x4Z256rr }, 6148327952Sdim { X86::VINSERTF32x4Z256rm,X86::VINSERTF32x4Z256rm,X86::VINSERTI32x4Z256rm }, 6149327952Sdim { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rr,X86::VINSERTI64x2Z256rr }, 6150327952Sdim { 
X86::VINSERTF64x2Z256rm,X86::VINSERTF64x2Z256rm,X86::VINSERTI64x2Z256rm }, 6151327952Sdim { X86::VEXTRACTF32x4Zrr, X86::VEXTRACTF32x4Zrr, X86::VEXTRACTI32x4Zrr }, 6152327952Sdim { X86::VEXTRACTF32x4Zmr, X86::VEXTRACTF32x4Zmr, X86::VEXTRACTI32x4Zmr }, 6153327952Sdim { X86::VEXTRACTF32x8Zrr, X86::VEXTRACTF32x8Zrr, X86::VEXTRACTI32x8Zrr }, 6154327952Sdim { X86::VEXTRACTF32x8Zmr, X86::VEXTRACTF32x8Zmr, X86::VEXTRACTI32x8Zmr }, 6155327952Sdim { X86::VEXTRACTF64x2Zrr, X86::VEXTRACTF64x2Zrr, X86::VEXTRACTI64x2Zrr }, 6156327952Sdim { X86::VEXTRACTF64x2Zmr, X86::VEXTRACTF64x2Zmr, X86::VEXTRACTI64x2Zmr }, 6157327952Sdim { X86::VEXTRACTF64x4Zrr, X86::VEXTRACTF64x4Zrr, X86::VEXTRACTI64x4Zrr }, 6158327952Sdim { X86::VEXTRACTF64x4Zmr, X86::VEXTRACTF64x4Zmr, X86::VEXTRACTI64x4Zmr }, 6159327952Sdim { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTI32x4Z256rr }, 6160327952Sdim { X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTI32x4Z256mr }, 6161327952Sdim { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTI64x2Z256rr }, 6162327952Sdim { X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTI64x2Z256mr }, 6163327952Sdim { X86::VPERMILPSmi, X86::VPERMILPSmi, X86::VPSHUFDmi }, 6164327952Sdim { X86::VPERMILPSri, X86::VPERMILPSri, X86::VPSHUFDri }, 6165327952Sdim { X86::VPERMILPSZ128mi, X86::VPERMILPSZ128mi, X86::VPSHUFDZ128mi }, 6166327952Sdim { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128ri, X86::VPSHUFDZ128ri }, 6167327952Sdim { X86::VPERMILPSZ256mi, X86::VPERMILPSZ256mi, X86::VPSHUFDZ256mi }, 6168327952Sdim { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri }, 6169327952Sdim { X86::VPERMILPSZmi, X86::VPERMILPSZmi, X86::VPSHUFDZmi }, 6170327952Sdim { X86::VPERMILPSZri, X86::VPERMILPSZri, X86::VPSHUFDZri }, 6171327952Sdim { X86::VPERMPSZ256rm, X86::VPERMPSZ256rm, X86::VPERMDZ256rm }, 6172327952Sdim { X86::VPERMPSZ256rr, X86::VPERMPSZ256rr, X86::VPERMDZ256rr }, 6173327952Sdim { X86::VPERMPDZ256mi, X86::VPERMPDZ256mi, 
X86::VPERMQZ256mi }, 6174327952Sdim { X86::VPERMPDZ256ri, X86::VPERMPDZ256ri, X86::VPERMQZ256ri }, 6175327952Sdim { X86::VPERMPDZ256rm, X86::VPERMPDZ256rm, X86::VPERMQZ256rm }, 6176327952Sdim { X86::VPERMPDZ256rr, X86::VPERMPDZ256rr, X86::VPERMQZ256rr }, 6177327952Sdim { X86::VPERMPSZrm, X86::VPERMPSZrm, X86::VPERMDZrm }, 6178327952Sdim { X86::VPERMPSZrr, X86::VPERMPSZrr, X86::VPERMDZrr }, 6179327952Sdim { X86::VPERMPDZmi, X86::VPERMPDZmi, X86::VPERMQZmi }, 6180327952Sdim { X86::VPERMPDZri, X86::VPERMPDZri, X86::VPERMQZri }, 6181327952Sdim { X86::VPERMPDZrm, X86::VPERMPDZrm, X86::VPERMQZrm }, 6182327952Sdim { X86::VPERMPDZrr, X86::VPERMPDZrr, X86::VPERMQZrr }, 6183327952Sdim { X86::VUNPCKLPDZ256rm, X86::VUNPCKLPDZ256rm, X86::VPUNPCKLQDQZ256rm }, 6184327952Sdim { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rr, X86::VPUNPCKLQDQZ256rr }, 6185327952Sdim { X86::VUNPCKHPDZ256rm, X86::VUNPCKHPDZ256rm, X86::VPUNPCKHQDQZ256rm }, 6186327952Sdim { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rr, X86::VPUNPCKHQDQZ256rr }, 6187327952Sdim { X86::VUNPCKLPSZ256rm, X86::VUNPCKLPSZ256rm, X86::VPUNPCKLDQZ256rm }, 6188327952Sdim { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr }, 6189327952Sdim { X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm }, 6190327952Sdim { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr }, 6191327952Sdim { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm }, 6192327952Sdim { X86::VMOVLHPSZrr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr }, 6193327952Sdim { X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm }, 6194327952Sdim { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr }, 6195327952Sdim { X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm }, 6196327952Sdim { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rr, X86::VPUNPCKLDQZ128rr }, 6197327952Sdim { X86::VUNPCKHPSZ128rm, X86::VUNPCKHPSZ128rm, X86::VPUNPCKHDQZ128rm }, 6198327952Sdim { 
X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rr, X86::VPUNPCKHDQZ128rr }, 6199327952Sdim { X86::VUNPCKLPDZrm, X86::VUNPCKLPDZrm, X86::VPUNPCKLQDQZrm }, 6200327952Sdim { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrr, X86::VPUNPCKLQDQZrr }, 6201327952Sdim { X86::VUNPCKHPDZrm, X86::VUNPCKHPDZrm, X86::VPUNPCKHQDQZrm }, 6202327952Sdim { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrr, X86::VPUNPCKHQDQZrr }, 6203327952Sdim { X86::VUNPCKLPSZrm, X86::VUNPCKLPSZrm, X86::VPUNPCKLDQZrm }, 6204327952Sdim { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr }, 6205327952Sdim { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm }, 6206327952Sdim { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr }, 6207327952Sdim { X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr }, 6208327952Sdim { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr }, 6209206083Srdivacky}; 6210206083Srdivacky 6211234353Sdimstatic const uint16_t ReplaceableInstrsAVX2[][3] = { 6212234353Sdim //PackedSingle PackedDouble PackedInt 6213234353Sdim { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm }, 6214234353Sdim { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr }, 6215234353Sdim { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm }, 6216234353Sdim { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr }, 6217234353Sdim { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm }, 6218234353Sdim { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, 6219234353Sdim { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, 6220234353Sdim { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, 6221234353Sdim { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm }, 6222276479Sdim { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }, 6223276479Sdim { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm}, 6224276479Sdim { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr}, 6225353358Sdim { X86::VMOVDDUPrm, X86::VMOVDDUPrm, X86::VPBROADCASTQrm}, 6226353358Sdim { X86::VMOVDDUPrr, X86::VMOVDDUPrr, X86::VPBROADCASTQrr}, 
6227276479Sdim { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr}, 6228276479Sdim { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm}, 6229276479Sdim { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr}, 6230314564Sdim { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm}, 6231314564Sdim { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 }, 6232327952Sdim { X86::VBLENDPSYrri, X86::VBLENDPSYrri, X86::VPBLENDDYrri }, 6233327952Sdim { X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi }, 6234327952Sdim { X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi }, 6235327952Sdim { X86::VPERMILPSYri, X86::VPERMILPSYri, X86::VPSHUFDYri }, 6236327952Sdim { X86::VUNPCKLPDYrm, X86::VUNPCKLPDYrm, X86::VPUNPCKLQDQYrm }, 6237327952Sdim { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrr, X86::VPUNPCKLQDQYrr }, 6238327952Sdim { X86::VUNPCKHPDYrm, X86::VUNPCKHPDYrm, X86::VPUNPCKHQDQYrm }, 6239327952Sdim { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrr, X86::VPUNPCKHQDQYrr }, 6240327952Sdim { X86::VUNPCKLPSYrm, X86::VUNPCKLPSYrm, X86::VPUNPCKLDQYrm }, 6241327952Sdim { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrr, X86::VPUNPCKLDQYrr }, 6242327952Sdim { X86::VUNPCKHPSYrm, X86::VUNPCKHPSYrm, X86::VPUNPCKHDQYrm }, 6243327952Sdim { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr }, 6244234353Sdim}; 6245234353Sdim 6246353358Sdimstatic const uint16_t ReplaceableInstrsFP[][3] = { 6247353358Sdim //PackedSingle PackedDouble 6248353358Sdim { X86::MOVLPSrm, X86::MOVLPDrm, X86::INSTRUCTION_LIST_END }, 6249353358Sdim { X86::MOVHPSrm, X86::MOVHPDrm, X86::INSTRUCTION_LIST_END }, 6250353358Sdim { X86::MOVHPSmr, X86::MOVHPDmr, X86::INSTRUCTION_LIST_END }, 6251353358Sdim { X86::VMOVLPSrm, X86::VMOVLPDrm, X86::INSTRUCTION_LIST_END }, 6252353358Sdim { X86::VMOVHPSrm, X86::VMOVHPDrm, X86::INSTRUCTION_LIST_END }, 6253353358Sdim { X86::VMOVHPSmr, X86::VMOVHPDmr, X86::INSTRUCTION_LIST_END }, 6254353358Sdim { X86::VMOVLPSZ128rm, X86::VMOVLPDZ128rm, 
X86::INSTRUCTION_LIST_END }, 6255353358Sdim { X86::VMOVHPSZ128rm, X86::VMOVHPDZ128rm, X86::INSTRUCTION_LIST_END }, 6256353358Sdim { X86::VMOVHPSZ128mr, X86::VMOVHPDZ128mr, X86::INSTRUCTION_LIST_END }, 6257353358Sdim}; 6258353358Sdim 6259321369Sdimstatic const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = { 6260321369Sdim //PackedSingle PackedDouble PackedInt 6261321369Sdim { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr }, 6262321369Sdim { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr }, 6263321369Sdim { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm }, 6264321369Sdim { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr }, 6265321369Sdim}; 6266321369Sdim 6267314564Sdimstatic const uint16_t ReplaceableInstrsAVX512[][4] = { 6268314564Sdim // Two integer columns for 64-bit and 32-bit elements. 6269314564Sdim //PackedSingle PackedDouble PackedInt PackedInt 6270314564Sdim { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr }, 6271314564Sdim { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm }, 6272314564Sdim { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr }, 6273314564Sdim { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr }, 6274314564Sdim { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm }, 6275314564Sdim { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr }, 6276314564Sdim { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm }, 6277314564Sdim { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr }, 6278314564Sdim { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr }, 6279314564Sdim { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm }, 6280314564Sdim { X86::VMOVAPSZmr, 
X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA32Zmr }, 6281314564Sdim { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA32Zrm }, 6282314564Sdim { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr }, 6283314564Sdim { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr }, 6284314564Sdim { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm }, 6285314564Sdim}; 6286314564Sdim 6287314564Sdimstatic const uint16_t ReplaceableInstrsAVX512DQ[][4] = { 6288314564Sdim // Two integer columns for 64-bit and 32-bit elements. 6289314564Sdim //PackedSingle PackedDouble PackedInt PackedInt 6290314564Sdim { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm }, 6291314564Sdim { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr }, 6292314564Sdim { X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm }, 6293314564Sdim { X86::VANDPSZ128rr, X86::VANDPDZ128rr, X86::VPANDQZ128rr, X86::VPANDDZ128rr }, 6294314564Sdim { X86::VORPSZ128rm, X86::VORPDZ128rm, X86::VPORQZ128rm, X86::VPORDZ128rm }, 6295314564Sdim { X86::VORPSZ128rr, X86::VORPDZ128rr, X86::VPORQZ128rr, X86::VPORDZ128rr }, 6296314564Sdim { X86::VXORPSZ128rm, X86::VXORPDZ128rm, X86::VPXORQZ128rm, X86::VPXORDZ128rm }, 6297314564Sdim { X86::VXORPSZ128rr, X86::VXORPDZ128rr, X86::VPXORQZ128rr, X86::VPXORDZ128rr }, 6298314564Sdim { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm }, 6299314564Sdim { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr }, 6300314564Sdim { X86::VANDPSZ256rm, X86::VANDPDZ256rm, X86::VPANDQZ256rm, X86::VPANDDZ256rm }, 6301314564Sdim { X86::VANDPSZ256rr, X86::VANDPDZ256rr, X86::VPANDQZ256rr, X86::VPANDDZ256rr }, 6302314564Sdim { X86::VORPSZ256rm, X86::VORPDZ256rm, X86::VPORQZ256rm, X86::VPORDZ256rm }, 6303314564Sdim { X86::VORPSZ256rr, X86::VORPDZ256rr, X86::VPORQZ256rr, X86::VPORDZ256rr }, 
6304314564Sdim { X86::VXORPSZ256rm, X86::VXORPDZ256rm, X86::VPXORQZ256rm, X86::VPXORDZ256rm }, 6305314564Sdim { X86::VXORPSZ256rr, X86::VXORPDZ256rr, X86::VPXORQZ256rr, X86::VPXORDZ256rr }, 6306314564Sdim { X86::VANDNPSZrm, X86::VANDNPDZrm, X86::VPANDNQZrm, X86::VPANDNDZrm }, 6307314564Sdim { X86::VANDNPSZrr, X86::VANDNPDZrr, X86::VPANDNQZrr, X86::VPANDNDZrr }, 6308314564Sdim { X86::VANDPSZrm, X86::VANDPDZrm, X86::VPANDQZrm, X86::VPANDDZrm }, 6309314564Sdim { X86::VANDPSZrr, X86::VANDPDZrr, X86::VPANDQZrr, X86::VPANDDZrr }, 6310314564Sdim { X86::VORPSZrm, X86::VORPDZrm, X86::VPORQZrm, X86::VPORDZrm }, 6311314564Sdim { X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr }, 6312314564Sdim { X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm }, 6313314564Sdim { X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr }, 6314314564Sdim}; 6315314564Sdim 6316314564Sdimstatic const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = { 6317314564Sdim // Two integer columns for 64-bit and 32-bit elements. 
6318314564Sdim //PackedSingle PackedDouble 6319314564Sdim //PackedInt PackedInt 6320314564Sdim { X86::VANDNPSZ128rmk, X86::VANDNPDZ128rmk, 6321314564Sdim X86::VPANDNQZ128rmk, X86::VPANDNDZ128rmk }, 6322314564Sdim { X86::VANDNPSZ128rmkz, X86::VANDNPDZ128rmkz, 6323314564Sdim X86::VPANDNQZ128rmkz, X86::VPANDNDZ128rmkz }, 6324314564Sdim { X86::VANDNPSZ128rrk, X86::VANDNPDZ128rrk, 6325314564Sdim X86::VPANDNQZ128rrk, X86::VPANDNDZ128rrk }, 6326314564Sdim { X86::VANDNPSZ128rrkz, X86::VANDNPDZ128rrkz, 6327314564Sdim X86::VPANDNQZ128rrkz, X86::VPANDNDZ128rrkz }, 6328314564Sdim { X86::VANDPSZ128rmk, X86::VANDPDZ128rmk, 6329314564Sdim X86::VPANDQZ128rmk, X86::VPANDDZ128rmk }, 6330314564Sdim { X86::VANDPSZ128rmkz, X86::VANDPDZ128rmkz, 6331314564Sdim X86::VPANDQZ128rmkz, X86::VPANDDZ128rmkz }, 6332314564Sdim { X86::VANDPSZ128rrk, X86::VANDPDZ128rrk, 6333314564Sdim X86::VPANDQZ128rrk, X86::VPANDDZ128rrk }, 6334314564Sdim { X86::VANDPSZ128rrkz, X86::VANDPDZ128rrkz, 6335314564Sdim X86::VPANDQZ128rrkz, X86::VPANDDZ128rrkz }, 6336314564Sdim { X86::VORPSZ128rmk, X86::VORPDZ128rmk, 6337314564Sdim X86::VPORQZ128rmk, X86::VPORDZ128rmk }, 6338314564Sdim { X86::VORPSZ128rmkz, X86::VORPDZ128rmkz, 6339314564Sdim X86::VPORQZ128rmkz, X86::VPORDZ128rmkz }, 6340314564Sdim { X86::VORPSZ128rrk, X86::VORPDZ128rrk, 6341314564Sdim X86::VPORQZ128rrk, X86::VPORDZ128rrk }, 6342314564Sdim { X86::VORPSZ128rrkz, X86::VORPDZ128rrkz, 6343314564Sdim X86::VPORQZ128rrkz, X86::VPORDZ128rrkz }, 6344314564Sdim { X86::VXORPSZ128rmk, X86::VXORPDZ128rmk, 6345314564Sdim X86::VPXORQZ128rmk, X86::VPXORDZ128rmk }, 6346314564Sdim { X86::VXORPSZ128rmkz, X86::VXORPDZ128rmkz, 6347314564Sdim X86::VPXORQZ128rmkz, X86::VPXORDZ128rmkz }, 6348314564Sdim { X86::VXORPSZ128rrk, X86::VXORPDZ128rrk, 6349314564Sdim X86::VPXORQZ128rrk, X86::VPXORDZ128rrk }, 6350314564Sdim { X86::VXORPSZ128rrkz, X86::VXORPDZ128rrkz, 6351314564Sdim X86::VPXORQZ128rrkz, X86::VPXORDZ128rrkz }, 6352314564Sdim { X86::VANDNPSZ256rmk, X86::VANDNPDZ256rmk, 
6353314564Sdim X86::VPANDNQZ256rmk, X86::VPANDNDZ256rmk }, 6354314564Sdim { X86::VANDNPSZ256rmkz, X86::VANDNPDZ256rmkz, 6355314564Sdim X86::VPANDNQZ256rmkz, X86::VPANDNDZ256rmkz }, 6356314564Sdim { X86::VANDNPSZ256rrk, X86::VANDNPDZ256rrk, 6357314564Sdim X86::VPANDNQZ256rrk, X86::VPANDNDZ256rrk }, 6358314564Sdim { X86::VANDNPSZ256rrkz, X86::VANDNPDZ256rrkz, 6359314564Sdim X86::VPANDNQZ256rrkz, X86::VPANDNDZ256rrkz }, 6360314564Sdim { X86::VANDPSZ256rmk, X86::VANDPDZ256rmk, 6361314564Sdim X86::VPANDQZ256rmk, X86::VPANDDZ256rmk }, 6362314564Sdim { X86::VANDPSZ256rmkz, X86::VANDPDZ256rmkz, 6363314564Sdim X86::VPANDQZ256rmkz, X86::VPANDDZ256rmkz }, 6364314564Sdim { X86::VANDPSZ256rrk, X86::VANDPDZ256rrk, 6365314564Sdim X86::VPANDQZ256rrk, X86::VPANDDZ256rrk }, 6366314564Sdim { X86::VANDPSZ256rrkz, X86::VANDPDZ256rrkz, 6367314564Sdim X86::VPANDQZ256rrkz, X86::VPANDDZ256rrkz }, 6368314564Sdim { X86::VORPSZ256rmk, X86::VORPDZ256rmk, 6369314564Sdim X86::VPORQZ256rmk, X86::VPORDZ256rmk }, 6370314564Sdim { X86::VORPSZ256rmkz, X86::VORPDZ256rmkz, 6371314564Sdim X86::VPORQZ256rmkz, X86::VPORDZ256rmkz }, 6372314564Sdim { X86::VORPSZ256rrk, X86::VORPDZ256rrk, 6373314564Sdim X86::VPORQZ256rrk, X86::VPORDZ256rrk }, 6374314564Sdim { X86::VORPSZ256rrkz, X86::VORPDZ256rrkz, 6375314564Sdim X86::VPORQZ256rrkz, X86::VPORDZ256rrkz }, 6376314564Sdim { X86::VXORPSZ256rmk, X86::VXORPDZ256rmk, 6377314564Sdim X86::VPXORQZ256rmk, X86::VPXORDZ256rmk }, 6378314564Sdim { X86::VXORPSZ256rmkz, X86::VXORPDZ256rmkz, 6379314564Sdim X86::VPXORQZ256rmkz, X86::VPXORDZ256rmkz }, 6380314564Sdim { X86::VXORPSZ256rrk, X86::VXORPDZ256rrk, 6381314564Sdim X86::VPXORQZ256rrk, X86::VPXORDZ256rrk }, 6382314564Sdim { X86::VXORPSZ256rrkz, X86::VXORPDZ256rrkz, 6383314564Sdim X86::VPXORQZ256rrkz, X86::VPXORDZ256rrkz }, 6384314564Sdim { X86::VANDNPSZrmk, X86::VANDNPDZrmk, 6385314564Sdim X86::VPANDNQZrmk, X86::VPANDNDZrmk }, 6386314564Sdim { X86::VANDNPSZrmkz, X86::VANDNPDZrmkz, 6387314564Sdim X86::VPANDNQZrmkz, 
X86::VPANDNDZrmkz }, 6388314564Sdim { X86::VANDNPSZrrk, X86::VANDNPDZrrk, 6389314564Sdim X86::VPANDNQZrrk, X86::VPANDNDZrrk }, 6390314564Sdim { X86::VANDNPSZrrkz, X86::VANDNPDZrrkz, 6391314564Sdim X86::VPANDNQZrrkz, X86::VPANDNDZrrkz }, 6392314564Sdim { X86::VANDPSZrmk, X86::VANDPDZrmk, 6393314564Sdim X86::VPANDQZrmk, X86::VPANDDZrmk }, 6394314564Sdim { X86::VANDPSZrmkz, X86::VANDPDZrmkz, 6395314564Sdim X86::VPANDQZrmkz, X86::VPANDDZrmkz }, 6396314564Sdim { X86::VANDPSZrrk, X86::VANDPDZrrk, 6397314564Sdim X86::VPANDQZrrk, X86::VPANDDZrrk }, 6398314564Sdim { X86::VANDPSZrrkz, X86::VANDPDZrrkz, 6399314564Sdim X86::VPANDQZrrkz, X86::VPANDDZrrkz }, 6400314564Sdim { X86::VORPSZrmk, X86::VORPDZrmk, 6401314564Sdim X86::VPORQZrmk, X86::VPORDZrmk }, 6402314564Sdim { X86::VORPSZrmkz, X86::VORPDZrmkz, 6403314564Sdim X86::VPORQZrmkz, X86::VPORDZrmkz }, 6404314564Sdim { X86::VORPSZrrk, X86::VORPDZrrk, 6405314564Sdim X86::VPORQZrrk, X86::VPORDZrrk }, 6406314564Sdim { X86::VORPSZrrkz, X86::VORPDZrrkz, 6407314564Sdim X86::VPORQZrrkz, X86::VPORDZrrkz }, 6408314564Sdim { X86::VXORPSZrmk, X86::VXORPDZrmk, 6409314564Sdim X86::VPXORQZrmk, X86::VPXORDZrmk }, 6410314564Sdim { X86::VXORPSZrmkz, X86::VXORPDZrmkz, 6411314564Sdim X86::VPXORQZrmkz, X86::VPXORDZrmkz }, 6412314564Sdim { X86::VXORPSZrrk, X86::VXORPDZrrk, 6413314564Sdim X86::VPXORQZrrk, X86::VPXORDZrrk }, 6414314564Sdim { X86::VXORPSZrrkz, X86::VXORPDZrrkz, 6415314564Sdim X86::VPXORQZrrkz, X86::VPXORDZrrkz }, 6416314564Sdim // Broadcast loads can be handled the same as masked operations to avoid 6417314564Sdim // changing element size. 
6418314564Sdim { X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb, 6419314564Sdim X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb }, 6420314564Sdim { X86::VANDPSZ128rmb, X86::VANDPDZ128rmb, 6421314564Sdim X86::VPANDQZ128rmb, X86::VPANDDZ128rmb }, 6422314564Sdim { X86::VORPSZ128rmb, X86::VORPDZ128rmb, 6423314564Sdim X86::VPORQZ128rmb, X86::VPORDZ128rmb }, 6424314564Sdim { X86::VXORPSZ128rmb, X86::VXORPDZ128rmb, 6425314564Sdim X86::VPXORQZ128rmb, X86::VPXORDZ128rmb }, 6426314564Sdim { X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb, 6427314564Sdim X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb }, 6428314564Sdim { X86::VANDPSZ256rmb, X86::VANDPDZ256rmb, 6429314564Sdim X86::VPANDQZ256rmb, X86::VPANDDZ256rmb }, 6430314564Sdim { X86::VORPSZ256rmb, X86::VORPDZ256rmb, 6431314564Sdim X86::VPORQZ256rmb, X86::VPORDZ256rmb }, 6432314564Sdim { X86::VXORPSZ256rmb, X86::VXORPDZ256rmb, 6433314564Sdim X86::VPXORQZ256rmb, X86::VPXORDZ256rmb }, 6434314564Sdim { X86::VANDNPSZrmb, X86::VANDNPDZrmb, 6435314564Sdim X86::VPANDNQZrmb, X86::VPANDNDZrmb }, 6436314564Sdim { X86::VANDPSZrmb, X86::VANDPDZrmb, 6437314564Sdim X86::VPANDQZrmb, X86::VPANDDZrmb }, 6438314564Sdim { X86::VANDPSZrmb, X86::VANDPDZrmb, 6439314564Sdim X86::VPANDQZrmb, X86::VPANDDZrmb }, 6440314564Sdim { X86::VORPSZrmb, X86::VORPDZrmb, 6441314564Sdim X86::VPORQZrmb, X86::VPORDZrmb }, 6442314564Sdim { X86::VXORPSZrmb, X86::VXORPDZrmb, 6443314564Sdim X86::VPXORQZrmb, X86::VPXORDZrmb }, 6444314564Sdim { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk, 6445314564Sdim X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk }, 6446314564Sdim { X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk, 6447314564Sdim X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk }, 6448314564Sdim { X86::VORPSZ128rmbk, X86::VORPDZ128rmbk, 6449314564Sdim X86::VPORQZ128rmbk, X86::VPORDZ128rmbk }, 6450314564Sdim { X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk, 6451314564Sdim X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk }, 6452314564Sdim { X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk, 6453314564Sdim X86::VPANDNQZ256rmbk, 
X86::VPANDNDZ256rmbk }, 6454314564Sdim { X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk, 6455314564Sdim X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk }, 6456314564Sdim { X86::VORPSZ256rmbk, X86::VORPDZ256rmbk, 6457314564Sdim X86::VPORQZ256rmbk, X86::VPORDZ256rmbk }, 6458314564Sdim { X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk, 6459314564Sdim X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk }, 6460314564Sdim { X86::VANDNPSZrmbk, X86::VANDNPDZrmbk, 6461314564Sdim X86::VPANDNQZrmbk, X86::VPANDNDZrmbk }, 6462314564Sdim { X86::VANDPSZrmbk, X86::VANDPDZrmbk, 6463314564Sdim X86::VPANDQZrmbk, X86::VPANDDZrmbk }, 6464314564Sdim { X86::VANDPSZrmbk, X86::VANDPDZrmbk, 6465314564Sdim X86::VPANDQZrmbk, X86::VPANDDZrmbk }, 6466314564Sdim { X86::VORPSZrmbk, X86::VORPDZrmbk, 6467314564Sdim X86::VPORQZrmbk, X86::VPORDZrmbk }, 6468314564Sdim { X86::VXORPSZrmbk, X86::VXORPDZrmbk, 6469314564Sdim X86::VPXORQZrmbk, X86::VPXORDZrmbk }, 6470314564Sdim { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz, 6471314564Sdim X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz}, 6472314564Sdim { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz, 6473314564Sdim X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz }, 6474314564Sdim { X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz, 6475314564Sdim X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz }, 6476314564Sdim { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz, 6477314564Sdim X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz }, 6478314564Sdim { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz, 6479314564Sdim X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz}, 6480314564Sdim { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz, 6481314564Sdim X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz }, 6482314564Sdim { X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz, 6483314564Sdim X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz }, 6484314564Sdim { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz, 6485314564Sdim X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz }, 6486314564Sdim { X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz, 6487314564Sdim X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz }, 
6488314564Sdim { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz, 6489314564Sdim X86::VPANDQZrmbkz, X86::VPANDDZrmbkz }, 6490314564Sdim { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz, 6491314564Sdim X86::VPANDQZrmbkz, X86::VPANDDZrmbkz }, 6492314564Sdim { X86::VORPSZrmbkz, X86::VORPDZrmbkz, 6493314564Sdim X86::VPORQZrmbkz, X86::VPORDZrmbkz }, 6494314564Sdim { X86::VXORPSZrmbkz, X86::VXORPDZrmbkz, 6495314564Sdim X86::VPXORQZrmbkz, X86::VPXORDZrmbkz }, 6496314564Sdim}; 6497314564Sdim 6498341825Sdim// NOTE: These should only be used by the custom domain methods. 6499353358Sdimstatic const uint16_t ReplaceableBlendInstrs[][3] = { 6500341825Sdim //PackedSingle PackedDouble PackedInt 6501341825Sdim { X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi }, 6502341825Sdim { X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri }, 6503341825Sdim { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDWrmi }, 6504341825Sdim { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDWrri }, 6505341825Sdim { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi }, 6506341825Sdim { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri }, 6507341825Sdim}; 6508353358Sdimstatic const uint16_t ReplaceableBlendAVX2Instrs[][3] = { 6509341825Sdim //PackedSingle PackedDouble PackedInt 6510341825Sdim { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi }, 6511341825Sdim { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri }, 6512341825Sdim { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDDYrmi }, 6513341825Sdim { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri }, 6514341825Sdim}; 6515341825Sdim 6516341825Sdim// Special table for changing EVEX logic instructions to VEX. 6517341825Sdim// TODO: Should we run EVEX->VEX earlier? 6518341825Sdimstatic const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = { 6519341825Sdim // Two integer columns for 64-bit and 32-bit elements. 
6520341825Sdim //PackedSingle PackedDouble PackedInt PackedInt 6521341825Sdim { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm }, 6522341825Sdim { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr }, 6523341825Sdim { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDQZ128rm, X86::VPANDDZ128rm }, 6524341825Sdim { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDQZ128rr, X86::VPANDDZ128rr }, 6525341825Sdim { X86::VORPSrm, X86::VORPDrm, X86::VPORQZ128rm, X86::VPORDZ128rm }, 6526341825Sdim { X86::VORPSrr, X86::VORPDrr, X86::VPORQZ128rr, X86::VPORDZ128rr }, 6527341825Sdim { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORQZ128rm, X86::VPXORDZ128rm }, 6528341825Sdim { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORQZ128rr, X86::VPXORDZ128rr }, 6529341825Sdim { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm }, 6530341825Sdim { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr }, 6531341825Sdim { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDQZ256rm, X86::VPANDDZ256rm }, 6532341825Sdim { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDQZ256rr, X86::VPANDDZ256rr }, 6533341825Sdim { X86::VORPSYrm, X86::VORPDYrm, X86::VPORQZ256rm, X86::VPORDZ256rm }, 6534341825Sdim { X86::VORPSYrr, X86::VORPDYrr, X86::VPORQZ256rr, X86::VPORDZ256rr }, 6535341825Sdim { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORQZ256rm, X86::VPXORDZ256rm }, 6536341825Sdim { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORQZ256rr, X86::VPXORDZ256rr }, 6537341825Sdim}; 6538341825Sdim 6539206083Srdivacky// FIXME: Some shuffle and unpack instructions have equivalents in different 6540206083Srdivacky// domains, but they require a bit more work than just switching opcodes. 
6541206083Srdivacky 6542314564Sdimstatic const uint16_t *lookup(unsigned opcode, unsigned domain, 6543314564Sdim ArrayRef<uint16_t[3]> Table) { 6544314564Sdim for (const uint16_t (&Row)[3] : Table) 6545296417Sdim if (Row[domain-1] == opcode) 6546296417Sdim return Row; 6547276479Sdim return nullptr; 6548206083Srdivacky} 6549206083Srdivacky 6550314564Sdimstatic const uint16_t *lookupAVX512(unsigned opcode, unsigned domain, 6551314564Sdim ArrayRef<uint16_t[4]> Table) { 6552314564Sdim // If this is the integer domain make sure to check both integer columns. 6553314564Sdim for (const uint16_t (&Row)[4] : Table) 6554314564Sdim if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode)) 6555296417Sdim return Row; 6556276479Sdim return nullptr; 6557234353Sdim} 6558234353Sdim 6559341825Sdim// Helper to attempt to widen/narrow blend masks. 6560341825Sdimstatic bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth, 6561341825Sdim unsigned NewWidth, unsigned *pNewMask = nullptr) { 6562341825Sdim assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) && 6563341825Sdim "Illegal blend mask scale"); 6564341825Sdim unsigned NewMask = 0; 6565341825Sdim 6566341825Sdim if ((OldWidth % NewWidth) == 0) { 6567341825Sdim unsigned Scale = OldWidth / NewWidth; 6568341825Sdim unsigned SubMask = (1u << Scale) - 1; 6569341825Sdim for (unsigned i = 0; i != NewWidth; ++i) { 6570341825Sdim unsigned Sub = (OldMask >> (i * Scale)) & SubMask; 6571341825Sdim if (Sub == SubMask) 6572341825Sdim NewMask |= (1u << i); 6573341825Sdim else if (Sub != 0x0) 6574341825Sdim return false; 6575341825Sdim } 6576341825Sdim } else { 6577341825Sdim unsigned Scale = NewWidth / OldWidth; 6578341825Sdim unsigned SubMask = (1u << Scale) - 1; 6579341825Sdim for (unsigned i = 0; i != OldWidth; ++i) { 6580341825Sdim if (OldMask & (1 << i)) { 6581341825Sdim NewMask |= (SubMask << (i * Scale)); 6582341825Sdim } 6583341825Sdim } 6584341825Sdim } 6585341825Sdim 6586341825Sdim if (pNewMask) 6587341825Sdim 
*pNewMask = NewMask; 6588341825Sdim return true; 6589341825Sdim} 6590341825Sdim 6591341825Sdimuint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const { 6592341825Sdim unsigned Opcode = MI.getOpcode(); 6593341825Sdim unsigned NumOperands = MI.getDesc().getNumOperands(); 6594341825Sdim 6595341825Sdim auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) { 6596341825Sdim uint16_t validDomains = 0; 6597341825Sdim if (MI.getOperand(NumOperands - 1).isImm()) { 6598341825Sdim unsigned Imm = MI.getOperand(NumOperands - 1).getImm(); 6599341825Sdim if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4)) 6600341825Sdim validDomains |= 0x2; // PackedSingle 6601341825Sdim if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2)) 6602341825Sdim validDomains |= 0x4; // PackedDouble 6603341825Sdim if (!Is256 || Subtarget.hasAVX2()) 6604341825Sdim validDomains |= 0x8; // PackedInt 6605341825Sdim } 6606341825Sdim return validDomains; 6607341825Sdim }; 6608341825Sdim 6609341825Sdim switch (Opcode) { 6610341825Sdim case X86::BLENDPDrmi: 6611341825Sdim case X86::BLENDPDrri: 6612341825Sdim case X86::VBLENDPDrmi: 6613341825Sdim case X86::VBLENDPDrri: 6614341825Sdim return GetBlendDomains(2, false); 6615341825Sdim case X86::VBLENDPDYrmi: 6616341825Sdim case X86::VBLENDPDYrri: 6617341825Sdim return GetBlendDomains(4, true); 6618341825Sdim case X86::BLENDPSrmi: 6619341825Sdim case X86::BLENDPSrri: 6620341825Sdim case X86::VBLENDPSrmi: 6621341825Sdim case X86::VBLENDPSrri: 6622341825Sdim case X86::VPBLENDDrmi: 6623341825Sdim case X86::VPBLENDDrri: 6624341825Sdim return GetBlendDomains(4, false); 6625341825Sdim case X86::VBLENDPSYrmi: 6626341825Sdim case X86::VBLENDPSYrri: 6627341825Sdim case X86::VPBLENDDYrmi: 6628341825Sdim case X86::VPBLENDDYrri: 6629341825Sdim return GetBlendDomains(8, true); 6630341825Sdim case X86::PBLENDWrmi: 6631341825Sdim case X86::PBLENDWrri: 6632341825Sdim case X86::VPBLENDWrmi: 6633341825Sdim case X86::VPBLENDWrri: 6634341825Sdim // Treat VPBLENDWY 
as a 128-bit vector as it repeats the lo/hi masks. 6635341825Sdim case X86::VPBLENDWYrmi: 6636341825Sdim case X86::VPBLENDWYrri: 6637341825Sdim return GetBlendDomains(8, false); 6638341825Sdim case X86::VPANDDZ128rr: case X86::VPANDDZ128rm: 6639341825Sdim case X86::VPANDDZ256rr: case X86::VPANDDZ256rm: 6640341825Sdim case X86::VPANDQZ128rr: case X86::VPANDQZ128rm: 6641341825Sdim case X86::VPANDQZ256rr: case X86::VPANDQZ256rm: 6642341825Sdim case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm: 6643341825Sdim case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm: 6644341825Sdim case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm: 6645341825Sdim case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm: 6646341825Sdim case X86::VPORDZ128rr: case X86::VPORDZ128rm: 6647341825Sdim case X86::VPORDZ256rr: case X86::VPORDZ256rm: 6648341825Sdim case X86::VPORQZ128rr: case X86::VPORQZ128rm: 6649341825Sdim case X86::VPORQZ256rr: case X86::VPORQZ256rm: 6650341825Sdim case X86::VPXORDZ128rr: case X86::VPXORDZ128rm: 6651341825Sdim case X86::VPXORDZ256rr: case X86::VPXORDZ256rm: 6652341825Sdim case X86::VPXORQZ128rr: case X86::VPXORQZ128rm: 6653341825Sdim case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: 6654341825Sdim // If we don't have DQI see if we can still switch from an EVEX integer 6655341825Sdim // instruction to a VEX floating point instruction. 6656341825Sdim if (Subtarget.hasDQI()) 6657341825Sdim return 0; 6658341825Sdim 6659341825Sdim if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16) 6660341825Sdim return 0; 6661341825Sdim if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16) 6662341825Sdim return 0; 6663341825Sdim // Register forms will have 3 operands. Memory form will have more. 6664341825Sdim if (NumOperands == 3 && 6665341825Sdim RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16) 6666341825Sdim return 0; 6667341825Sdim 6668341825Sdim // All domains are valid. 
6669341825Sdim return 0xe; 6670344779Sdim case X86::MOVHLPSrr: 6671344779Sdim // We can swap domains when both inputs are the same register. 6672344779Sdim // FIXME: This doesn't catch all the cases we would like. If the input 6673344779Sdim // register isn't KILLed by the instruction, the two address instruction 6674344779Sdim // pass puts a COPY on one input. The other input uses the original 6675344779Sdim // register. This prevents the same physical register from being used by 6676344779Sdim // both inputs. 6677344779Sdim if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() && 6678344779Sdim MI.getOperand(0).getSubReg() == 0 && 6679344779Sdim MI.getOperand(1).getSubReg() == 0 && 6680344779Sdim MI.getOperand(2).getSubReg() == 0) 6681344779Sdim return 0x6; 6682344779Sdim return 0; 6683353358Sdim case X86::SHUFPDrri: 6684353358Sdim return 0x6; 6685341825Sdim } 6686341825Sdim return 0; 6687341825Sdim} 6688341825Sdim 6689341825Sdimbool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI, 6690341825Sdim unsigned Domain) const { 6691341825Sdim assert(Domain > 0 && Domain < 4 && "Invalid execution domain"); 6692341825Sdim uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 6693341825Sdim assert(dom && "Not an SSE instruction"); 6694341825Sdim 6695341825Sdim unsigned Opcode = MI.getOpcode(); 6696341825Sdim unsigned NumOperands = MI.getDesc().getNumOperands(); 6697341825Sdim 6698341825Sdim auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) { 6699341825Sdim if (MI.getOperand(NumOperands - 1).isImm()) { 6700341825Sdim unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255; 6701341825Sdim Imm = (ImmWidth == 16 ? 
((Imm << 8) | Imm) : Imm);
      unsigned NewImm = Imm;

      // Look up the replacement opcodes for this blend; try the base table
      // first and fall back to the AVX2 (VPBLENDD/VPBLENDW) table.
      const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
      if (!table)
        table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);

      if (Domain == 1) { // PackedSingle
        AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
      } else if (Domain == 2) { // PackedDouble
        AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm);
      } else if (Domain == 3) { // PackedInt
        if (Subtarget.hasAVX2()) {
          // If we are already VPBLENDW use that, else use VPBLENDD.
          if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
            table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
            AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
          }
        } else {
          // Without AVX2 the only integer blend available is 128-bit PBLENDW.
          assert(!Is256 && "128-bit vector expected");
          AdjustBlendMask(Imm, ImmWidth, 8, &NewImm);
        }
      }

      assert(table && table[Domain - 1] && "Unknown domain op");
      MI.setDesc(get(table[Domain - 1]));
      // The blend control immediate is the last operand; only the low 8 bits
      // are encodable.
      MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
    }
    return true;
  };

  // Dispatch on the blend opcode. The first argument to SetBlendDomain is the
  // width of the blend-control immediate in bits, the second whether the
  // instruction operates on a 256-bit vector.
  switch (Opcode) {
  case X86::BLENDPDrmi:
  case X86::BLENDPDrri:
  case X86::VBLENDPDrmi:
  case X86::VBLENDPDrri:
    return SetBlendDomain(2, false);
  case X86::VBLENDPDYrmi:
  case X86::VBLENDPDYrri:
    return SetBlendDomain(4, true);
  case X86::BLENDPSrmi:
  case X86::BLENDPSrri:
  case X86::VBLENDPSrmi:
  case X86::VBLENDPSrri:
  case X86::VPBLENDDrmi:
  case X86::VPBLENDDrri:
    return SetBlendDomain(4, false);
  case X86::VBLENDPSYrmi:
  case X86::VBLENDPSYrri:
  case X86::VPBLENDDYrmi:
  case X86::VPBLENDDYrri:
    return SetBlendDomain(8, true);
  case X86::PBLENDWrmi:
  case X86::PBLENDWrri:
  case X86::VPBLENDWrmi:
  case X86::VPBLENDWrri:
    return SetBlendDomain(8, false);
  case X86::VPBLENDWYrmi:
  case X86::VPBLENDWYrri:
    return SetBlendDomain(16, true);
  case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
  case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
  case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
  case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
  case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
  case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
  case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
  case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
  case X86::VPORDZ128rr: case X86::VPORDZ128rm:
  case X86::VPORDZ256rr: case X86::VPORDZ256rm:
  case X86::VPORQZ128rr: case X86::VPORQZ128rm:
  case X86::VPORQZ256rr: case X86::VPORQZ256rm:
  case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
  case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
  case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
  case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: {
    // Without DQI, convert EVEX instructions to VEX instructions.
    if (Subtarget.hasDQI())
      return false;

    const uint16_t *table = lookupAVX512(MI.getOpcode(), dom,
                                         ReplaceableCustomAVX512LogicInstrs);
    assert(table && "Instruction not found in table?");
    // Don't change integer Q instructions to D instructions and
    // use D instructions if we started with a PS instruction.
    if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
      Domain = 4;
    MI.setDesc(get(table[Domain - 1]));
    return true;
  }
  case X86::UNPCKHPDrr:
  case X86::MOVHLPSrr:
    // We just need to commute the instruction which will switch the domains.
    // Commuting is only safe when both sources are the same register and no
    // operand uses a sub-register.
    if (Domain != dom && Domain != 3 &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
        MI.getOperand(0).getSubReg() == 0 &&
        MI.getOperand(1).getSubReg() == 0 &&
        MI.getOperand(2).getSubReg() == 0) {
      commuteInstruction(MI, false);
      return true;
    }
    // We must always return true for MOVHLPSrr.
    if (Opcode == X86::MOVHLPSrr)
      return true;
    break;
  case X86::SHUFPDrri: {
    // SHUFPD can be rewritten as the equivalent SHUFPS by translating the
    // 2-bit double-shuffle immediate into the corresponding 4-bit
    // single-shuffle immediate.
    if (Domain == 1) {
      unsigned Imm = MI.getOperand(3).getImm();
      unsigned NewImm = 0x44;
      if (Imm & 1) NewImm |= 0x0a;
      if (Imm & 2) NewImm |= 0xa0;
      MI.getOperand(3).setImm(NewImm);
      MI.setDesc(get(X86::SHUFPSrri));
    }
    return true;
  }
  }
  return false;
}

/// Return the current SSE execution domain of \p MI (1 = PackedSingle,
/// 2 = PackedDouble, 3 = PackedInt) paired with a bitmask of the domains the
/// instruction could be rewritten into (bit N set means domain N is
/// available). A zero mask means the domain cannot be changed.
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
  uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
  unsigned opcode = MI.getOpcode();
  uint16_t validDomains = 0;
  if (domain) {
    // Attempt to match for custom instructions.
    validDomains = getExecutionDomainCustom(MI);
    if (validDomains)
      return std::make_pair(domain, validDomains);

    if (lookup(opcode, domain, ReplaceableInstrs)) {
      validDomains = 0xe; // All three domains are interchangeable.
    } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
      // The integer forms require AVX2; otherwise only the FP domains are
      // available.
      validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
    } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
      validDomains = 0x6; // PackedSingle/PackedDouble only.
    } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
      // Insert/extract instructions should only affect the domain if AVX2
      // is enabled.
      if (!Subtarget.hasAVX2())
        return std::make_pair(0, 0);
      validDomains = 0xe;
    } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
      validDomains = 0xe;
    } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain,
                                                  ReplaceableInstrsAVX512DQ)) {
      validDomains = 0xe;
    } else if (Subtarget.hasDQI()) {
      if (const uint16_t *table = lookupAVX512(opcode, domain,
                                               ReplaceableInstrsAVX512DQMasked)) {
        // Masked DQ forms can only switch between restricted domain pairs:
        // 0xa = {PackedSingle, PackedInt}, 0xc = {PackedDouble, PackedInt}.
        if (domain == 1 || (domain == 3 && table[3] == opcode))
          validDomains = 0xa;
        else
          validDomains = 0xc;
      }
    }
  }
  return std::make_pair(domain, validDomains);
}

/// Rewrite \p MI in place into its equivalent opcode in SSE domain \p Domain
/// (1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt) using the opcode
/// replacement tables; asserts if no equivalent exists.
void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
  assert(Domain>0 && Domain<4 && "Invalid execution domain");
  uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
  assert(dom && "Not an SSE instruction");

  // Attempt to match for custom instructions.
  if (setExecutionDomainCustom(MI, Domain))
    return;

  // Probe each replacement table in turn until one recognizes this opcode.
  const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
  if (!table) { // try the other table
    assert((Subtarget.hasAVX2() || Domain < 3) &&
           "256-bit vector operations only available in AVX2");
    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
  }
  if (!table) { // try the FP table
    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
    assert((!table || Domain < 3) &&
           "Can only select PackedSingle or PackedDouble");
  }
  if (!table) { // try the other table
    assert(Subtarget.hasAVX2() &&
           "256-bit insert/extract only available in AVX2");
    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
  }
  if (!table) { // try the AVX512 table
    assert(Subtarget.hasAVX512() && "Requires AVX-512");
    table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
    // Don't change integer Q instructions to D instructions.
    if (table && Domain == 3 && table[3] == MI.getOpcode())
      Domain = 4;
  }
  if (!table) { // try the AVX512DQ table
    assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
    table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
    // Don't change integer Q instructions to D instructions and
    // use D instructions if we started with a PS instruction.
    if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
      Domain = 4;
  }
  if (!table) { // try the AVX512DQMasked table
    assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
    table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
    if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
      Domain = 4;
  }
  assert(table && "Cannot change domain");
  MI.setDesc(get(table[Domain - 1]));
}

/// Return the noop instruction to use for a noop.
void X86InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(X86::NOOP);
}

/// Return true if \p opc defines its result with high latency. The opcodes
/// listed here are divisions, square roots and gather/scatter operations
/// (SSE, AVX and AVX-512 forms).
bool X86InstrInfo::isHighLatencyDef(int opc) const {
  switch (opc) {
  default: return false;
  case X86::DIVPDrm:
  case X86::DIVPDrr:
  case X86::DIVPSrm:
  case X86::DIVPSrr:
  case X86::DIVSDrm:
  case X86::DIVSDrm_Int:
  case X86::DIVSDrr:
  case X86::DIVSDrr_Int:
  case X86::DIVSSrm:
  case X86::DIVSSrm_Int:
  case X86::DIVSSrr:
  case X86::DIVSSrr_Int:
  case X86::SQRTPDm:
  case X86::SQRTPDr:
  case X86::SQRTPSm:
  case X86::SQRTPSr:
  case X86::SQRTSDm:
  case X86::SQRTSDm_Int:
  case X86::SQRTSDr:
  case X86::SQRTSDr_Int:
  case X86::SQRTSSm:
  case X86::SQRTSSm_Int:
  case X86::SQRTSSr:
  case X86::SQRTSSr_Int:
  // AVX instructions with high latency
  case X86::VDIVPDrm:
  case X86::VDIVPDrr:
  case
X86::VDIVPDYrm:
  case X86::VDIVPDYrr:
  case X86::VDIVPSrm:
  case X86::VDIVPSrr:
  case X86::VDIVPSYrm:
  case X86::VDIVPSYrr:
  case X86::VDIVSDrm:
  case X86::VDIVSDrm_Int:
  case X86::VDIVSDrr:
  case X86::VDIVSDrr_Int:
  case X86::VDIVSSrm:
  case X86::VDIVSSrm_Int:
  case X86::VDIVSSrr:
  case X86::VDIVSSrr_Int:
  case X86::VSQRTPDm:
  case X86::VSQRTPDr:
  case X86::VSQRTPDYm:
  case X86::VSQRTPDYr:
  case X86::VSQRTPSm:
  case X86::VSQRTPSr:
  case X86::VSQRTPSYm:
  case X86::VSQRTPSYr:
  case X86::VSQRTSDm:
  case X86::VSQRTSDm_Int:
  case X86::VSQRTSDr:
  case X86::VSQRTSDr_Int:
  case X86::VSQRTSSm:
  case X86::VSQRTSSm_Int:
  case X86::VSQRTSSr:
  case X86::VSQRTSSr_Int:
  // AVX512 instructions with high latency
  case X86::VDIVPDZ128rm:
  case X86::VDIVPDZ128rmb:
  case X86::VDIVPDZ128rmbk:
  case X86::VDIVPDZ128rmbkz:
  case X86::VDIVPDZ128rmk:
  case X86::VDIVPDZ128rmkz:
  case X86::VDIVPDZ128rr:
  case X86::VDIVPDZ128rrk:
  case X86::VDIVPDZ128rrkz:
  case X86::VDIVPDZ256rm:
  case X86::VDIVPDZ256rmb:
  case X86::VDIVPDZ256rmbk:
  case X86::VDIVPDZ256rmbkz:
  case X86::VDIVPDZ256rmk:
  case X86::VDIVPDZ256rmkz:
  case X86::VDIVPDZ256rr:
  case X86::VDIVPDZ256rrk:
  case X86::VDIVPDZ256rrkz:
  case X86::VDIVPDZrrb:
  case X86::VDIVPDZrrbk:
  case X86::VDIVPDZrrbkz:
  case X86::VDIVPDZrm:
  case X86::VDIVPDZrmb:
  case X86::VDIVPDZrmbk:
  case X86::VDIVPDZrmbkz:
  case X86::VDIVPDZrmk:
  case X86::VDIVPDZrmkz:
  case X86::VDIVPDZrr:
  case X86::VDIVPDZrrk:
  case X86::VDIVPDZrrkz:
  case X86::VDIVPSZ128rm:
  case X86::VDIVPSZ128rmb:
  case X86::VDIVPSZ128rmbk:
  case X86::VDIVPSZ128rmbkz:
  case X86::VDIVPSZ128rmk:
  case X86::VDIVPSZ128rmkz:
  case X86::VDIVPSZ128rr:
  case X86::VDIVPSZ128rrk:
  case X86::VDIVPSZ128rrkz:
  case X86::VDIVPSZ256rm:
  case X86::VDIVPSZ256rmb:
  case X86::VDIVPSZ256rmbk:
  case X86::VDIVPSZ256rmbkz:
  case X86::VDIVPSZ256rmk:
  case X86::VDIVPSZ256rmkz:
  case X86::VDIVPSZ256rr:
  case X86::VDIVPSZ256rrk:
  case X86::VDIVPSZ256rrkz:
  case X86::VDIVPSZrrb:
  case X86::VDIVPSZrrbk:
  case X86::VDIVPSZrrbkz:
  case X86::VDIVPSZrm:
  case X86::VDIVPSZrmb:
  case X86::VDIVPSZrmbk:
  case X86::VDIVPSZrmbkz:
  case X86::VDIVPSZrmk:
  case X86::VDIVPSZrmkz:
  case X86::VDIVPSZrr:
  case X86::VDIVPSZrrk:
  case X86::VDIVPSZrrkz:
  case X86::VDIVSDZrm:
  case X86::VDIVSDZrr:
  case X86::VDIVSDZrm_Int:
  case X86::VDIVSDZrm_Intk:
  case X86::VDIVSDZrm_Intkz:
  case X86::VDIVSDZrr_Int:
  case X86::VDIVSDZrr_Intk:
  case X86::VDIVSDZrr_Intkz:
  case X86::VDIVSDZrrb_Int:
  case X86::VDIVSDZrrb_Intk:
  case X86::VDIVSDZrrb_Intkz:
  case X86::VDIVSSZrm:
  case X86::VDIVSSZrr:
  case X86::VDIVSSZrm_Int:
  case X86::VDIVSSZrm_Intk:
  case X86::VDIVSSZrm_Intkz:
  case X86::VDIVSSZrr_Int:
  case X86::VDIVSSZrr_Intk:
  case X86::VDIVSSZrr_Intkz:
  case X86::VDIVSSZrrb_Int:
  case X86::VDIVSSZrrb_Intk:
  case X86::VDIVSSZrrb_Intkz:
  case X86::VSQRTPDZ128m:
  case X86::VSQRTPDZ128mb:
  case X86::VSQRTPDZ128mbk:
  case X86::VSQRTPDZ128mbkz:
  case X86::VSQRTPDZ128mk:
  case X86::VSQRTPDZ128mkz:
  case X86::VSQRTPDZ128r:
  case X86::VSQRTPDZ128rk:
  case X86::VSQRTPDZ128rkz:
  case X86::VSQRTPDZ256m:
  case X86::VSQRTPDZ256mb:
  case X86::VSQRTPDZ256mbk:
  case X86::VSQRTPDZ256mbkz:
  case X86::VSQRTPDZ256mk:
  case X86::VSQRTPDZ256mkz:
  case X86::VSQRTPDZ256r:
  case X86::VSQRTPDZ256rk:
  case X86::VSQRTPDZ256rkz:
  case X86::VSQRTPDZm:
  case X86::VSQRTPDZmb:
  case X86::VSQRTPDZmbk:
  case X86::VSQRTPDZmbkz:
  case X86::VSQRTPDZmk:
  case X86::VSQRTPDZmkz:
  case X86::VSQRTPDZr:
  case X86::VSQRTPDZrb:
  case X86::VSQRTPDZrbk:
  case X86::VSQRTPDZrbkz:
  case X86::VSQRTPDZrk:
  case X86::VSQRTPDZrkz:
  case X86::VSQRTPSZ128m:
  case X86::VSQRTPSZ128mb:
  case X86::VSQRTPSZ128mbk:
  case X86::VSQRTPSZ128mbkz:
  case X86::VSQRTPSZ128mk:
  case X86::VSQRTPSZ128mkz:
  case X86::VSQRTPSZ128r:
  case X86::VSQRTPSZ128rk:
  case X86::VSQRTPSZ128rkz:
  case X86::VSQRTPSZ256m:
  case X86::VSQRTPSZ256mb:
  case X86::VSQRTPSZ256mbk:
  case X86::VSQRTPSZ256mbkz:
  case X86::VSQRTPSZ256mk:
  case X86::VSQRTPSZ256mkz:
  case X86::VSQRTPSZ256r:
  case X86::VSQRTPSZ256rk:
  case X86::VSQRTPSZ256rkz:
  case X86::VSQRTPSZm:
  case X86::VSQRTPSZmb:
  case X86::VSQRTPSZmbk:
  case X86::VSQRTPSZmbkz:
  case X86::VSQRTPSZmk:
  case X86::VSQRTPSZmkz:
  case X86::VSQRTPSZr:
  case X86::VSQRTPSZrb:
  case X86::VSQRTPSZrbk:
  case X86::VSQRTPSZrbkz:
  case X86::VSQRTPSZrk:
  case X86::VSQRTPSZrkz:
  case X86::VSQRTSDZm:
  case X86::VSQRTSDZm_Int:
  case X86::VSQRTSDZm_Intk:
  case X86::VSQRTSDZm_Intkz:
  case X86::VSQRTSDZr:
  case X86::VSQRTSDZr_Int:
  case X86::VSQRTSDZr_Intk:
  case X86::VSQRTSDZr_Intkz:
  case X86::VSQRTSDZrb_Int:
  case X86::VSQRTSDZrb_Intk:
  case X86::VSQRTSDZrb_Intkz:
  case X86::VSQRTSSZm:
  case X86::VSQRTSSZm_Int:
  case X86::VSQRTSSZm_Intk:
  case X86::VSQRTSSZm_Intkz:
  case X86::VSQRTSSZr:
  case X86::VSQRTSSZr_Int:
  case X86::VSQRTSSZr_Intk:
  case X86::VSQRTSSZr_Intkz:
  case X86::VSQRTSSZrb_Int:
  case X86::VSQRTSSZrb_Intk:
  case X86::VSQRTSSZrb_Intkz:

  // Gathers, scatters and gather/scatter prefetches.
  case X86::VGATHERDPDYrm:
  case X86::VGATHERDPDZ128rm:
  case X86::VGATHERDPDZ256rm:
  case X86::VGATHERDPDZrm:
  case X86::VGATHERDPDrm:
  case X86::VGATHERDPSYrm:
  case X86::VGATHERDPSZ128rm:
  case X86::VGATHERDPSZ256rm:
  case X86::VGATHERDPSZrm:
  case X86::VGATHERDPSrm:
  case X86::VGATHERPF0DPDm:
  case X86::VGATHERPF0DPSm:
  case X86::VGATHERPF0QPDm:
  case X86::VGATHERPF0QPSm:
  case X86::VGATHERPF1DPDm:
  case X86::VGATHERPF1DPSm:
  case X86::VGATHERPF1QPDm:
  case X86::VGATHERPF1QPSm:
  case X86::VGATHERQPDYrm:
  case X86::VGATHERQPDZ128rm:
  case X86::VGATHERQPDZ256rm:
  case X86::VGATHERQPDZrm:
  case X86::VGATHERQPDrm:
  case X86::VGATHERQPSYrm:
  case X86::VGATHERQPSZ128rm:
  case X86::VGATHERQPSZ256rm:
  case X86::VGATHERQPSZrm:
  case X86::VGATHERQPSrm:
  case X86::VPGATHERDDYrm:
  case X86::VPGATHERDDZ128rm:
  case X86::VPGATHERDDZ256rm:
  case X86::VPGATHERDDZrm:
  case X86::VPGATHERDDrm:
  case X86::VPGATHERDQYrm:
  case X86::VPGATHERDQZ128rm:
  case X86::VPGATHERDQZ256rm:
  case X86::VPGATHERDQZrm:
  case X86::VPGATHERDQrm:
  case X86::VPGATHERQDYrm:
  case X86::VPGATHERQDZ128rm:
  case X86::VPGATHERQDZ256rm:
  case X86::VPGATHERQDZrm:
  case X86::VPGATHERQDrm:
  case X86::VPGATHERQQYrm:
  case X86::VPGATHERQQZ128rm:
  case X86::VPGATHERQQZ256rm:
  case X86::VPGATHERQQZrm:
  case X86::VPGATHERQQrm:
  case X86::VSCATTERDPDZ128mr:
  case X86::VSCATTERDPDZ256mr:
  case X86::VSCATTERDPDZmr:
  case X86::VSCATTERDPSZ128mr:
  case X86::VSCATTERDPSZ256mr:
  case X86::VSCATTERDPSZmr:
  case X86::VSCATTERPF0DPDm:
  case X86::VSCATTERPF0DPSm:
  case X86::VSCATTERPF0QPDm:
  case X86::VSCATTERPF0QPSm:
  case X86::VSCATTERPF1DPDm:
  case X86::VSCATTERPF1DPSm:
  case X86::VSCATTERPF1QPDm:
  case X86::VSCATTERPF1QPSm:
  case X86::VSCATTERQPDZ128mr:
  case X86::VSCATTERQPDZ256mr:
  case X86::VSCATTERQPDZmr:
  case X86::VSCATTERQPSZ128mr:
  case X86::VSCATTERQPSZ256mr:
  case X86::VSCATTERQPSZmr:
  case X86::VPSCATTERDDZ128mr:
  case X86::VPSCATTERDDZ256mr:
  case X86::VPSCATTERDDZmr:
  case X86::VPSCATTERDQZ128mr:
  case X86::VPSCATTERDQZ256mr:
  case X86::VPSCATTERDQZmr:
  case X86::VPSCATTERQDZ128mr:
  case X86::VPSCATTERQDZ256mr:
  case X86::VPSCATTERQDZmr:
  case X86::VPSCATTERQQZ128mr:
  case X86::VPSCATTERQQZ256mr:
  case X86::VPSCATTERQQZmr:
    return true;
  }
}

/// Decide whether the def->use edge from \p DefMI to \p UseMI is high
/// latency. The decision here is based solely on the defining opcode; the
/// schedule model, register info, operand indices and user instruction are
/// intentionally ignored.
bool X86InstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
                                         const MachineRegisterInfo *MRI,
                                         const MachineInstr &DefMI,
                                         unsigned DefIdx,
                                         const MachineInstr &UseMI,
                                         unsigned UseIdx) const {
  return isHighLatencyDef(DefMI.getOpcode());
}

/// Check whether \p Inst's operands permit reassociation within \p MBB,
/// adding an X86-specific constraint on top of the target-independent check:
/// the instruction's implicit EFLAGS def, if any, must be dead.
bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
                                           const MachineBasicBlock *MBB) const {
  assert(Inst.getNumExplicitOperands() == 3 && Inst.getNumExplicitDefs() == 1 &&
         Inst.getNumDefs() <= 2 && "Reassociation needs binary operators");

  // Integer binary math/logic instructions have a third source operand:
  // the EFLAGS register. That operand must be both defined here and never
  // used; ie, it must be dead. If the EFLAGS operand is live, then we can
  // not change anything because rearranging the operands could affect other
  // instructions that depend on the exact status flags (zero, sign, etc.)
  // that are set by using these particular operands with this operation.
  const MachineOperand *FlagDef = Inst.findRegisterDefOperand(X86::EFLAGS);
  assert((Inst.getNumDefs() == 1 || FlagDef) &&
         "Implicit def isn't flags?");
  if (FlagDef && !FlagDef->isDead())
    return false;

  return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  // Integer and vector-integer ops, bitwise logic and min/max are always
  // associative and commutative.
  case X86::AND8rr:
  case X86::AND16rr:
  case X86::AND32rr:
  case X86::AND64rr:
  case X86::OR8rr:
  case X86::OR16rr:
  case X86::OR32rr:
  case X86::OR64rr:
  case X86::XOR8rr:
  case X86::XOR16rr:
  case X86::XOR32rr:
  case X86::XOR64rr:
  case X86::IMUL16rr:
  case X86::IMUL32rr:
  case X86::IMUL64rr:
  case X86::PANDrr:
  case X86::PORrr:
  case X86::PXORrr:
  case X86::ANDPDrr:
  case X86::ANDPSrr:
  case X86::ORPDrr:
  case X86::ORPSrr:
  case X86::XORPDrr:
  case X86::XORPSrr:
  case X86::PADDBrr:
  case X86::PADDWrr:
  case X86::PADDDrr:
  case X86::PADDQrr:
  case X86::PMULLWrr:
  case X86::PMULLDrr:
  case X86::PMAXSBrr:
  case X86::PMAXSDrr:
  case X86::PMAXSWrr:
  case X86::PMAXUBrr:
  case X86::PMAXUDrr:
  case X86::PMAXUWrr:
  case X86::PMINSBrr:
  case X86::PMINSDrr:
  case X86::PMINSWrr:
  case X86::PMINUBrr:
  case X86::PMINUDrr:
  case X86::PMINUWrr:
  case X86::VPANDrr:
  case X86::VPANDYrr:
  case X86::VPANDDZ128rr:
  case X86::VPANDDZ256rr:
  case X86::VPANDDZrr:
  case X86::VPANDQZ128rr:
  case X86::VPANDQZ256rr:
  case X86::VPANDQZrr:
  case X86::VPORrr:
  case X86::VPORYrr:
  case X86::VPORDZ128rr:
  case X86::VPORDZ256rr:
  case X86::VPORDZrr:
  case X86::VPORQZ128rr:
  case X86::VPORQZ256rr:
  case X86::VPORQZrr:
  case X86::VPXORrr:
  case X86::VPXORYrr:
  case X86::VPXORDZ128rr:
  case X86::VPXORDZ256rr:
  case X86::VPXORDZrr:
  case X86::VPXORQZ128rr:
  case X86::VPXORQZ256rr:
  case X86::VPXORQZrr:
  case X86::VANDPDrr:
  case X86::VANDPSrr:
  case X86::VANDPDYrr:
  case X86::VANDPSYrr:
  case X86::VANDPDZ128rr:
  case X86::VANDPSZ128rr:
  case X86::VANDPDZ256rr:
  case X86::VANDPSZ256rr:
  case X86::VANDPDZrr:
  case X86::VANDPSZrr:
  case X86::VORPDrr:
  case X86::VORPSrr:
  case X86::VORPDYrr:
  case X86::VORPSYrr:
  case X86::VORPDZ128rr:
  case X86::VORPSZ128rr:
  case X86::VORPDZ256rr:
  case X86::VORPSZ256rr:
  case X86::VORPDZrr:
  case X86::VORPSZrr:
  case X86::VXORPDrr:
  case X86::VXORPSrr:
  case X86::VXORPDYrr:
  case X86::VXORPSYrr:
  case X86::VXORPDZ128rr:
  case X86::VXORPSZ128rr:
  case X86::VXORPDZ256rr:
  case X86::VXORPSZ256rr:
  case X86::VXORPDZrr:
  case X86::VXORPSZrr:
  case X86::KADDBrr:
  case X86::KADDWrr:
  case X86::KADDDrr:
  case X86::KADDQrr:
  case X86::KANDBrr:
  case X86::KANDWrr:
  case X86::KANDDrr:
  case X86::KANDQrr:
  case X86::KORBrr:
  case X86::KORWrr:
  case X86::KORDrr:
  case X86::KORQrr:
  case X86::KXORBrr:
  case X86::KXORWrr:
  case X86::KXORDrr:
  case X86::KXORQrr:
  case X86::VPADDBrr:
  case X86::VPADDWrr:
  case X86::VPADDDrr:
  case X86::VPADDQrr:
  case X86::VPADDBYrr:
  case X86::VPADDWYrr:
  case X86::VPADDDYrr:
  case X86::VPADDQYrr:
  case X86::VPADDBZ128rr:
  case X86::VPADDWZ128rr:
  case X86::VPADDDZ128rr:
  case X86::VPADDQZ128rr:
  case X86::VPADDBZ256rr:
  case X86::VPADDWZ256rr:
  case X86::VPADDDZ256rr:
  case X86::VPADDQZ256rr:
  case X86::VPADDBZrr:
  case X86::VPADDWZrr:
  case X86::VPADDDZrr:
  case X86::VPADDQZrr:
  case X86::VPMULLWrr:
  case X86::VPMULLWYrr:
  case X86::VPMULLWZ128rr:
  case X86::VPMULLWZ256rr:
  case X86::VPMULLWZrr:
  case X86::VPMULLDrr:
  case X86::VPMULLDYrr:
  case X86::VPMULLDZ128rr:
  case X86::VPMULLDZ256rr:
  case X86::VPMULLDZrr:
  case X86::VPMULLQZ128rr:
  case X86::VPMULLQZ256rr:
  case X86::VPMULLQZrr:
  case X86::VPMAXSBrr:
  case X86::VPMAXSBYrr:
  case X86::VPMAXSBZ128rr:
  case X86::VPMAXSBZ256rr:
  case X86::VPMAXSBZrr:
  case X86::VPMAXSDrr:
  case X86::VPMAXSDYrr:
  case X86::VPMAXSDZ128rr:
  case X86::VPMAXSDZ256rr:
  case X86::VPMAXSDZrr:
  case X86::VPMAXSQZ128rr:
  case X86::VPMAXSQZ256rr:
  case X86::VPMAXSQZrr:
  case X86::VPMAXSWrr:
  case X86::VPMAXSWYrr:
  case X86::VPMAXSWZ128rr:
  case X86::VPMAXSWZ256rr:
  case X86::VPMAXSWZrr:
  case X86::VPMAXUBrr:
  case X86::VPMAXUBYrr:
  case X86::VPMAXUBZ128rr:
  case X86::VPMAXUBZ256rr:
  case X86::VPMAXUBZrr:
  case X86::VPMAXUDrr:
  case X86::VPMAXUDYrr:
  case X86::VPMAXUDZ128rr:
  case X86::VPMAXUDZ256rr:
  case X86::VPMAXUDZrr:
  case X86::VPMAXUQZ128rr:
  case X86::VPMAXUQZ256rr:
  case X86::VPMAXUQZrr:
  case X86::VPMAXUWrr:
  case X86::VPMAXUWYrr:
  case X86::VPMAXUWZ128rr:
  case X86::VPMAXUWZ256rr:
  case X86::VPMAXUWZrr:
  case X86::VPMINSBrr:
  case X86::VPMINSBYrr:
  case X86::VPMINSBZ128rr:
  case X86::VPMINSBZ256rr:
  case X86::VPMINSBZrr:
  case X86::VPMINSDrr:
  case X86::VPMINSDYrr:
  case X86::VPMINSDZ128rr:
  case X86::VPMINSDZ256rr:
  case X86::VPMINSDZrr:
  case X86::VPMINSQZ128rr:
  case X86::VPMINSQZ256rr:
  case X86::VPMINSQZrr:
  case X86::VPMINSWrr:
  case X86::VPMINSWYrr:
  case X86::VPMINSWZ128rr:
  case X86::VPMINSWZ256rr:
  case X86::VPMINSWZrr:
  case X86::VPMINUBrr:
  case X86::VPMINUBYrr:
  case X86::VPMINUBZ128rr:
  case X86::VPMINUBZ256rr:
  case X86::VPMINUBZrr:
  case X86::VPMINUDrr:
  case X86::VPMINUDYrr:
  case X86::VPMINUDZ128rr:
  case X86::VPMINUDZ256rr:
  case X86::VPMINUDZrr:
  case X86::VPMINUQZ128rr:
  case X86::VPMINUQZ256rr:
  case X86::VPMINUQZrr:
  case X86::VPMINUWrr:
  case X86::VPMINUWYrr:
  case X86::VPMINUWZ128rr:
  case X86::VPMINUWZ256rr:
  case X86::VPMINUWZrr:
  // Normal min/max instructions are not commutative because of NaN and signed
  // zero semantics, but these are. Thus, there's no need to check for global
  // relaxed math; the instructions themselves have the properties we need.
  case X86::MAXCPDrr:
  case X86::MAXCPSrr:
  case X86::MAXCSDrr:
  case X86::MAXCSSrr:
  case X86::MINCPDrr:
  case X86::MINCPSrr:
  case X86::MINCSDrr:
  case X86::MINCSSrr:
  case X86::VMAXCPDrr:
  case X86::VMAXCPSrr:
  case X86::VMAXCPDYrr:
  case X86::VMAXCPSYrr:
  case X86::VMAXCPDZ128rr:
  case X86::VMAXCPSZ128rr:
  case X86::VMAXCPDZ256rr:
  case X86::VMAXCPSZ256rr:
  case X86::VMAXCPDZrr:
  case X86::VMAXCPSZrr:
  case X86::VMAXCSDrr:
  case X86::VMAXCSSrr:
  case X86::VMAXCSDZrr:
  case X86::VMAXCSSZrr:
  case X86::VMINCPDrr:
  case X86::VMINCPSrr:
  case X86::VMINCPDYrr:
  case X86::VMINCPSYrr:
  case X86::VMINCPDZ128rr:
  case X86::VMINCPSZ128rr:
  case X86::VMINCPDZ256rr:
  case X86::VMINCPSZ256rr:
  case X86::VMINCPDZrr:
  case X86::VMINCPSZrr:
  case X86::VMINCSDrr:
  case X86::VMINCSSrr:
  case X86::VMINCSDZrr:
  case X86::VMINCSSZrr:
    return true;
  // FP add/mul reassociate only when unsafe FP math is enabled.
  case X86::ADDPDrr:
  case X86::ADDPSrr:
  case X86::ADDSDrr:
  case X86::ADDSSrr:
  case X86::MULPDrr:
  case X86::MULPSrr:
  case X86::MULSDrr:
  case X86::MULSSrr:
  case X86::VADDPDrr:
  case X86::VADDPSrr:
  case X86::VADDPDYrr:
  case X86::VADDPSYrr:
  case X86::VADDPDZ128rr:
  case X86::VADDPSZ128rr:
  case X86::VADDPDZ256rr:
  case X86::VADDPSZ256rr:
  case X86::VADDPDZrr:
  case X86::VADDPSZrr:
  case X86::VADDSDrr:
  case X86::VADDSSrr:
  case X86::VADDSDZrr:
  case X86::VADDSSZrr:
  case X86::VMULPDrr:
  case X86::VMULPSrr:
  case X86::VMULPDYrr:
  case X86::VMULPSYrr:
  case X86::VMULPDZ128rr:
  case X86::VMULPSZ128rr:
  case X86::VMULPDZ256rr:
  case X86::VMULPSZ256rr:
  case X86::VMULPDZrr:
  case X86::VMULPSZrr:
  case X86::VMULSDrr:
  case X86::VMULSSrr:
  case X86::VMULSDZrr:
  case X86::VMULSSZrr:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

/// If \p DescribedReg overlaps with the MOVrr instruction's destination
/// register then, if possible, describe the value in terms of the source
/// register.
7564360784Sdimstatic Optional<ParamLoadedValue> 7565360784SdimdescribeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg, 7566360784Sdim const TargetRegisterInfo *TRI) { 7567360784Sdim Register DestReg = MI.getOperand(0).getReg(); 7568360784Sdim Register SrcReg = MI.getOperand(1).getReg(); 7569360784Sdim 7570360784Sdim auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); 7571360784Sdim 7572360784Sdim // If the described register is the destination, just return the source. 7573360784Sdim if (DestReg == DescribedReg) 7574360784Sdim return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 7575360784Sdim 7576360784Sdim // If the described register is a sub-register of the destination register, 7577360784Sdim // then pick out the source register's corresponding sub-register. 7578360784Sdim if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) { 7579360784Sdim unsigned SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx); 7580360784Sdim return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr); 7581360784Sdim } 7582360784Sdim 7583360784Sdim // The remaining case to consider is when the described register is a 7584360784Sdim // super-register of the destination register. MOV8rr and MOV16rr does not 7585360784Sdim // write to any of the other bytes in the register, meaning that we'd have to 7586360784Sdim // describe the value using a combination of the source register and the 7587360784Sdim // non-overlapping bits in the described register, which is not currently 7588360784Sdim // possible. 
7589360784Sdim if (MI.getOpcode() == X86::MOV8rr || MI.getOpcode() == X86::MOV16rr || 7590360784Sdim !TRI->isSuperRegister(DestReg, DescribedReg)) 7591360784Sdim return None; 7592360784Sdim 7593360784Sdim assert(MI.getOpcode() == X86::MOV32rr && "Unexpected super-register case"); 7594360784Sdim return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 7595360784Sdim} 7596360784Sdim 7597360784SdimOptional<ParamLoadedValue> 7598360784SdimX86InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const { 7599360784Sdim const MachineOperand *Op = nullptr; 7600360784Sdim DIExpression *Expr = nullptr; 7601360784Sdim 7602360784Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 7603360784Sdim 7604360784Sdim switch (MI.getOpcode()) { 7605360784Sdim case X86::LEA32r: 7606360784Sdim case X86::LEA64r: 7607360784Sdim case X86::LEA64_32r: { 7608360784Sdim // We may need to describe a 64-bit parameter with a 32-bit LEA. 7609360784Sdim if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) 7610360784Sdim return None; 7611360784Sdim 7612360784Sdim // Operand 4 could be global address. For now we do not support 7613360784Sdim // such situation. 7614360784Sdim if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm()) 7615360784Sdim return None; 7616360784Sdim 7617360784Sdim const MachineOperand &Op1 = MI.getOperand(1); 7618360784Sdim const MachineOperand &Op2 = MI.getOperand(3); 7619360784Sdim assert(Op2.isReg() && (Op2.getReg() == X86::NoRegister || 7620360784Sdim Register::isPhysicalRegister(Op2.getReg()))); 7621360784Sdim 7622360784Sdim // Omit situations like: 7623360784Sdim // %rsi = lea %rsi, 4, ... 
7624360784Sdim if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) || 7625360784Sdim Op2.getReg() == MI.getOperand(0).getReg()) 7626360784Sdim return None; 7627360784Sdim else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister && 7628360784Sdim TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) || 7629360784Sdim (Op2.getReg() != X86::NoRegister && 7630360784Sdim TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg()))) 7631360784Sdim return None; 7632360784Sdim 7633360784Sdim int64_t Coef = MI.getOperand(2).getImm(); 7634360784Sdim int64_t Offset = MI.getOperand(4).getImm(); 7635360784Sdim SmallVector<uint64_t, 8> Ops; 7636360784Sdim 7637360784Sdim if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) { 7638360784Sdim Op = &Op1; 7639360784Sdim } else if (Op1.isFI()) 7640360784Sdim Op = &Op1; 7641360784Sdim 7642360784Sdim if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) { 7643360784Sdim Ops.push_back(dwarf::DW_OP_constu); 7644360784Sdim Ops.push_back(Coef + 1); 7645360784Sdim Ops.push_back(dwarf::DW_OP_mul); 7646360784Sdim } else { 7647360784Sdim if (Op && Op2.getReg() != X86::NoRegister) { 7648360784Sdim int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false); 7649360784Sdim if (dwarfReg < 0) 7650360784Sdim return None; 7651360784Sdim else if (dwarfReg < 32) { 7652360784Sdim Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg); 7653360784Sdim Ops.push_back(0); 7654360784Sdim } else { 7655360784Sdim Ops.push_back(dwarf::DW_OP_bregx); 7656360784Sdim Ops.push_back(dwarfReg); 7657360784Sdim Ops.push_back(0); 7658360784Sdim } 7659360784Sdim } else if (!Op) { 7660360784Sdim assert(Op2.getReg() != X86::NoRegister); 7661360784Sdim Op = &Op2; 7662360784Sdim } 7663360784Sdim 7664360784Sdim if (Coef > 1) { 7665360784Sdim assert(Op2.getReg() != X86::NoRegister); 7666360784Sdim Ops.push_back(dwarf::DW_OP_constu); 7667360784Sdim Ops.push_back(Coef); 7668360784Sdim Ops.push_back(dwarf::DW_OP_mul); 7669360784Sdim } 7670360784Sdim 7671360784Sdim if 
(((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) && 7672360784Sdim Op2.getReg() != X86::NoRegister) { 7673360784Sdim Ops.push_back(dwarf::DW_OP_plus); 7674360784Sdim } 7675360784Sdim } 7676360784Sdim 7677360784Sdim DIExpression::appendOffset(Ops, Offset); 7678360784Sdim Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops); 7679360784Sdim 7680360784Sdim return ParamLoadedValue(*Op, Expr);; 7681360784Sdim } 7682360784Sdim case X86::MOV32ri: 7683360784Sdim case X86::MOV64ri: 7684360784Sdim case X86::MOV64ri32: 7685360784Sdim // MOV32ri may be used for producing zero-extended 32-bit immediates in 7686360784Sdim // 64-bit parameters, so we need to consider super-registers. 7687360784Sdim if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) 7688360784Sdim return None; 7689360784Sdim return ParamLoadedValue(MI.getOperand(1), Expr); 7690360784Sdim case X86::MOV8rr: 7691360784Sdim case X86::MOV16rr: 7692360784Sdim case X86::MOV32rr: 7693360784Sdim case X86::MOV64rr: 7694360784Sdim return describeMOVrrLoadedValue(MI, Reg, TRI); 7695360784Sdim case X86::XOR32rr: { 7696360784Sdim // 64-bit parameters are zero-materialized using XOR32rr, so also consider 7697360784Sdim // super-registers. 7698360784Sdim if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) 7699360784Sdim return None; 7700360784Sdim if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) 7701360784Sdim return ParamLoadedValue(MachineOperand::CreateImm(0), Expr); 7702360784Sdim return None; 7703360784Sdim } 7704360784Sdim case X86::MOVSX64rr32: { 7705360784Sdim // We may need to describe the lower 32 bits of the MOVSX; for example, in 7706360784Sdim // cases like this: 7707360784Sdim // 7708360784Sdim // $ebx = [...] 
7709360784Sdim // $rdi = MOVSX64rr32 $ebx 7710360784Sdim // $esi = MOV32rr $edi 7711360784Sdim if (!TRI->isSubRegisterEq(MI.getOperand(0).getReg(), Reg)) 7712360784Sdim return None; 7713360784Sdim 7714360784Sdim Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); 7715360784Sdim 7716360784Sdim // If the described register is the destination register we need to 7717360784Sdim // sign-extend the source register from 32 bits. The other case we handle 7718360784Sdim // is when the described register is the 32-bit sub-register of the 7719360784Sdim // destination register, in case we just need to return the source 7720360784Sdim // register. 7721360784Sdim if (Reg == MI.getOperand(0).getReg()) 7722360784Sdim Expr = DIExpression::appendExt(Expr, 32, 64, true); 7723360784Sdim else 7724360784Sdim assert(X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg) && 7725360784Sdim "Unhandled sub-register case for MOVSX64rr32"); 7726360784Sdim 7727360784Sdim return ParamLoadedValue(MI.getOperand(1), Expr); 7728360784Sdim } 7729360784Sdim default: 7730360784Sdim assert(!MI.isMoveImmediate() && "Unexpected MoveImm instruction"); 7731360784Sdim return TargetInstrInfo::describeLoadedValue(MI, Reg); 7732360784Sdim } 7733360784Sdim} 7734360784Sdim 7735296417Sdim/// This is an architecture-specific helper function of reassociateOps. 7736296417Sdim/// Set special operand attributes for new instructions after reassociation. 7737296417Sdimvoid X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, 7738296417Sdim MachineInstr &OldMI2, 7739296417Sdim MachineInstr &NewMI1, 7740296417Sdim MachineInstr &NewMI2) const { 7741360784Sdim // Integer instructions may define an implicit EFLAGS dest register operand. 
7742360784Sdim MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand(X86::EFLAGS); 7743360784Sdim MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand(X86::EFLAGS); 7744288943Sdim 7745360784Sdim assert(!OldFlagDef1 == !OldFlagDef2 && 7746296417Sdim "Unexpected instruction type for reassociation"); 7747288943Sdim 7748360784Sdim if (!OldFlagDef1 || !OldFlagDef2) 7749360784Sdim return; 7750288943Sdim 7751360784Sdim assert(OldFlagDef1->isDead() && OldFlagDef2->isDead() && 7752296417Sdim "Must have dead EFLAGS operand in reassociable instruction"); 7753288943Sdim 7754360784Sdim MachineOperand *NewFlagDef1 = NewMI1.findRegisterDefOperand(X86::EFLAGS); 7755360784Sdim MachineOperand *NewFlagDef2 = NewMI2.findRegisterDefOperand(X86::EFLAGS); 7756288943Sdim 7757360784Sdim assert(NewFlagDef1 && NewFlagDef2 && 7758296417Sdim "Unexpected operand in reassociable instruction"); 7759288943Sdim 7760296417Sdim // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations 7761296417Sdim // of this pass or other passes. The EFLAGS operands must be dead in these new 7762296417Sdim // instructions because the EFLAGS operands in the original instructions must 7763296417Sdim // be dead in order for reassociation to occur. 
7764360784Sdim NewFlagDef1->setIsDead(); 7765360784Sdim NewFlagDef2->setIsDead(); 7766288943Sdim} 7767288943Sdim 7768296417Sdimstd::pair<unsigned, unsigned> 7769296417SdimX86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 7770296417Sdim return std::make_pair(TF, 0u); 7771288943Sdim} 7772288943Sdim 7773296417SdimArrayRef<std::pair<unsigned, const char *>> 7774296417SdimX86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 7775296417Sdim using namespace X86II; 7776296417Sdim static const std::pair<unsigned, const char *> TargetFlags[] = { 7777296417Sdim {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"}, 7778296417Sdim {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"}, 7779296417Sdim {MO_GOT, "x86-got"}, 7780296417Sdim {MO_GOTOFF, "x86-gotoff"}, 7781296417Sdim {MO_GOTPCREL, "x86-gotpcrel"}, 7782296417Sdim {MO_PLT, "x86-plt"}, 7783296417Sdim {MO_TLSGD, "x86-tlsgd"}, 7784296417Sdim {MO_TLSLD, "x86-tlsld"}, 7785296417Sdim {MO_TLSLDM, "x86-tlsldm"}, 7786296417Sdim {MO_GOTTPOFF, "x86-gottpoff"}, 7787296417Sdim {MO_INDNTPOFF, "x86-indntpoff"}, 7788296417Sdim {MO_TPOFF, "x86-tpoff"}, 7789296417Sdim {MO_DTPOFF, "x86-dtpoff"}, 7790296417Sdim {MO_NTPOFF, "x86-ntpoff"}, 7791296417Sdim {MO_GOTNTPOFF, "x86-gotntpoff"}, 7792296417Sdim {MO_DLLIMPORT, "x86-dllimport"}, 7793296417Sdim {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"}, 7794296417Sdim {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"}, 7795296417Sdim {MO_TLVP, "x86-tlvp"}, 7796296417Sdim {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"}, 7797344779Sdim {MO_SECREL, "x86-secrel"}, 7798344779Sdim {MO_COFFSTUB, "x86-coffstub"}}; 7799296417Sdim return makeArrayRef(TargetFlags); 7800288943Sdim} 7801288943Sdim 7802210299Sednamespace { 7803288943Sdim /// Create Global Base Reg pass. This initializes the PIC 7804210299Sed /// global base register for x86-32. 
  struct CGBR : public MachineFunctionPass {
    static char ID;
    CGBR() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override {
      const X86TargetMachine *TM =
        static_cast<const X86TargetMachine *>(&MF.getTarget());
      const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();

      // Don't do anything in the 64-bit small and kernel code models. They use
      // RIP-relative addressing for everything.
      if (STI.is64Bit() && (TM->getCodeModel() == CodeModel::Small ||
                            TM->getCodeModel() == CodeModel::Kernel))
        return false;

      // Only emit a global base reg in PIC mode.
      if (!TM->isPositionIndependent())
        return false;

      X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
      unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();

      // If we didn't need a GlobalBaseReg, don't insert code.
      if (GlobalBaseReg == 0)
        return false;

      // Insert the set of GlobalBaseReg into the first MBB of the function
      MachineBasicBlock &FirstMBB = MF.front();
      MachineBasicBlock::iterator MBBI = FirstMBB.begin();
      DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      const X86InstrInfo *TII = STI.getInstrInfo();

      // PC receives the raw program-counter value; for GOT-style PIC it is a
      // scratch vreg that gets the GOT offset added before landing in
      // GlobalBaseReg, otherwise it is GlobalBaseReg directly.
      unsigned PC;
      if (STI.isPICStyleGOT())
        PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
      else
        PC = GlobalBaseReg;

      if (STI.is64Bit()) {
        if (TM->getCodeModel() == CodeModel::Medium) {
          // In the medium code model, use a RIP-relative LEA to materialize the
          // GOT.
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
              .addReg(X86::RIP)
              .addImm(0)
              .addReg(0)
              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
              .addReg(0);
        } else if (TM->getCodeModel() == CodeModel::Large) {
          // In the large code model, we are aiming for this code, though the
          // register allocation may vary:
          //   leaq .LN$pb(%rip), %rax
          //   movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
          //   addq %rcx, %rax
          // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
          Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
          Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
              .addReg(X86::RIP)
              .addImm(0)
              .addReg(0)
              .addSym(MF.getPICBaseSymbol())
              .addReg(0);
          // Attach the PIC-base label to the LEA just emitted (std::prev of
          // the insertion point) so MO_PIC_BASE_OFFSET resolves against it.
          std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
                                 X86II::MO_PIC_BASE_OFFSET);
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
              .addReg(PBReg, RegState::Kill)
              .addReg(GOTReg, RegState::Kill);
        } else {
          llvm_unreachable("unexpected code model");
        }
      } else {
        // Operand of MovePCtoStack is completely ignored by asm printer. It's
        // only used in JIT code emission as displacement to pc.
        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);

        // If we're using vanilla 'GOT' PIC style, we should use relative
        // addressing not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
        if (STI.isPICStyleGOT()) {
          // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
          // %some_register
          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
              .addReg(PC)
              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
                                 X86II::MO_GOT_ABSOLUTE_ADDRESS);
        }
      }

      return true;
    }

    StringRef getPassName() const override {
      return "X86 PIC Global Base Reg Initialization";
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };
}

char CGBR::ID = 0;
FunctionPass*
llvm::createX86GlobalBaseRegPass() { return new CGBR(); }

namespace {
  /// Fold multiple local-dynamic TLS base-address computations in a function
  /// into a single call whose result is copied to a shared virtual register.
  struct LDTLSCleanup : public MachineFunctionPass {
    static char ID;
    LDTLSCleanup() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override {
      if (skipFunction(MF.getFunction()))
        return false;

      X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
      if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
        // No point folding accesses if there isn't at least two.
        return false;
      }

      MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
      return VisitNode(DT->getRootNode(), 0);
    }

    // Visit the dominator subtree rooted at Node in pre-order.
    // If TLSBaseAddrReg is non-null, then use that to replace any
    // TLS_base_addr instructions. Otherwise, create the register
    // when the first such instruction is seen, and then use it
    // as we encounter more instructions.
    bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
      MachineBasicBlock *BB = Node->getBlock();
      bool Changed = false;

      // Traverse the current block. Note: the replace/set helpers return the
      // new instruction so I stays valid while we mutate the block in place.
      for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
           ++I) {
        switch (I->getOpcode()) {
        case X86::TLS_base_addr32:
        case X86::TLS_base_addr64:
          if (TLSBaseAddrReg)
            I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
          else
            I = SetRegister(*I, &TLSBaseAddrReg);
          Changed = true;
          break;
        default:
          break;
        }
      }

      // Visit the children of this block in the dominator tree.
      for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
           I != E; ++I) {
        Changed |= VisitNode(*I, TLSBaseAddrReg);
      }

      return Changed;
    }

    // Replace the TLS_base_addr instruction I with a copy from
    // TLSBaseAddrReg, returning the new instruction.
    MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I,
                                         unsigned TLSBaseAddrReg) {
      MachineFunction *MF = I.getParent()->getParent();
      const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
      const bool is64Bit = STI.is64Bit();
      const X86InstrInfo *TII = STI.getInstrInfo();

      // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
      MachineInstr *Copy =
          BuildMI(*I.getParent(), I, I.getDebugLoc(),
                  TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
              .addReg(TLSBaseAddrReg);

      // Erase the TLS_base_addr instruction.
      I.eraseFromParent();

      return Copy;
    }

    // Create a virtual register in *TLSBaseAddrReg, and populate it by
    // inserting a copy instruction after I. Returns the new instruction.
    MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
      MachineFunction *MF = I.getParent()->getParent();
      const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
      const bool is64Bit = STI.is64Bit();
      const X86InstrInfo *TII = STI.getInstrInfo();

      // Create a virtual register for the TLS base address.
      MachineRegisterInfo &RegInfo = MF->getRegInfo();
      *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
                                                      ? &X86::GR64RegClass
                                                      : &X86::GR32RegClass);

      // Insert a copy from RAX/EAX to TLSBaseAddrReg.
      MachineInstr *Next = I.getNextNode();
      MachineInstr *Copy =
          BuildMI(*I.getParent(), Next, I.getDebugLoc(),
                  TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
              .addReg(is64Bit ? X86::RAX : X86::EAX);

      return Copy;
    }

    StringRef getPassName() const override {
      return "Local Dynamic TLS Access Clean-up";
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };
}

char LDTLSCleanup::ID = 0;
FunctionPass*
llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }

/// Constants defining how certain sequences should be outlined.
///
/// \p MachineOutlinerDefault implies that the function is called with a call
/// instruction, and a return must be emitted for the outlined function frame.
///
/// That is,
///
/// I1                                 OUTLINED_FUNCTION:
/// I2 --> call OUTLINED_FUNCTION       I1
/// I3                                  I2
///                                     I3
///                                     ret
///
/// * Call construction overhead: 1 (call instruction)
/// * Frame construction overhead: 1 (return instruction)
///
/// \p MachineOutlinerTailCall implies that the function is being tail called.
/// A jump is emitted instead of a call, and the return is already present in
/// the outlined sequence.
That is, 8048327952Sdim/// 8049327952Sdim/// I1 OUTLINED_FUNCTION: 8050327952Sdim/// I2 --> jmp OUTLINED_FUNCTION I1 8051327952Sdim/// ret I2 8052327952Sdim/// ret 8053327952Sdim/// 8054327952Sdim/// * Call construction overhead: 1 (jump instruction) 8055327952Sdim/// * Frame construction overhead: 0 (don't need to return) 8056327952Sdim/// 8057327952Sdimenum MachineOutlinerClass { 8058327952Sdim MachineOutlinerDefault, 8059327952Sdim MachineOutlinerTailCall 8060327952Sdim}; 8061321369Sdim 8062341825Sdimoutliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo( 8063341825Sdim std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { 8064341825Sdim unsigned SequenceSize = 8065341825Sdim std::accumulate(RepeatedSequenceLocs[0].front(), 8066341825Sdim std::next(RepeatedSequenceLocs[0].back()), 0, 8067341825Sdim [](unsigned Sum, const MachineInstr &MI) { 8068341825Sdim // FIXME: x86 doesn't implement getInstSizeInBytes, so 8069341825Sdim // we can't tell the cost. Just assume each instruction 8070341825Sdim // is one byte. 8071341825Sdim if (MI.isDebugInstr() || MI.isKill()) 8072341825Sdim return Sum; 8073341825Sdim return Sum + 1; 8074341825Sdim }); 8075321369Sdim 8076341825Sdim // FIXME: Use real size in bytes for call and ret instructions. 8077341825Sdim if (RepeatedSequenceLocs[0].back()->isTerminator()) { 8078341825Sdim for (outliner::Candidate &C : RepeatedSequenceLocs) 8079341825Sdim C.setCallInfo(MachineOutlinerTailCall, 1); 8080327952Sdim 8081341825Sdim return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 8082341825Sdim 0, // Number of bytes to emit frame. 8083341825Sdim MachineOutlinerTailCall // Type of frame. 
8084341825Sdim ); 8085341825Sdim } 8086341825Sdim 8087341825Sdim for (outliner::Candidate &C : RepeatedSequenceLocs) 8088341825Sdim C.setCallInfo(MachineOutlinerDefault, 1); 8089341825Sdim 8090341825Sdim return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 1, 8091341825Sdim MachineOutlinerDefault); 8092321369Sdim} 8093321369Sdim 8094327952Sdimbool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, 8095327952Sdim bool OutlineFromLinkOnceODRs) const { 8096327952Sdim const Function &F = MF.getFunction(); 8097327952Sdim 8098327952Sdim // Does the function use a red zone? If it does, then we can't risk messing 8099327952Sdim // with the stack. 8100353358Sdim if (Subtarget.getFrameLowering()->has128ByteRedZone(MF)) { 8101341825Sdim // It could have a red zone. If it does, then we don't want to touch it. 8102341825Sdim const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 8103341825Sdim if (!X86FI || X86FI->getUsesRedZone()) 8104327952Sdim return false; 8105341825Sdim } 8106327952Sdim 8107327952Sdim // If we *don't* want to outline from things that could potentially be deduped 8108327952Sdim // then return false. 8109327952Sdim if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 8110327952Sdim return false; 8111327952Sdim 8112327952Sdim // This function is viable for outlining, so return true. 8113327952Sdim return true; 8114321369Sdim} 8115321369Sdim 8116341825Sdimoutliner::InstrType 8117341825SdimX86InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { 8118341825Sdim MachineInstr &MI = *MIT; 8119321369Sdim // Don't allow debug values to impact outlining type. 8120341825Sdim if (MI.isDebugInstr() || MI.isIndirectDebugValue()) 8121341825Sdim return outliner::InstrType::Invisible; 8122321369Sdim 8123341825Sdim // At this point, KILL instructions don't really tell us much so we can go 8124341825Sdim // ahead and skip over them. 
8125341825Sdim if (MI.isKill()) 8126341825Sdim return outliner::InstrType::Invisible; 8127341825Sdim 8128321369Sdim // Is this a tail call? If yes, we can outline as a tail call. 8129321369Sdim if (isTailCall(MI)) 8130341825Sdim return outliner::InstrType::Legal; 8131321369Sdim 8132321369Sdim // Is this the terminator of a basic block? 8133321369Sdim if (MI.isTerminator() || MI.isReturn()) { 8134321369Sdim 8135321369Sdim // Does its parent have any successors in its MachineFunction? 8136321369Sdim if (MI.getParent()->succ_empty()) 8137341825Sdim return outliner::InstrType::Legal; 8138321369Sdim 8139321369Sdim // It does, so we can't tail call it. 8140341825Sdim return outliner::InstrType::Illegal; 8141321369Sdim } 8142321369Sdim 8143321369Sdim // Don't outline anything that modifies or reads from the stack pointer. 8144321369Sdim // 8145321369Sdim // FIXME: There are instructions which are being manually built without 8146321369Sdim // explicit uses/defs so we also have to check the MCInstrDesc. We should be 8147321369Sdim // able to remove the extra checks once those are fixed up. For example, 8148327952Sdim // sometimes we might get something like %rax = POP64r 1. This won't be 8149321369Sdim // caught by modifiesRegister or readsRegister even though the instruction 8150321369Sdim // really ought to be formed so that modifiesRegister/readsRegister would 8151321369Sdim // catch it. 8152321369Sdim if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) || 8153321369Sdim MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) || 8154321369Sdim MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) 8155341825Sdim return outliner::InstrType::Illegal; 8156321369Sdim 8157321369Sdim // Outlined calls change the instruction pointer, so don't read from it. 
8158321369Sdim if (MI.readsRegister(X86::RIP, &RI) || 8159321369Sdim MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) || 8160321369Sdim MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP)) 8161341825Sdim return outliner::InstrType::Illegal; 8162321369Sdim 8163321369Sdim // Positions can't safely be outlined. 8164321369Sdim if (MI.isPosition()) 8165341825Sdim return outliner::InstrType::Illegal; 8166321369Sdim 8167321369Sdim // Make sure none of the operands of this instruction do anything tricky. 8168321369Sdim for (const MachineOperand &MOP : MI.operands()) 8169321369Sdim if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || 8170321369Sdim MOP.isTargetIndex()) 8171341825Sdim return outliner::InstrType::Illegal; 8172321369Sdim 8173341825Sdim return outliner::InstrType::Legal; 8174321369Sdim} 8175321369Sdim 8176341825Sdimvoid X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB, 8177321369Sdim MachineFunction &MF, 8178341825Sdim const outliner::OutlinedFunction &OF) 8179327952Sdim const { 8180321369Sdim // If we're a tail call, we already have a return, so don't do anything. 8181341825Sdim if (OF.FrameConstructionID == MachineOutlinerTailCall) 8182321369Sdim return; 8183321369Sdim 8184321369Sdim // We're a normal call, so our sequence doesn't have a return instruction. 8185321369Sdim // Add it in. 8186321369Sdim MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ)); 8187321369Sdim MBB.insert(MBB.end(), retq); 8188321369Sdim} 8189321369Sdim 8190321369SdimMachineBasicBlock::iterator 8191321369SdimX86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, 8192321369Sdim MachineBasicBlock::iterator &It, 8193321369Sdim MachineFunction &MF, 8194341825Sdim const outliner::Candidate &C) const { 8195321369Sdim // Is it a tail call? 8196341825Sdim if (C.CallConstructionID == MachineOutlinerTailCall) { 8197321369Sdim // Yes, just insert a JMP. 
8198321369Sdim It = MBB.insert(It, 8199341825Sdim BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64)) 8200321369Sdim .addGlobalAddress(M.getNamedValue(MF.getName()))); 8201321369Sdim } else { 8202321369Sdim // No, insert a call. 8203321369Sdim It = MBB.insert(It, 8204321369Sdim BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32)) 8205321369Sdim .addGlobalAddress(M.getNamedValue(MF.getName()))); 8206321369Sdim } 8207321369Sdim 8208321369Sdim return It; 8209321369Sdim} 8210344779Sdim 8211344779Sdim#define GET_INSTRINFO_HELPERS 8212344779Sdim#include "X86GenInstrInfo.inc" 8213