1235633Sdim//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file contains the X86 implementation of the TargetInstrInfo class. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "X86InstrInfo.h" 15193323Sed#include "X86.h" 16193323Sed#include "X86InstrBuilder.h" 17193323Sed#include "X86MachineFunctionInfo.h" 18193323Sed#include "X86Subtarget.h" 19193323Sed#include "X86TargetMachine.h" 20193323Sed#include "llvm/ADT/STLExtras.h" 21252723Sdim#include "llvm/CodeGen/LiveVariables.h" 22193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 23245431Sdim#include "llvm/CodeGen/MachineDominators.h" 24193323Sed#include "llvm/CodeGen/MachineFrameInfo.h" 25193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 26193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 27263509Sdim#include "llvm/CodeGen/StackMaps.h" 28252723Sdim#include "llvm/IR/DerivedTypes.h" 29252723Sdim#include "llvm/IR/LLVMContext.h" 30235633Sdim#include "llvm/MC/MCAsmInfo.h" 31207618Srdivacky#include "llvm/MC/MCInst.h" 32193323Sed#include "llvm/Support/CommandLine.h" 33202375Srdivacky#include "llvm/Support/Debug.h" 34198090Srdivacky#include "llvm/Support/ErrorHandling.h" 35198090Srdivacky#include "llvm/Support/raw_ostream.h" 36193323Sed#include "llvm/Target/TargetOptions.h" 37199481Srdivacky#include <limits> 38199481Srdivacky 39263509Sdim#define GET_INSTRINFO_CTOR_DTOR 40224145Sdim#include "X86GenInstrInfo.inc" 41224145Sdim 42193323Sedusing namespace llvm; 43193323Sed 44198090Srdivackystatic cl::opt<bool> 45198090SrdivackyNoFusing("disable-spill-fusing", 46198090Srdivacky cl::desc("Disable fusing of spill code into instructions")); 47198090Srdivackystatic cl::opt<bool> 48198090SrdivackyPrintFailedFusing("print-failed-fuse-candidates", 49198090Srdivacky cl::desc("Print instructions that the allocator wants to" 50198090Srdivacky " fuse, but the X86 backend currently can't"), 51198090Srdivacky cl::Hidden); 52198090Srdivackystatic cl::opt<bool> 53198090SrdivackyReMatPICStubLoad("remat-pic-stub-load", 54198090Srdivacky cl::desc("Re-materialize load from stub in PIC mode"), 55198090Srdivacky cl::init(false), cl::Hidden); 56193323Sed 57226890Sdimenum { 58226890Sdim // Select which memory operand is being unfolded. 59245431Sdim // (stored in bits 0 - 3) 60226890Sdim TB_INDEX_0 = 0, 61226890Sdim TB_INDEX_1 = 1, 62226890Sdim TB_INDEX_2 = 2, 63245431Sdim TB_INDEX_3 = 3, 64245431Sdim TB_INDEX_MASK = 0xf, 65226890Sdim 66245431Sdim // Do not insert the reverse map (MemOp -> RegOp) into the table. 67245431Sdim // This may be needed because there is a many -> one mapping. 68245431Sdim TB_NO_REVERSE = 1 << 4, 69245431Sdim 70245431Sdim // Do not insert the forward map (RegOp -> MemOp) into the table. 71245431Sdim // This is needed for Native Client, which prohibits branch 72245431Sdim // instructions from using a memory operand. 73245431Sdim TB_NO_FORWARD = 1 << 5, 74245431Sdim 75245431Sdim TB_FOLDED_LOAD = 1 << 6, 76245431Sdim TB_FOLDED_STORE = 1 << 7, 77245431Sdim 78226890Sdim // Minimum alignment required for load/store. 79226890Sdim // Used for RegOp->MemOp conversion. 80226890Sdim // (stored in bits 8 - 15) 81226890Sdim TB_ALIGN_SHIFT = 8, 82226890Sdim TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, 83226890Sdim TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, 84226890Sdim TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, 85263509Sdim TB_ALIGN_64 = 64 << TB_ALIGN_SHIFT, 86245431Sdim TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT 87226890Sdim}; 88226890Sdim 89235633Sdimstruct X86OpTblEntry { 90235633Sdim uint16_t RegOp; 91235633Sdim uint16_t MemOp; 92245431Sdim uint16_t Flags; 93235633Sdim}; 94235633Sdim 95263509Sdim// Pin the vtable to this file. 96263509Sdimvoid X86InstrInfo::anchor() {} 97263509Sdim 98193323SedX86InstrInfo::X86InstrInfo(X86TargetMachine &tm) 99224145Sdim : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit() 100224145Sdim ? X86::ADJCALLSTACKDOWN64 101224145Sdim : X86::ADJCALLSTACKDOWN32), 102224145Sdim (tm.getSubtarget<X86Subtarget>().is64Bit() 103224145Sdim ? X86::ADJCALLSTACKUP64 104224145Sdim : X86::ADJCALLSTACKUP32)), 105263509Sdim TM(tm), RI(tm) { 106218893Sdim 107235633Sdim static const X86OpTblEntry OpTbl2Addr[] = { 108226890Sdim { X86::ADC32ri, X86::ADC32mi, 0 }, 109226890Sdim { X86::ADC32ri8, X86::ADC32mi8, 0 }, 110226890Sdim { X86::ADC32rr, X86::ADC32mr, 0 }, 111226890Sdim { X86::ADC64ri32, X86::ADC64mi32, 0 }, 112226890Sdim { X86::ADC64ri8, X86::ADC64mi8, 0 }, 113226890Sdim { X86::ADC64rr, X86::ADC64mr, 0 }, 114226890Sdim { X86::ADD16ri, X86::ADD16mi, 0 }, 115226890Sdim { X86::ADD16ri8, X86::ADD16mi8, 0 }, 116226890Sdim { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE }, 117226890Sdim { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE }, 118226890Sdim { X86::ADD16rr, X86::ADD16mr, 0 }, 119226890Sdim { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE }, 120226890Sdim { X86::ADD32ri, X86::ADD32mi, 0 }, 121226890Sdim { X86::ADD32ri8, X86::ADD32mi8, 0 }, 122226890Sdim { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE }, 123226890Sdim { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE }, 124226890Sdim { X86::ADD32rr, X86::ADD32mr, 0 }, 125226890Sdim { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE }, 126226890Sdim { X86::ADD64ri32, X86::ADD64mi32, 0 }, 127226890Sdim { X86::ADD64ri8, X86::ADD64mi8, 0 }, 128226890Sdim { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE }, 129226890Sdim { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE }, 130226890Sdim { X86::ADD64rr, X86::ADD64mr, 0 }, 131226890Sdim { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE }, 132226890Sdim { X86::ADD8ri, X86::ADD8mi, 0 }, 133226890Sdim { X86::ADD8rr, X86::ADD8mr, 0 }, 134226890Sdim { X86::AND16ri, X86::AND16mi, 0 }, 135226890Sdim { X86::AND16ri8, X86::AND16mi8, 0 }, 136226890Sdim { X86::AND16rr, X86::AND16mr, 0 }, 137226890Sdim { X86::AND32ri, X86::AND32mi, 0 }, 138226890Sdim { X86::AND32ri8, X86::AND32mi8, 0 }, 139226890Sdim { X86::AND32rr, X86::AND32mr, 0 }, 140226890Sdim { X86::AND64ri32, X86::AND64mi32, 0 }, 141226890Sdim { X86::AND64ri8, X86::AND64mi8, 0 }, 142226890Sdim { X86::AND64rr, X86::AND64mr, 0 }, 143226890Sdim { X86::AND8ri, X86::AND8mi, 0 }, 144226890Sdim { X86::AND8rr, X86::AND8mr, 0 }, 145226890Sdim { X86::DEC16r, X86::DEC16m, 0 }, 146226890Sdim { X86::DEC32r, X86::DEC32m, 0 }, 147226890Sdim { X86::DEC64_16r, X86::DEC64_16m, 0 }, 148226890Sdim { X86::DEC64_32r, X86::DEC64_32m, 0 }, 149226890Sdim { X86::DEC64r, X86::DEC64m, 0 }, 150226890Sdim { X86::DEC8r, X86::DEC8m, 0 }, 151226890Sdim { X86::INC16r, X86::INC16m, 0 }, 152226890Sdim { X86::INC32r, X86::INC32m, 0 }, 153226890Sdim { X86::INC64_16r, X86::INC64_16m, 0 }, 154226890Sdim { X86::INC64_32r, X86::INC64_32m, 0 }, 155226890Sdim { X86::INC64r, X86::INC64m, 0 }, 156226890Sdim { X86::INC8r, X86::INC8m, 0 }, 157226890Sdim { X86::NEG16r, X86::NEG16m, 0 }, 158226890Sdim { X86::NEG32r, X86::NEG32m, 0 }, 159226890Sdim { X86::NEG64r, X86::NEG64m, 0 }, 160226890Sdim { X86::NEG8r, X86::NEG8m, 0 }, 161226890Sdim { X86::NOT16r, X86::NOT16m, 0 }, 162226890Sdim { X86::NOT32r, X86::NOT32m, 0 }, 163226890Sdim { X86::NOT64r, X86::NOT64m, 0 }, 164226890Sdim { X86::NOT8r, X86::NOT8m, 0 }, 165226890Sdim { X86::OR16ri, X86::OR16mi, 0 }, 166226890Sdim { X86::OR16ri8, X86::OR16mi8, 0 }, 167226890Sdim { X86::OR16rr, X86::OR16mr, 0 }, 168226890Sdim { X86::OR32ri, X86::OR32mi, 0 }, 169226890Sdim { X86::OR32ri8, X86::OR32mi8, 0 }, 170226890Sdim { X86::OR32rr, X86::OR32mr, 0 }, 171226890Sdim { X86::OR64ri32, X86::OR64mi32, 0 }, 172226890Sdim { X86::OR64ri8, X86::OR64mi8, 0 }, 173226890Sdim { X86::OR64rr, X86::OR64mr, 0 }, 174226890Sdim { X86::OR8ri, X86::OR8mi, 0 }, 175226890Sdim { X86::OR8rr, X86::OR8mr, 0 }, 176226890Sdim { X86::ROL16r1, X86::ROL16m1, 0 }, 177226890Sdim { X86::ROL16rCL, X86::ROL16mCL, 0 }, 178226890Sdim { X86::ROL16ri, X86::ROL16mi, 0 }, 179226890Sdim { X86::ROL32r1, X86::ROL32m1, 0 }, 180226890Sdim { X86::ROL32rCL, X86::ROL32mCL, 0 }, 181226890Sdim { X86::ROL32ri, X86::ROL32mi, 0 }, 182226890Sdim { X86::ROL64r1, X86::ROL64m1, 0 }, 183226890Sdim { X86::ROL64rCL, X86::ROL64mCL, 0 }, 184226890Sdim { X86::ROL64ri, X86::ROL64mi, 0 }, 185226890Sdim { X86::ROL8r1, X86::ROL8m1, 0 }, 186226890Sdim { X86::ROL8rCL, X86::ROL8mCL, 0 }, 187226890Sdim { X86::ROL8ri, X86::ROL8mi, 0 }, 188226890Sdim { X86::ROR16r1, X86::ROR16m1, 0 }, 189226890Sdim { X86::ROR16rCL, X86::ROR16mCL, 0 }, 190226890Sdim { X86::ROR16ri, X86::ROR16mi, 0 }, 191226890Sdim { X86::ROR32r1, X86::ROR32m1, 0 }, 192226890Sdim { X86::ROR32rCL, X86::ROR32mCL, 0 }, 193226890Sdim { X86::ROR32ri, X86::ROR32mi, 0 }, 194226890Sdim { X86::ROR64r1, X86::ROR64m1, 0 }, 195226890Sdim { X86::ROR64rCL, X86::ROR64mCL, 0 }, 196226890Sdim { X86::ROR64ri, X86::ROR64mi, 0 }, 197226890Sdim { X86::ROR8r1, X86::ROR8m1, 0 }, 198226890Sdim { X86::ROR8rCL, X86::ROR8mCL, 0 }, 199226890Sdim { X86::ROR8ri, X86::ROR8mi, 0 }, 200226890Sdim { X86::SAR16r1, X86::SAR16m1, 0 }, 201226890Sdim { X86::SAR16rCL, X86::SAR16mCL, 0 }, 202226890Sdim { X86::SAR16ri, X86::SAR16mi, 0 }, 203226890Sdim { X86::SAR32r1, X86::SAR32m1, 0 }, 204226890Sdim { X86::SAR32rCL, X86::SAR32mCL, 0 }, 205226890Sdim { X86::SAR32ri, X86::SAR32mi, 0 }, 206226890Sdim { X86::SAR64r1, X86::SAR64m1, 0 }, 207226890Sdim { X86::SAR64rCL, X86::SAR64mCL, 0 }, 208226890Sdim { X86::SAR64ri, X86::SAR64mi, 0 }, 209226890Sdim { X86::SAR8r1, X86::SAR8m1, 0 }, 210226890Sdim { X86::SAR8rCL, X86::SAR8mCL, 0 }, 211226890Sdim { X86::SAR8ri, X86::SAR8mi, 0 }, 212226890Sdim { X86::SBB32ri, X86::SBB32mi, 0 }, 213226890Sdim { X86::SBB32ri8, X86::SBB32mi8, 0 }, 214226890Sdim { X86::SBB32rr, X86::SBB32mr, 0 }, 215226890Sdim { X86::SBB64ri32, X86::SBB64mi32, 0 }, 216226890Sdim { X86::SBB64ri8, X86::SBB64mi8, 0 }, 217226890Sdim { X86::SBB64rr, X86::SBB64mr, 0 }, 218226890Sdim { X86::SHL16rCL, X86::SHL16mCL, 0 }, 219226890Sdim { X86::SHL16ri, X86::SHL16mi, 0 }, 220226890Sdim { X86::SHL32rCL, X86::SHL32mCL, 0 }, 221226890Sdim { X86::SHL32ri, X86::SHL32mi, 0 }, 222226890Sdim { X86::SHL64rCL, X86::SHL64mCL, 0 }, 223226890Sdim { X86::SHL64ri, X86::SHL64mi, 0 }, 224226890Sdim { X86::SHL8rCL, X86::SHL8mCL, 0 }, 225226890Sdim { X86::SHL8ri, X86::SHL8mi, 0 }, 226226890Sdim { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 }, 227226890Sdim { X86::SHLD16rri8, X86::SHLD16mri8, 0 }, 228226890Sdim { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 }, 229226890Sdim { X86::SHLD32rri8, X86::SHLD32mri8, 0 }, 230226890Sdim { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 }, 231226890Sdim { X86::SHLD64rri8, X86::SHLD64mri8, 0 }, 232226890Sdim { X86::SHR16r1, X86::SHR16m1, 0 }, 233226890Sdim { X86::SHR16rCL, X86::SHR16mCL, 0 }, 234226890Sdim { X86::SHR16ri, X86::SHR16mi, 0 }, 235226890Sdim { X86::SHR32r1, X86::SHR32m1, 0 }, 236226890Sdim { X86::SHR32rCL, X86::SHR32mCL, 0 }, 237226890Sdim { X86::SHR32ri, X86::SHR32mi, 0 }, 238226890Sdim { X86::SHR64r1, X86::SHR64m1, 0 }, 239226890Sdim { X86::SHR64rCL, X86::SHR64mCL, 0 }, 240226890Sdim { X86::SHR64ri, X86::SHR64mi, 0 }, 241226890Sdim { X86::SHR8r1, X86::SHR8m1, 0 }, 242226890Sdim { X86::SHR8rCL, X86::SHR8mCL, 0 }, 243226890Sdim { X86::SHR8ri, X86::SHR8mi, 0 }, 244226890Sdim { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 }, 245226890Sdim { X86::SHRD16rri8, X86::SHRD16mri8, 0 }, 246226890Sdim { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 }, 247226890Sdim { X86::SHRD32rri8, X86::SHRD32mri8, 0 }, 248226890Sdim { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 }, 249226890Sdim { X86::SHRD64rri8, X86::SHRD64mri8, 0 }, 250226890Sdim { X86::SUB16ri, X86::SUB16mi, 0 }, 251226890Sdim { X86::SUB16ri8, X86::SUB16mi8, 0 }, 252226890Sdim { X86::SUB16rr, X86::SUB16mr, 0 }, 253226890Sdim { X86::SUB32ri, X86::SUB32mi, 0 }, 254226890Sdim { X86::SUB32ri8, X86::SUB32mi8, 0 }, 255226890Sdim { X86::SUB32rr, X86::SUB32mr, 0 }, 256226890Sdim { X86::SUB64ri32, X86::SUB64mi32, 0 }, 257226890Sdim { X86::SUB64ri8, X86::SUB64mi8, 0 }, 258226890Sdim { X86::SUB64rr, X86::SUB64mr, 0 }, 259226890Sdim { X86::SUB8ri, X86::SUB8mi, 0 }, 260226890Sdim { X86::SUB8rr, X86::SUB8mr, 0 }, 261226890Sdim { X86::XOR16ri, X86::XOR16mi, 0 }, 262226890Sdim { X86::XOR16ri8, X86::XOR16mi8, 0 }, 263226890Sdim { X86::XOR16rr, X86::XOR16mr, 0 }, 264226890Sdim { X86::XOR32ri, X86::XOR32mi, 0 }, 265226890Sdim { X86::XOR32ri8, X86::XOR32mi8, 0 }, 266226890Sdim { X86::XOR32rr, X86::XOR32mr, 0 }, 267226890Sdim { X86::XOR64ri32, X86::XOR64mi32, 0 }, 268226890Sdim { X86::XOR64ri8, X86::XOR64mi8, 0 }, 269226890Sdim { X86::XOR64rr, X86::XOR64mr, 0 }, 270226890Sdim { X86::XOR8ri, X86::XOR8mi, 0 }, 271226890Sdim { X86::XOR8rr, X86::XOR8mr, 0 } 272193323Sed }; 273193323Sed 274193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { 275235633Sdim unsigned RegOp = OpTbl2Addr[i].RegOp; 276235633Sdim unsigned MemOp = OpTbl2Addr[i].MemOp; 277235633Sdim unsigned Flags = OpTbl2Addr[i].Flags; 278226890Sdim AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, 279226890Sdim RegOp, MemOp, 280226890Sdim // Index 0, folded load and store, no alignment requirement. 281226890Sdim Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); 282193323Sed } 283193323Sed 284235633Sdim static const X86OpTblEntry OpTbl0[] = { 285226890Sdim { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, 286226890Sdim { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, 287226890Sdim { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, 288226890Sdim { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, 289226890Sdim { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, 290226890Sdim { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, 291226890Sdim { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, 292226890Sdim { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, 293226890Sdim { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD }, 294226890Sdim { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD }, 295226890Sdim { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD }, 296226890Sdim { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD }, 297226890Sdim { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD }, 298226890Sdim { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD }, 299226890Sdim { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD }, 300226890Sdim { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD }, 301226890Sdim { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD }, 302226890Sdim { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, 303226890Sdim { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD }, 304226890Sdim { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD }, 305252723Sdim { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE }, 306226890Sdim { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD }, 307226890Sdim { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD }, 308226890Sdim { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD }, 309226890Sdim { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD }, 310226890Sdim { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD }, 311226890Sdim { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, 312226890Sdim { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, 313226890Sdim { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, 314226890Sdim { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, 315226890Sdim { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, 316226890Sdim { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, 317226890Sdim { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, 318226890Sdim { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, 319226890Sdim { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE }, 320226890Sdim { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE }, 321226890Sdim { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE }, 322226890Sdim { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE }, 323226890Sdim { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE }, 324226890Sdim { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE }, 325226890Sdim { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, 326226890Sdim { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, 327226890Sdim { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, 328226890Sdim { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE }, 329226890Sdim { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE }, 330226890Sdim { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE }, 331226890Sdim { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE }, 332226890Sdim { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE }, 333226890Sdim { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE }, 334226890Sdim { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD }, 335226890Sdim { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD }, 336226890Sdim { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD }, 337226890Sdim { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, 338226890Sdim { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, 339226890Sdim { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, 340226890Sdim { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, 341226890Sdim { X86::SETBr, X86::SETBm, TB_FOLDED_STORE }, 342226890Sdim { X86::SETEr, X86::SETEm, TB_FOLDED_STORE }, 343226890Sdim { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE }, 344226890Sdim { X86::SETGr, X86::SETGm, TB_FOLDED_STORE }, 345226890Sdim { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE }, 346226890Sdim { X86::SETLr, X86::SETLm, TB_FOLDED_STORE }, 347226890Sdim { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE }, 348226890Sdim { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE }, 349226890Sdim { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE }, 350226890Sdim { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE }, 351226890Sdim { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, 352226890Sdim { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, 353226890Sdim { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, 354226890Sdim { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, 355226890Sdim { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, 356226890Sdim { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, 357226890Sdim { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, 358226890Sdim { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, 359226890Sdim { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, 360226890Sdim // AVX 128-bit versions of foldable instructions 361252723Sdim { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE }, 362235633Sdim { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, 363226890Sdim { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, 364226890Sdim { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, 365226890Sdim { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, 366226890Sdim { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE }, 367226890Sdim { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE }, 368226890Sdim { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE }, 369226890Sdim { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE }, 370226890Sdim { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE }, 371226890Sdim { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE }, 372226890Sdim // AVX 256-bit foldable instructions 373235633Sdim { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, 374226890Sdim { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, 375226890Sdim { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, 376226890Sdim { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, 377226890Sdim { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, 378263509Sdim { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }, 379263509Sdim // AVX-512 foldable instructions 380263509Sdim { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE } 381193323Sed }; 382193323Sed 383193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { 384235633Sdim unsigned RegOp = OpTbl0[i].RegOp; 385235633Sdim unsigned MemOp = OpTbl0[i].MemOp; 386235633Sdim unsigned Flags = OpTbl0[i].Flags; 387226890Sdim AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, 388226890Sdim RegOp, MemOp, TB_INDEX_0 | Flags); 389193323Sed } 390193323Sed 391235633Sdim static const X86OpTblEntry OpTbl1[] = { 392226890Sdim { X86::CMP16rr, X86::CMP16rm, 0 }, 393226890Sdim { X86::CMP32rr, X86::CMP32rm, 0 }, 394226890Sdim { X86::CMP64rr, X86::CMP64rm, 0 }, 395226890Sdim { X86::CMP8rr, X86::CMP8rm, 0 }, 396226890Sdim { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, 397226890Sdim { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, 398226890Sdim { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, 399226890Sdim { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, 400226890Sdim { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, 401226890Sdim { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, 402226890Sdim { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, 403226890Sdim { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, 404226890Sdim { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, 405226890Sdim { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, 406226890Sdim { X86::IMUL16rri, X86::IMUL16rmi, 0 }, 407226890Sdim { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, 408226890Sdim { X86::IMUL32rri, X86::IMUL32rmi, 0 }, 409226890Sdim { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, 410226890Sdim { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, 411226890Sdim { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, 412226890Sdim { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, 413226890Sdim { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, 414226890Sdim { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, 415226890Sdim { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, 416245431Sdim { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 }, 417245431Sdim { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 }, 418226890Sdim { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, 419226890Sdim { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, 420226890Sdim { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, 421226890Sdim { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, 422226890Sdim { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, 423226890Sdim { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, 424226890Sdim { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, 425226890Sdim { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, 426226890Sdim { X86::MOV16rr, X86::MOV16rm, 0 }, 427226890Sdim { X86::MOV32rr, X86::MOV32rm, 0 }, 428226890Sdim { X86::MOV64rr, X86::MOV64rm, 0 }, 429226890Sdim { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, 430226890Sdim { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, 431226890Sdim { X86::MOV8rr, X86::MOV8rm, 0 }, 432226890Sdim { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 }, 433226890Sdim { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 }, 434226890Sdim { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, 435226890Sdim { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, 436226890Sdim { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, 437226890Sdim { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 }, 438226890Sdim { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 }, 439226890Sdim { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 }, 440226890Sdim { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, 441226890Sdim { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, 442226890Sdim { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, 443226890Sdim { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, 444226890Sdim { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, 445226890Sdim { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, 446226890Sdim { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 }, 447226890Sdim { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, 448226890Sdim { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, 449226890Sdim { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 }, 450226890Sdim { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, 451226890Sdim { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, 452226890Sdim { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, 453226890Sdim { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, 454235633Sdim { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 }, 455235633Sdim { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 }, 456235633Sdim { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 }, 457226890Sdim { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 }, 458226890Sdim { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 }, 459226890Sdim { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, 460226890Sdim { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 }, 461226890Sdim { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 }, 462226890Sdim { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 }, 463226890Sdim { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 }, 464226890Sdim { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, 465226890Sdim { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, 466226890Sdim { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, 467226890Sdim { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 }, 468226890Sdim { X86::SQRTSDr, X86::SQRTSDm, 0 }, 469226890Sdim { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, 470226890Sdim { X86::SQRTSSr, X86::SQRTSSm, 0 }, 471226890Sdim { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, 472226890Sdim { X86::TEST16rr, X86::TEST16rm, 0 }, 473226890Sdim { X86::TEST32rr, X86::TEST32rm, 0 }, 474226890Sdim { X86::TEST64rr, X86::TEST64rm, 0 }, 475226890Sdim { X86::TEST8rr, X86::TEST8rm, 0 }, 476193323Sed // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 477226890Sdim { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, 478226890Sdim { X86::UCOMISSrr, X86::UCOMISSrm, 0 }, 479226890Sdim // AVX 128-bit versions of foldable instructions 480226890Sdim { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, 481226890Sdim { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, 482226890Sdim { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 }, 483226890Sdim { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 }, 484245431Sdim { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, 485245431Sdim { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 }, 486245431Sdim { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, 487245431Sdim { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 }, 488245431Sdim { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, 489245431Sdim { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 }, 490245431Sdim { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, 491245431Sdim { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 }, 492245431Sdim { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 }, 493245431Sdim { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, 494245431Sdim { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 }, 495245431Sdim { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 }, 496226890Sdim { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, 497226890Sdim { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 }, 498226890Sdim { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 }, 499226890Sdim { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 }, 500226890Sdim { X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 }, 501226890Sdim { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 }, 502226890Sdim { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 }, 503226890Sdim { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 }, 504226890Sdim { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 }, 505226890Sdim { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 }, 506252723Sdim { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, 507226890Sdim { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, 508226890Sdim { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, 509226890Sdim { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, 510252723Sdim { X86::VPABSBrr128, X86::VPABSBrm128, 0 }, 511252723Sdim { X86::VPABSDrr128, X86::VPABSDrm128, 0 }, 512252723Sdim { X86::VPABSWrr128, X86::VPABSWrm128, 0 }, 513252723Sdim { X86::VPERMILPDri, X86::VPERMILPDmi, 0 }, 514252723Sdim { X86::VPERMILPSri, X86::VPERMILPSmi, 0 }, 515252723Sdim { X86::VPSHUFDri, X86::VPSHUFDmi, 0 }, 516252723Sdim { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 }, 517252723Sdim { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 }, 518252723Sdim { X86::VRCPPSr, X86::VRCPPSm, 0 }, 519252723Sdim { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 }, 520252723Sdim { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 }, 521252723Sdim { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 }, 522252723Sdim { X86::VSQRTPDr, X86::VSQRTPDm, 0 }, 523252723Sdim { X86::VSQRTPSr, X86::VSQRTPSm, 0 }, 524226890Sdim { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, 525226890Sdim { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, 526245431Sdim { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE }, 527245431Sdim 528226890Sdim // AVX 256-bit foldable instructions 529226890Sdim { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, 530226890Sdim { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, 531235633Sdim { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 }, 532226890Sdim { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, 533235633Sdim { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, 534252723Sdim { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 }, 535252723Sdim { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 }, 536245431Sdim 537235633Sdim // AVX2 foldable instructions 538252723Sdim { X86::VPABSBrr256, X86::VPABSBrm256, 0 }, 539252723Sdim { X86::VPABSDrr256, X86::VPABSDrm256, 0 }, 540252723Sdim { X86::VPABSWrr256, X86::VPABSWrm256, 0 }, 541252723Sdim { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 }, 542252723Sdim { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 }, 543252723Sdim { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 }, 544252723Sdim { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, 545252723Sdim { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 }, 546252723Sdim { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, 547252723Sdim { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, 548252723Sdim { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, 549245431Sdim { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, 550245431Sdim { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, 551245431Sdim 552263509Sdim // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions 553252723Sdim { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, 554252723Sdim { X86::BEXTR64rr, X86::BEXTR64rm, 0 }, 555263509Sdim { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 }, 556263509Sdim { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 }, 557263509Sdim { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 }, 558263509Sdim { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 }, 559263509Sdim { X86::BLCI32rr, X86::BLCI32rm, 0 }, 560263509Sdim { X86::BLCI64rr, X86::BLCI64rm, 0 }, 561263509Sdim { X86::BLCIC32rr, X86::BLCIC32rm, 0 }, 562263509Sdim { X86::BLCIC64rr, X86::BLCIC64rm, 0 }, 563263509Sdim { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 }, 564263509Sdim { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 }, 565263509Sdim { X86::BLCS32rr, X86::BLCS32rm, 0 }, 566263509Sdim { X86::BLCS64rr, X86::BLCS64rm, 0 }, 567263509Sdim { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 }, 568263509Sdim { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 }, 569252723Sdim { X86::BLSI32rr, X86::BLSI32rm, 0 }, 570252723Sdim { X86::BLSI64rr, X86::BLSI64rm, 0 }, 571263509Sdim { X86::BLSIC32rr, X86::BLSIC32rm, 0 }, 572263509Sdim { X86::BLSIC64rr, X86::BLSIC64rm, 0 }, 573252723Sdim { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 }, 574252723Sdim { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 }, 575252723Sdim { X86::BLSR32rr, X86::BLSR32rm, 0 }, 576252723Sdim { X86::BLSR64rr, X86::BLSR64rm, 0 }, 577252723Sdim { X86::BZHI32rr, X86::BZHI32rm, 0 }, 578252723Sdim { X86::BZHI64rr, X86::BZHI64rm, 0 }, 579252723Sdim { X86::LZCNT16rr, X86::LZCNT16rm, 0 }, 580252723Sdim { X86::LZCNT32rr, X86::LZCNT32rm, 0 }, 581252723Sdim { X86::LZCNT64rr, X86::LZCNT64rm, 0 }, 582252723Sdim { X86::POPCNT16rr, X86::POPCNT16rm, 0 }, 583252723Sdim { X86::POPCNT32rr, X86::POPCNT32rm, 0 }, 584252723Sdim { X86::POPCNT64rr, X86::POPCNT64rm, 0 }, 585245431Sdim { X86::RORX32ri, X86::RORX32mi, 0 }, 586245431Sdim { X86::RORX64ri, X86::RORX64mi, 0 }, 587245431Sdim { X86::SARX32rr, X86::SARX32rm, 0 }, 588245431Sdim { X86::SARX64rr, X86::SARX64rm, 0 }, 589245431Sdim { X86::SHRX32rr, X86::SHRX32rm, 0 }, 590245431Sdim { X86::SHRX64rr, X86::SHRX64rm, 0 }, 591245431Sdim { X86::SHLX32rr, X86::SHLX32rm, 0 }, 592245431Sdim { X86::SHLX64rr, X86::SHLX64rm, 0 }, 593263509Sdim { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 }, 594263509Sdim { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 }, 595252723Sdim { X86::TZCNT16rr, X86::TZCNT16rm, 0 }, 596252723Sdim { X86::TZCNT32rr, X86::TZCNT32rm, 0 }, 597252723Sdim { X86::TZCNT64rr, X86::TZCNT64rm, 0 }, 598263509Sdim { X86::TZMSK32rr, X86::TZMSK32rm, 0 }, 599263509Sdim { X86::TZMSK64rr, X86::TZMSK64rm, 0 }, 600263509Sdim 601263509Sdim // AVX-512 foldable instructions 602263509Sdim { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, 603263509Sdim { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, 604263509Sdim { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 }, 605263509Sdim { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 }, 606263509Sdim { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 }, 607263509Sdim { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 }, 608263509Sdim 609263509Sdim // AES foldable instructions 610263509Sdim { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, 611263509Sdim { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 }, 612263509Sdim { X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 }, 613263509Sdim { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 }, 614193323Sed }; 615193323Sed 616193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { 617235633Sdim unsigned RegOp = OpTbl1[i].RegOp; 618235633Sdim unsigned MemOp = OpTbl1[i].MemOp; 619235633Sdim unsigned Flags = OpTbl1[i].Flags; 620226890Sdim AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, 621226890Sdim RegOp, MemOp, 622226890Sdim // Index 1, folded load 623226890Sdim Flags | TB_INDEX_1 | TB_FOLDED_LOAD); 624193323Sed } 625193323Sed 626235633Sdim static const X86OpTblEntry OpTbl2[] = { 627226890Sdim { X86::ADC32rr, X86::ADC32rm, 0 }, 628226890Sdim { X86::ADC64rr, X86::ADC64rm, 0 }, 629226890Sdim { X86::ADD16rr, X86::ADD16rm, 0 }, 630226890Sdim { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE }, 631226890Sdim { X86::ADD32rr, X86::ADD32rm, 0 }, 632226890Sdim { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE }, 633226890Sdim { X86::ADD64rr, X86::ADD64rm, 0 }, 634226890Sdim { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE }, 635226890Sdim { X86::ADD8rr, X86::ADD8rm, 0 }, 636226890Sdim { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 }, 637226890Sdim { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 }, 638226890Sdim { X86::ADDSDrr, X86::ADDSDrm, 0 }, 639226890Sdim { X86::ADDSSrr, X86::ADDSSrm, 0 }, 640226890Sdim { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 }, 641226890Sdim { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 }, 642226890Sdim { X86::AND16rr, X86::AND16rm, 0 }, 643226890Sdim { X86::AND32rr, X86::AND32rm, 0 }, 644226890Sdim { X86::AND64rr, X86::AND64rm, 0 }, 645226890Sdim { X86::AND8rr, X86::AND8rm, 0 }, 646226890Sdim { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 }, 647226890Sdim { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 }, 648226890Sdim { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 }, 649226890Sdim { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 }, 650235633Sdim { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 }, 651235633Sdim { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 }, 652235633Sdim { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 }, 653235633Sdim { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 }, 654226890Sdim { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, 655226890Sdim { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, 656226890Sdim { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, 657226890Sdim { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, 658226890Sdim { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, 659226890Sdim { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, 660226890Sdim { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, 661226890Sdim { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, 662226890Sdim { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, 663226890Sdim { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, 664226890Sdim { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, 665226890Sdim { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, 666226890Sdim { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, 667226890Sdim { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, 668226890Sdim { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, 669226890Sdim { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, 670226890Sdim { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, 671226890Sdim { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, 672226890Sdim { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, 673226890Sdim { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, 674226890Sdim { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, 675226890Sdim { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, 676226890Sdim { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, 677226890Sdim { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, 678226890Sdim { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, 679226890Sdim { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, 680226890Sdim { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, 681226890Sdim { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, 682226890Sdim { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, 683226890Sdim { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, 684226890Sdim { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, 685226890Sdim { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, 686226890Sdim { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, 687226890Sdim { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, 688226890Sdim { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, 689226890Sdim { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, 690226890Sdim { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, 691226890Sdim { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, 692226890Sdim { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, 693226890Sdim { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, 694226890Sdim { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, 695226890Sdim { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, 696226890Sdim { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, 697226890Sdim { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, 698226890Sdim { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, 699226890Sdim { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, 700226890Sdim { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, 701226890Sdim { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, 702226890Sdim { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, 703226890Sdim { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, 704226890Sdim { X86::CMPSDrr, X86::CMPSDrm, 0 }, 705226890Sdim { X86::CMPSSrr, X86::CMPSSrm, 0 }, 706226890Sdim { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, 707226890Sdim { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, 708226890Sdim { X86::DIVSDrr, X86::DIVSDrm, 0 }, 709226890Sdim { X86::DIVSSrr, X86::DIVSSrm, 0 }, 710226890Sdim { X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 }, 711226890Sdim { X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 }, 712226890Sdim { X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 }, 713226890Sdim { X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 }, 714226890Sdim { X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 }, 715226890Sdim { X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 }, 716226890Sdim { X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 }, 717226890Sdim { X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 }, 718226890Sdim { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 }, 719226890Sdim { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 }, 720226890Sdim { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 }, 721226890Sdim { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 }, 722226890Sdim { X86::IMUL16rr, X86::IMUL16rm, 0 }, 723226890Sdim { X86::IMUL32rr, X86::IMUL32rm, 0 }, 724226890Sdim { X86::IMUL64rr, X86::IMUL64rm, 0 }, 725226890Sdim { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, 726226890Sdim { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, 727245431Sdim { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, 728245431Sdim { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, 729245431Sdim { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, 730245431Sdim { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, 731245431Sdim { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, 732245431Sdim { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, 733226890Sdim { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, 734226890Sdim { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, 735226890Sdim { X86::MAXSDrr, X86::MAXSDrm, 0 }, 736226890Sdim { X86::MAXSSrr, X86::MAXSSrm, 0 }, 737226890Sdim { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, 738226890Sdim { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, 739226890Sdim { X86::MINSDrr, X86::MINSDrm, 0 }, 740226890Sdim { X86::MINSSrr, X86::MINSSrm, 0 }, 741235633Sdim { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, 742226890Sdim { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, 743226890Sdim { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, 744226890Sdim { X86::MULSDrr, X86::MULSDrm, 0 }, 745226890Sdim { X86::MULSSrr, X86::MULSSrm, 0 }, 746226890Sdim { X86::OR16rr, X86::OR16rm, 0 }, 747226890Sdim { X86::OR32rr, X86::OR32rm, 0 }, 748226890Sdim { X86::OR64rr, X86::OR64rm, 0 }, 749226890Sdim { X86::OR8rr, X86::OR8rm, 0 }, 750226890Sdim { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 }, 751226890Sdim { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 }, 752226890Sdim { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 }, 753226890Sdim { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 }, 754235633Sdim { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 }, 755226890Sdim { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 }, 756226890Sdim { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 }, 757226890Sdim { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 }, 758226890Sdim { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 }, 759226890Sdim { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 }, 760226890Sdim { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 }, 761235633Sdim { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 }, 762235633Sdim { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 }, 763226890Sdim { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 }, 764235633Sdim { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 }, 765226890Sdim { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 }, 766226890Sdim { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, 767226890Sdim { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, 768226890Sdim { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, 769235633Sdim { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 }, 770226890Sdim { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, 771226890Sdim { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, 772235633Sdim { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 }, 773226890Sdim { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 }, 774226890Sdim { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 }, 775226890Sdim { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, 776235633Sdim { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, 777226890Sdim { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, 778235633Sdim { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 }, 779235633Sdim { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 }, 780235633Sdim { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, 781235633Sdim { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 }, 782235633Sdim { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, 783235633Sdim { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 }, 784226890Sdim { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, 785235633Sdim { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 }, 786226890Sdim { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, 787226890Sdim { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, 788226890Sdim { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, 789226890Sdim { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, 790226890Sdim { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, 791252723Sdim { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 }, 792252723Sdim { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 }, 793252723Sdim { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 }, 794252723Sdim { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 }, 795252723Sdim { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, 796252723Sdim { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, 797252723Sdim { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, 798252723Sdim { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, 799226890Sdim { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, 800235633Sdim { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 }, 801226890Sdim { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, 802226890Sdim { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 }, 803226890Sdim { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 }, 804226890Sdim { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 }, 805226890Sdim { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, 806226890Sdim { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, 807226890Sdim { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, 808235633Sdim { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 }, 809235633Sdim { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 }, 810235633Sdim { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 }, 811235633Sdim { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 }, 812226890Sdim { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, 813226890Sdim { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, 814226890Sdim { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, 815226890Sdim { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 }, 816226890Sdim { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 }, 817226890Sdim { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 }, 818226890Sdim { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 }, 819226890Sdim { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 }, 820226890Sdim { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 }, 821226890Sdim { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 }, 822226890Sdim { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 }, 823226890Sdim { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 }, 824226890Sdim { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 }, 825226890Sdim { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 }, 826226890Sdim { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 }, 827226890Sdim { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 }, 828226890Sdim { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 }, 829226890Sdim { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 }, 830226890Sdim { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 }, 831226890Sdim { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, 832226890Sdim { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, 833226890Sdim { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, 834226890Sdim { X86::SBB32rr, X86::SBB32rm, 0 }, 835226890Sdim { X86::SBB64rr, X86::SBB64rm, 0 }, 836226890Sdim { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, 837226890Sdim { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 }, 838226890Sdim { X86::SUB16rr, X86::SUB16rm, 0 }, 839226890Sdim { X86::SUB32rr, X86::SUB32rm, 0 }, 840226890Sdim { X86::SUB64rr, X86::SUB64rm, 0 }, 841226890Sdim { X86::SUB8rr, X86::SUB8rm, 0 }, 842226890Sdim { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 }, 843226890Sdim { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 }, 844226890Sdim { X86::SUBSDrr, X86::SUBSDrm, 0 }, 845226890Sdim { X86::SUBSSrr, X86::SUBSSrm, 0 }, 846193323Sed // FIXME: TEST*rr -> swapped operand of TEST*mr. 847226890Sdim { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 }, 848226890Sdim { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 }, 849226890Sdim { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 }, 850226890Sdim { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 }, 851226890Sdim { X86::XOR16rr, X86::XOR16rm, 0 }, 852226890Sdim { X86::XOR32rr, X86::XOR32rm, 0 }, 853226890Sdim { X86::XOR64rr, X86::XOR64rm, 0 }, 854226890Sdim { X86::XOR8rr, X86::XOR8rm, 0 }, 855226890Sdim { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 }, 856226890Sdim { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 }, 857226890Sdim // AVX 128-bit versions of foldable instructions 858226890Sdim { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 }, 859226890Sdim { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 }, 860226890Sdim { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, 861226890Sdim { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, 862226890Sdim { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, 863226890Sdim { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 }, 864226890Sdim { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 }, 865226890Sdim { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, 866226890Sdim { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, 867226890Sdim { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, 868226890Sdim { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, 869226890Sdim { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, 870252723Sdim { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 }, 871252723Sdim { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 }, 872226890Sdim { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, 873226890Sdim { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, 874226890Sdim { X86::VSQRTSSr, X86::VSQRTSSm, 0 }, 875252723Sdim { X86::VADDPDrr, X86::VADDPDrm, 0 }, 876252723Sdim { X86::VADDPSrr, X86::VADDPSrm, 0 }, 877226890Sdim { X86::VADDSDrr, X86::VADDSDrm, 0 }, 878226890Sdim { X86::VADDSSrr, X86::VADDSSrm, 0 }, 879252723Sdim { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 }, 880252723Sdim { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 }, 881252723Sdim { X86::VANDNPDrr, X86::VANDNPDrm, 0 }, 882252723Sdim { X86::VANDNPSrr, X86::VANDNPSrm, 0 }, 883252723Sdim { X86::VANDPDrr, X86::VANDPDrm, 0 }, 884252723Sdim { X86::VANDPSrr, X86::VANDPSrm, 0 }, 885252723Sdim { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 }, 886252723Sdim { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 }, 887252723Sdim { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 }, 888252723Sdim { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 }, 889252723Sdim { X86::VCMPPDrri, X86::VCMPPDrmi, 0 }, 890252723Sdim { X86::VCMPPSrri, X86::VCMPPSrmi, 0 }, 891226890Sdim { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, 892226890Sdim { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, 893252723Sdim { X86::VDIVPDrr, X86::VDIVPDrm, 0 }, 894252723Sdim { X86::VDIVPSrr, X86::VDIVPSrm, 0 }, 895226890Sdim { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, 896226890Sdim { X86::VDIVSSrr, X86::VDIVSSrm, 0 }, 897226890Sdim { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 }, 898226890Sdim { X86::VFsANDNPSrr, X86::VFsANDNPSrm, TB_ALIGN_16 }, 899226890Sdim { X86::VFsANDPDrr, X86::VFsANDPDrm, TB_ALIGN_16 }, 900226890Sdim { X86::VFsANDPSrr, X86::VFsANDPSrm, TB_ALIGN_16 }, 901226890Sdim { X86::VFsORPDrr, X86::VFsORPDrm, TB_ALIGN_16 }, 902226890Sdim { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 }, 903226890Sdim { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 }, 904226890Sdim { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 }, 905252723Sdim { X86::VHADDPDrr, X86::VHADDPDrm, 0 }, 906252723Sdim { X86::VHADDPSrr, X86::VHADDPSrm, 0 }, 907252723Sdim { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 }, 908252723Sdim { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 }, 909226890Sdim { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 }, 910226890Sdim { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 }, 911252723Sdim { X86::VMAXPDrr, X86::VMAXPDrm, 0 }, 912252723Sdim { X86::VMAXPSrr, X86::VMAXPSrm, 0 }, 913226890Sdim { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, 914226890Sdim { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, 915252723Sdim { X86::VMINPDrr, X86::VMINPDrm, 0 }, 916252723Sdim { X86::VMINPSrr, X86::VMINPSrm, 0 }, 917226890Sdim { X86::VMINSDrr, X86::VMINSDrm, 0 }, 918226890Sdim { X86::VMINSSrr, X86::VMINSSrm, 0 }, 919252723Sdim { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 }, 920252723Sdim { X86::VMULPDrr, X86::VMULPDrm, 0 }, 921252723Sdim { X86::VMULPSrr, X86::VMULPSrm, 0 }, 922226890Sdim { X86::VMULSDrr, X86::VMULSDrm, 0 }, 923226890Sdim { X86::VMULSSrr, X86::VMULSSrm, 0 }, 924252723Sdim { X86::VORPDrr, X86::VORPDrm, 0 }, 925252723Sdim { X86::VORPSrr, X86::VORPSrm, 0 }, 926252723Sdim { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 }, 927252723Sdim { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 }, 928252723Sdim { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 }, 929252723Sdim { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 }, 930252723Sdim { X86::VPADDBrr, X86::VPADDBrm, 0 }, 931252723Sdim { X86::VPADDDrr, X86::VPADDDrm, 0 }, 932252723Sdim { X86::VPADDQrr, X86::VPADDQrm, 0 }, 933252723Sdim { X86::VPADDSBrr, X86::VPADDSBrm, 0 }, 934252723Sdim { X86::VPADDSWrr, X86::VPADDSWrm, 0 }, 935252723Sdim { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 }, 936252723Sdim { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 }, 937252723Sdim { X86::VPADDWrr, X86::VPADDWrm, 0 }, 938252723Sdim { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 }, 939252723Sdim { X86::VPANDNrr, X86::VPANDNrm, 0 }, 940252723Sdim { X86::VPANDrr, X86::VPANDrm, 0 }, 941252723Sdim { X86::VPAVGBrr, X86::VPAVGBrm, 0 }, 942252723Sdim { X86::VPAVGWrr, X86::VPAVGWrm, 0 }, 943252723Sdim { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 }, 944252723Sdim { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 }, 945252723Sdim { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 }, 946252723Sdim { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 }, 947252723Sdim { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 }, 948252723Sdim { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 }, 949252723Sdim { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 }, 950252723Sdim { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 }, 951252723Sdim { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 }, 952252723Sdim { X86::VPHADDDrr, X86::VPHADDDrm, 0 }, 953252723Sdim { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 }, 954252723Sdim { X86::VPHADDWrr, X86::VPHADDWrm, 0 }, 955252723Sdim { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 }, 956252723Sdim { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 }, 957252723Sdim { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 }, 958252723Sdim { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 }, 959252723Sdim { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 }, 960252723Sdim { X86::VPINSRWrri, X86::VPINSRWrmi, 0 }, 961252723Sdim { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 }, 962252723Sdim { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 }, 963252723Sdim { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 }, 964252723Sdim { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 }, 965252723Sdim { X86::VPMINSWrr, X86::VPMINSWrm, 0 }, 966252723Sdim { X86::VPMINUBrr, X86::VPMINUBrm, 0 }, 967252723Sdim { X86::VPMINSBrr, X86::VPMINSBrm, 0 }, 968252723Sdim { X86::VPMINSDrr, X86::VPMINSDrm, 0 }, 969252723Sdim { X86::VPMINUDrr, X86::VPMINUDrm, 0 }, 970252723Sdim { X86::VPMINUWrr, X86::VPMINUWrm, 0 }, 971252723Sdim { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 }, 972252723Sdim { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 }, 973252723Sdim { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 }, 974252723Sdim { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 }, 975252723Sdim { X86::VPMULDQrr, X86::VPMULDQrm, 0 }, 976252723Sdim { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 }, 977252723Sdim { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 }, 978252723Sdim { X86::VPMULHWrr, X86::VPMULHWrm, 0 }, 979252723Sdim { X86::VPMULLDrr, X86::VPMULLDrm, 0 }, 980252723Sdim { X86::VPMULLWrr, X86::VPMULLWrm, 0 }, 981252723Sdim { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 }, 982252723Sdim { X86::VPORrr, X86::VPORrm, 0 }, 983252723Sdim { X86::VPSADBWrr, X86::VPSADBWrm, 0 }, 984252723Sdim { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 }, 985252723Sdim { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 }, 986252723Sdim { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 }, 987252723Sdim { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 }, 988252723Sdim { X86::VPSLLDrr, X86::VPSLLDrm, 0 }, 989252723Sdim { X86::VPSLLQrr, X86::VPSLLQrm, 0 }, 990252723Sdim { X86::VPSLLWrr, X86::VPSLLWrm, 0 }, 991252723Sdim { X86::VPSRADrr, X86::VPSRADrm, 0 }, 992252723Sdim { X86::VPSRAWrr, X86::VPSRAWrm, 0 }, 993252723Sdim { X86::VPSRLDrr, X86::VPSRLDrm, 0 }, 994252723Sdim { X86::VPSRLQrr, X86::VPSRLQrm, 0 }, 995252723Sdim { X86::VPSRLWrr, X86::VPSRLWrm, 0 }, 996252723Sdim { X86::VPSUBBrr, X86::VPSUBBrm, 0 }, 997252723Sdim { X86::VPSUBDrr, X86::VPSUBDrm, 0 }, 998252723Sdim { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 }, 999252723Sdim { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 }, 1000252723Sdim { X86::VPSUBWrr, X86::VPSUBWrm, 0 }, 1001252723Sdim { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 }, 1002252723Sdim { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 }, 1003252723Sdim { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 }, 1004252723Sdim { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 }, 1005252723Sdim { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 }, 1006252723Sdim { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 }, 1007252723Sdim { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, 1008252723Sdim { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, 1009252723Sdim { X86::VPXORrr, X86::VPXORrm, 0 }, 1010252723Sdim { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, 1011252723Sdim { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, 1012252723Sdim { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, 1013252723Sdim { X86::VSUBPSrr, X86::VSUBPSrm, 0 }, 1014226890Sdim { X86::VSUBSDrr, X86::VSUBSDrm, 0 }, 1015226890Sdim { X86::VSUBSSrr, X86::VSUBSSrm, 0 }, 1016252723Sdim { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 }, 1017252723Sdim { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 }, 1018252723Sdim { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 }, 1019252723Sdim { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 }, 1020252723Sdim { X86::VXORPDrr, X86::VXORPDrm, 0 }, 1021252723Sdim { X86::VXORPSrr, X86::VXORPSrm, 0 }, 1022235633Sdim // AVX 256-bit foldable instructions 1023252723Sdim { X86::VADDPDYrr, X86::VADDPDYrm, 0 }, 1024252723Sdim { X86::VADDPSYrr, X86::VADDPSYrm, 0 }, 1025252723Sdim { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 }, 1026252723Sdim { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 }, 1027252723Sdim { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 }, 1028252723Sdim { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 }, 1029252723Sdim { X86::VANDPDYrr, X86::VANDPDYrm, 0 }, 1030252723Sdim { X86::VANDPSYrr, X86::VANDPSYrm, 0 }, 1031252723Sdim { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 }, 1032252723Sdim { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 }, 1033252723Sdim { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 }, 1034252723Sdim { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 }, 1035252723Sdim { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 }, 1036252723Sdim { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 }, 1037252723Sdim { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 }, 1038252723Sdim { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 }, 1039252723Sdim { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 }, 1040252723Sdim { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 }, 1041252723Sdim { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 }, 1042252723Sdim { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 }, 1043252723Sdim { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 }, 1044252723Sdim { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 }, 1045252723Sdim { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 }, 1046252723Sdim { X86::VMINPDYrr, X86::VMINPDYrm, 0 }, 1047252723Sdim { X86::VMINPSYrr, X86::VMINPSYrm, 0 }, 1048252723Sdim { X86::VMULPDYrr, X86::VMULPDYrm, 0 }, 1049252723Sdim { X86::VMULPSYrr, X86::VMULPSYrm, 0 }, 1050252723Sdim { X86::VORPDYrr, X86::VORPDYrm, 0 }, 1051252723Sdim { X86::VORPSYrr, X86::VORPSYrm, 0 }, 1052252723Sdim { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 }, 1053252723Sdim { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 }, 1054252723Sdim { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 }, 1055252723Sdim { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 }, 1056252723Sdim { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 }, 1057252723Sdim { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 }, 1058252723Sdim { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 }, 1059252723Sdim { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 }, 1060252723Sdim { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 }, 1061252723Sdim { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 }, 1062252723Sdim { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 }, 1063252723Sdim { X86::VXORPDYrr, X86::VXORPDYrm, 0 }, 1064252723Sdim { X86::VXORPSYrr, X86::VXORPSYrm, 0 }, 1065235633Sdim // AVX2 foldable instructions 1066252723Sdim { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 }, 1067252723Sdim { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 }, 1068252723Sdim { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 }, 1069252723Sdim { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 }, 1070252723Sdim { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 }, 1071252723Sdim { X86::VPADDBYrr, X86::VPADDBYrm, 0 }, 1072252723Sdim { X86::VPADDDYrr, X86::VPADDDYrm, 0 }, 1073252723Sdim { X86::VPADDQYrr, X86::VPADDQYrm, 0 }, 1074252723Sdim { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 }, 1075252723Sdim { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 }, 1076252723Sdim { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 }, 1077252723Sdim { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 }, 1078252723Sdim { X86::VPADDWYrr, X86::VPADDWYrm, 0 }, 1079252723Sdim { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 }, 1080252723Sdim { X86::VPANDNYrr, X86::VPANDNYrm, 0 }, 1081252723Sdim { X86::VPANDYrr, X86::VPANDYrm, 0 }, 1082252723Sdim { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 }, 1083252723Sdim { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 }, 1084252723Sdim { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 }, 1085252723Sdim { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 }, 1086252723Sdim { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 }, 1087252723Sdim { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 }, 1088252723Sdim { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 }, 1089252723Sdim { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 }, 1090252723Sdim { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 }, 1091252723Sdim { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 }, 1092252723Sdim { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 }, 1093252723Sdim { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 }, 1094252723Sdim { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 }, 1095252723Sdim { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 }, 1096252723Sdim { X86::VPERMDYrr, X86::VPERMDYrm, 0 }, 1097252723Sdim { X86::VPERMPDYri, X86::VPERMPDYmi, 0 }, 1098252723Sdim { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 }, 1099252723Sdim { X86::VPERMQYri, X86::VPERMQYmi, 0 }, 1100252723Sdim { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 }, 1101252723Sdim { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 }, 1102252723Sdim { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 }, 1103252723Sdim { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 }, 1104252723Sdim { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 }, 1105252723Sdim { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 }, 1106252723Sdim { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 }, 1107252723Sdim { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 }, 1108252723Sdim { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 }, 1109252723Sdim { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 }, 1110252723Sdim { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 }, 1111252723Sdim { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 }, 1112252723Sdim { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 }, 1113252723Sdim { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 }, 1114252723Sdim { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 }, 1115252723Sdim { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 }, 1116252723Sdim { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 }, 1117252723Sdim { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 }, 1118252723Sdim { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 }, 1119252723Sdim { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 }, 1120252723Sdim { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 }, 1121252723Sdim { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 }, 1122252723Sdim { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 }, 1123252723Sdim { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 }, 1124252723Sdim { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 }, 1125252723Sdim { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 }, 1126252723Sdim { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 }, 1127252723Sdim { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 }, 1128252723Sdim { X86::VPORYrr, X86::VPORYrm, 0 }, 1129252723Sdim { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 }, 1130252723Sdim { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 }, 1131252723Sdim { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 }, 1132252723Sdim { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 }, 1133252723Sdim { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 }, 1134252723Sdim { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 }, 1135252723Sdim { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 }, 1136252723Sdim { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 }, 1137252723Sdim { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 }, 1138252723Sdim { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 }, 1139252723Sdim { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 }, 1140252723Sdim { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 }, 1141252723Sdim { X86::VPSRADYrr, X86::VPSRADYrm, 0 }, 1142252723Sdim { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 }, 1143252723Sdim { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 }, 1144252723Sdim { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 }, 1145252723Sdim { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 }, 1146252723Sdim { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 }, 1147252723Sdim { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 }, 1148252723Sdim { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 }, 1149252723Sdim { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 }, 1150252723Sdim { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 }, 1151252723Sdim { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 }, 1152252723Sdim { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 }, 1153252723Sdim { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 }, 1154252723Sdim { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 }, 1155252723Sdim { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 }, 1156252723Sdim { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 }, 1157252723Sdim { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 }, 1158252723Sdim { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 }, 1159252723Sdim { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 }, 1160252723Sdim { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 }, 1161252723Sdim { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 }, 1162252723Sdim { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 }, 1163252723Sdim { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 }, 1164252723Sdim { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 }, 1165252723Sdim { X86::VPXORYrr, X86::VPXORYrm, 0 }, 1166226890Sdim // FIXME: add AVX 256-bit foldable instructions 1167245431Sdim 1168245431Sdim // FMA4 foldable patterns 1169245431Sdim { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 }, 1170245431Sdim { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 }, 1171245431Sdim { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 }, 1172245431Sdim { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 }, 1173245431Sdim { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 }, 1174245431Sdim { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 }, 1175245431Sdim { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 }, 1176245431Sdim { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 }, 1177245431Sdim { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 }, 1178245431Sdim { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 }, 1179245431Sdim { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 }, 1180245431Sdim { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 }, 1181245431Sdim { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 }, 1182245431Sdim { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 }, 1183245431Sdim { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 }, 1184245431Sdim { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 }, 1185245431Sdim { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 }, 1186245431Sdim { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 }, 1187245431Sdim { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 }, 1188245431Sdim { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 }, 1189245431Sdim { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 }, 1190245431Sdim { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 }, 1191245431Sdim { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 }, 1192245431Sdim { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 }, 1193245431Sdim { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 }, 1194245431Sdim { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 }, 1195245431Sdim { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 }, 1196245431Sdim { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 }, 1197245431Sdim { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 }, 1198245431Sdim { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 }, 1199245431Sdim { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 }, 1200245431Sdim { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 }, 1201245431Sdim 1202245431Sdim // BMI/BMI2 foldable instructions 1203252723Sdim { X86::ANDN32rr, X86::ANDN32rm, 0 }, 1204252723Sdim { X86::ANDN64rr, X86::ANDN64rm, 0 }, 1205245431Sdim { X86::MULX32rr, X86::MULX32rm, 0 }, 1206245431Sdim { X86::MULX64rr, X86::MULX64rm, 0 }, 1207252723Sdim { X86::PDEP32rr, X86::PDEP32rm, 0 }, 1208252723Sdim { X86::PDEP64rr, X86::PDEP64rm, 0 }, 1209252723Sdim { X86::PEXT32rr, X86::PEXT32rm, 0 }, 1210252723Sdim { X86::PEXT64rr, X86::PEXT64rm, 0 }, 1211263509Sdim 1212263509Sdim // AVX-512 foldable instructions 1213263509Sdim { X86::VPADDDZrr, X86::VPADDDZrm, 0 }, 1214263509Sdim { X86::VPADDQZrr, X86::VPADDQZrm, 0 }, 1215263509Sdim { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, 1216263509Sdim { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, 1217263509Sdim { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 }, 1218263509Sdim { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 }, 1219263509Sdim { X86::VMULPSZrr, X86::VMULPSZrm, 0 }, 1220263509Sdim { X86::VMULPDZrr, X86::VMULPDZrm, 0 }, 1221263509Sdim { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 }, 1222263509Sdim { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 }, 1223263509Sdim { X86::VMINPSZrr, X86::VMINPSZrm, 0 }, 1224263509Sdim { X86::VMINPDZrr, X86::VMINPDZrm, 0 }, 1225263509Sdim { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 }, 1226263509Sdim { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 }, 1227263509Sdim { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, 1228263509Sdim { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, 1229263509Sdim { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 }, 1230263509Sdim { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 }, 1231263509Sdim { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 }, 1232263509Sdim { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 }, 1233263509Sdim { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 }, 1234263509Sdim { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 }, 1235263509Sdim { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, 1236263509Sdim { X86::VALIGNQrri, X86::VALIGNQrmi, 0 }, 1237263509Sdim { X86::VALIGNDrri, X86::VALIGNDrmi, 0 }, 1238263509Sdim 1239263509Sdim // AES foldable instructions 1240263509Sdim { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, 1241263509Sdim { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 }, 1242263509Sdim { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 }, 1243263509Sdim { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 }, 1244263509Sdim { X86::VAESDECLASTrr, X86::VAESDECLASTrm, TB_ALIGN_16 }, 1245263509Sdim { X86::VAESDECrr, X86::VAESDECrm, TB_ALIGN_16 }, 1246263509Sdim { X86::VAESENCLASTrr, X86::VAESENCLASTrm, TB_ALIGN_16 }, 1247263509Sdim { X86::VAESENCrr, X86::VAESENCrm, TB_ALIGN_16 }, 1248263509Sdim 1249263509Sdim // SHA foldable instructions 1250263509Sdim { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 }, 1251263509Sdim { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 }, 1252263509Sdim { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 }, 1253263509Sdim { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 }, 1254263509Sdim { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 }, 1255263509Sdim { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 }, 1256263509Sdim { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }, 1257193323Sed }; 1258193323Sed 1259193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { 1260235633Sdim unsigned RegOp = OpTbl2[i].RegOp; 1261235633Sdim unsigned MemOp = OpTbl2[i].MemOp; 1262235633Sdim unsigned Flags = OpTbl2[i].Flags; 1263226890Sdim AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, 1264226890Sdim RegOp, MemOp, 1265226890Sdim // Index 2, folded load 1266226890Sdim Flags | TB_INDEX_2 | TB_FOLDED_LOAD); 1267226890Sdim } 1268245431Sdim 1269245431Sdim static const X86OpTblEntry OpTbl3[] = { 1270245431Sdim // FMA foldable instructions 1271245431Sdim { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 }, 1272245431Sdim { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 }, 1273245431Sdim { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 }, 1274245431Sdim { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 }, 1275245431Sdim { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 }, 1276245431Sdim { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 }, 1277245431Sdim { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 }, 1278245431Sdim { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 }, 1279245431Sdim 1280245431Sdim { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 }, 1281245431Sdim { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 }, 1282245431Sdim { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 }, 1283245431Sdim { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 }, 1284245431Sdim { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 }, 1285245431Sdim { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 }, 1286245431Sdim { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 }, 1287245431Sdim { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 }, 1288245431Sdim { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 }, 1289245431Sdim { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 }, 1290245431Sdim { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 }, 1291245431Sdim { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 }, 1292245431Sdim 1293245431Sdim { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 }, 1294245431Sdim { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 }, 1295245431Sdim { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 }, 1296245431Sdim { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 }, 1297245431Sdim { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 }, 1298245431Sdim { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 }, 1299245431Sdim { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 }, 1300245431Sdim { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 }, 1301245431Sdim 1302245431Sdim { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 }, 1303245431Sdim { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 }, 1304245431Sdim { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 }, 1305245431Sdim { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 }, 1306245431Sdim { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 }, 1307245431Sdim { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 }, 1308245431Sdim { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 }, 1309245431Sdim { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 }, 1310245431Sdim { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 }, 1311245431Sdim { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 }, 1312245431Sdim { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 }, 1313245431Sdim { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 }, 1314245431Sdim 1315245431Sdim { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 }, 1316245431Sdim { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 }, 1317245431Sdim { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 }, 1318245431Sdim { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 }, 1319245431Sdim { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 }, 1320245431Sdim { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 }, 1321245431Sdim { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 }, 1322245431Sdim { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 }, 1323245431Sdim 1324245431Sdim { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 }, 1325245431Sdim { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 }, 1326245431Sdim { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 }, 1327245431Sdim { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 }, 1328245431Sdim { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 }, 1329245431Sdim { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 }, 1330245431Sdim { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 }, 1331245431Sdim { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 }, 1332245431Sdim { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_32 }, 1333245431Sdim { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 }, 1334245431Sdim { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 }, 1335245431Sdim { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 }, 1336245431Sdim 1337245431Sdim { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 }, 1338245431Sdim { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 }, 1339245431Sdim { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 }, 1340245431Sdim { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 }, 1341245431Sdim { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 }, 1342245431Sdim { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 }, 1343245431Sdim { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 }, 1344245431Sdim { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 }, 1345245431Sdim 1346245431Sdim { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 }, 1347245431Sdim { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 }, 1348245431Sdim { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 }, 1349245431Sdim { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 }, 1350245431Sdim { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 }, 1351245431Sdim { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 }, 1352245431Sdim { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 }, 1353245431Sdim { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 }, 1354245431Sdim { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 }, 1355245431Sdim { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 }, 1356245431Sdim { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 }, 1357245431Sdim { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 }, 1358245431Sdim 1359245431Sdim { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 }, 1360245431Sdim { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 }, 1361245431Sdim { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_16 }, 1362245431Sdim { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_16 }, 1363245431Sdim { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_16 }, 1364245431Sdim { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_16 }, 1365245431Sdim { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_32 }, 1366245431Sdim { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_32 }, 1367245431Sdim { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_32 }, 1368245431Sdim { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 }, 1369245431Sdim { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 }, 1370245431Sdim { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 }, 1371245431Sdim 1372245431Sdim { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 }, 1373245431Sdim { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 }, 1374245431Sdim { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_16 }, 1375245431Sdim { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_16 }, 1376245431Sdim { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_16 }, 1377245431Sdim { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_16 }, 1378245431Sdim { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_32 }, 1379245431Sdim { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_32 }, 1380245431Sdim { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_32 }, 1381245431Sdim { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 }, 1382245431Sdim { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 }, 1383245431Sdim { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, 1384245431Sdim 1385245431Sdim // FMA4 foldable patterns 1386245431Sdim { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 }, 1387245431Sdim { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 }, 1388245431Sdim { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 }, 1389245431Sdim { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 }, 1390245431Sdim { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 }, 1391245431Sdim { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 }, 1392245431Sdim { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 }, 1393245431Sdim { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 }, 1394245431Sdim { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 }, 1395245431Sdim { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 }, 1396245431Sdim { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 }, 1397245431Sdim { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 }, 1398245431Sdim { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 }, 1399245431Sdim { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 }, 1400245431Sdim { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 }, 1401245431Sdim { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 }, 1402245431Sdim { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 }, 1403245431Sdim { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 }, 1404245431Sdim { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 }, 1405245431Sdim { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 }, 1406245431Sdim { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 }, 1407245431Sdim { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 }, 1408245431Sdim { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 }, 1409245431Sdim { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 }, 1410245431Sdim { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 }, 1411245431Sdim { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 }, 1412245431Sdim { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 }, 1413245431Sdim { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 }, 1414245431Sdim { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 }, 1415245431Sdim { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 }, 1416245431Sdim { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 }, 1417245431Sdim { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 }, 1418263509Sdim // AVX-512 VPERMI instructions with 3 source operands. 1419263509Sdim { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, 1420263509Sdim { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, 1421263509Sdim { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, 1422263509Sdim { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, 1423245431Sdim }; 1424245431Sdim 1425245431Sdim for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { 1426245431Sdim unsigned RegOp = OpTbl3[i].RegOp; 1427245431Sdim unsigned MemOp = OpTbl3[i].MemOp; 1428245431Sdim unsigned Flags = OpTbl3[i].Flags; 1429245431Sdim AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, 1430245431Sdim RegOp, MemOp, 1431245431Sdim // Index 3, folded load 1432245431Sdim Flags | TB_INDEX_3 | TB_FOLDED_LOAD); 1433245431Sdim } 1434245431Sdim 1435226890Sdim} 1436218893Sdim 1437226890Sdimvoid 1438226890SdimX86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable, 1439226890Sdim MemOp2RegOpTableType &M2RTable, 1440226890Sdim unsigned RegOp, unsigned MemOp, unsigned Flags) { 1441226890Sdim if ((Flags & TB_NO_FORWARD) == 0) { 1442226890Sdim assert(!R2MTable.count(RegOp) && "Duplicate entry!"); 1443226890Sdim R2MTable[RegOp] = std::make_pair(MemOp, Flags); 1444226890Sdim } 1445226890Sdim if ((Flags & TB_NO_REVERSE) == 0) { 1446226890Sdim assert(!M2RTable.count(MemOp) && 1447218893Sdim "Duplicated entries in unfolding maps?"); 1448226890Sdim M2RTable[MemOp] = std::make_pair(RegOp, Flags); 1449226890Sdim } 1450193323Sed} 1451193323Sed 1452202375Srdivackybool 1453202375SrdivackyX86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, 1454202375Srdivacky unsigned &SrcReg, unsigned &DstReg, 1455202375Srdivacky unsigned &SubIdx) const { 1456202375Srdivacky switch (MI.getOpcode()) { 1457202375Srdivacky default: break; 1458202375Srdivacky case X86::MOVSX16rr8: 1459202375Srdivacky case X86::MOVZX16rr8: 1460202375Srdivacky case X86::MOVSX32rr8: 1461202375Srdivacky case X86::MOVZX32rr8: 1462202375Srdivacky case X86::MOVSX64rr8: 1463202375Srdivacky if (!TM.getSubtarget<X86Subtarget>().is64Bit()) 1464202375Srdivacky // It's not always legal to reference the low 8-bit of the larger 1465202375Srdivacky // register in 32-bit mode. 1466202375Srdivacky return false; 1467202375Srdivacky case X86::MOVSX32rr16: 1468202375Srdivacky case X86::MOVZX32rr16: 1469202375Srdivacky case X86::MOVSX64rr16: 1470263509Sdim case X86::MOVSX64rr32: { 1471202375Srdivacky if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) 1472202375Srdivacky // Be conservative. 1473202375Srdivacky return false; 1474202375Srdivacky SrcReg = MI.getOperand(1).getReg(); 1475202375Srdivacky DstReg = MI.getOperand(0).getReg(); 1476202375Srdivacky switch (MI.getOpcode()) { 1477245431Sdim default: llvm_unreachable("Unreachable!"); 1478202375Srdivacky case X86::MOVSX16rr8: 1479202375Srdivacky case X86::MOVZX16rr8: 1480202375Srdivacky case X86::MOVSX32rr8: 1481202375Srdivacky case X86::MOVZX32rr8: 1482202375Srdivacky case X86::MOVSX64rr8: 1483208599Srdivacky SubIdx = X86::sub_8bit; 1484202375Srdivacky break; 1485202375Srdivacky case X86::MOVSX32rr16: 1486202375Srdivacky case X86::MOVZX32rr16: 1487202375Srdivacky case X86::MOVSX64rr16: 1488208599Srdivacky SubIdx = X86::sub_16bit; 1489202375Srdivacky break; 1490202375Srdivacky case X86::MOVSX64rr32: 1491208599Srdivacky SubIdx = X86::sub_32bit; 1492202375Srdivacky break; 1493202375Srdivacky } 1494202375Srdivacky return true; 1495202375Srdivacky } 1496202375Srdivacky } 1497202375Srdivacky return false; 1498202375Srdivacky} 1499202375Srdivacky 1500199481Srdivacky/// isFrameOperand - Return true and the FrameIndex if the specified 1501199481Srdivacky/// operand and follow operands form a reference to the stack frame. 1502199481Srdivackybool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, 1503199481Srdivacky int &FrameIndex) const { 1504199481Srdivacky if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && 1505199481Srdivacky MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && 1506199481Srdivacky MI->getOperand(Op+1).getImm() == 1 && 1507199481Srdivacky MI->getOperand(Op+2).getReg() == 0 && 1508199481Srdivacky MI->getOperand(Op+3).getImm() == 0) { 1509199481Srdivacky FrameIndex = MI->getOperand(Op).getIndex(); 1510199481Srdivacky return true; 1511199481Srdivacky } 1512199481Srdivacky return false; 1513199481Srdivacky} 1514199481Srdivacky 1515199481Srdivackystatic bool isFrameLoadOpcode(int Opcode) { 1516199481Srdivacky switch (Opcode) { 1517235633Sdim default: 1518235633Sdim return false; 1519193323Sed case X86::MOV8rm: 1520193323Sed case X86::MOV16rm: 1521193323Sed case X86::MOV32rm: 1522193323Sed case X86::MOV64rm: 1523193323Sed case X86::LD_Fp64m: 1524193323Sed case X86::MOVSSrm: 1525193323Sed case X86::MOVSDrm: 1526193323Sed case X86::MOVAPSrm: 1527193323Sed case X86::MOVAPDrm: 1528193323Sed case X86::MOVDQArm: 1529226890Sdim case X86::VMOVSSrm: 1530226890Sdim case X86::VMOVSDrm: 1531226890Sdim case X86::VMOVAPSrm: 1532226890Sdim case X86::VMOVAPDrm: 1533226890Sdim case X86::VMOVDQArm: 1534224145Sdim case X86::VMOVAPSYrm: 1535224145Sdim case X86::VMOVAPDYrm: 1536224145Sdim case X86::VMOVDQAYrm: 1537193323Sed case X86::MMX_MOVD64rm: 1538193323Sed case X86::MMX_MOVQ64rm: 1539263509Sdim case X86::VMOVDQA32rm: 1540263509Sdim case X86::VMOVDQA64rm: 1541199481Srdivacky return true; 1542193323Sed } 1543193323Sed} 1544193323Sed 1545199481Srdivackystatic bool isFrameStoreOpcode(int Opcode) { 1546199481Srdivacky switch (Opcode) { 1547193323Sed default: break; 1548193323Sed case X86::MOV8mr: 1549193323Sed case X86::MOV16mr: 1550193323Sed case X86::MOV32mr: 1551193323Sed case X86::MOV64mr: 1552193323Sed case X86::ST_FpP64m: 1553193323Sed case X86::MOVSSmr: 1554193323Sed case X86::MOVSDmr: 1555193323Sed case X86::MOVAPSmr: 1556193323Sed case X86::MOVAPDmr: 1557193323Sed case X86::MOVDQAmr: 1558226890Sdim case X86::VMOVSSmr: 1559226890Sdim case X86::VMOVSDmr: 1560226890Sdim case X86::VMOVAPSmr: 1561226890Sdim case X86::VMOVAPDmr: 1562226890Sdim case X86::VMOVDQAmr: 1563224145Sdim case X86::VMOVAPSYmr: 1564224145Sdim case X86::VMOVAPDYmr: 1565224145Sdim case X86::VMOVDQAYmr: 1566193323Sed case X86::MMX_MOVD64mr: 1567193323Sed case X86::MMX_MOVQ64mr: 1568193323Sed case X86::MMX_MOVNTQmr: 1569199481Srdivacky return true; 1570199481Srdivacky } 1571199481Srdivacky return false; 1572199481Srdivacky} 1573199481Srdivacky 1574218893Sdimunsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 1575199481Srdivacky int &FrameIndex) const { 1576199481Srdivacky if (isFrameLoadOpcode(MI->getOpcode())) 1577212904Sdim if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex)) 1578199481Srdivacky return MI->getOperand(0).getReg(); 1579199481Srdivacky return 0; 1580199481Srdivacky} 1581199481Srdivacky 1582218893Sdimunsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 1583199481Srdivacky int &FrameIndex) const { 1584199481Srdivacky if (isFrameLoadOpcode(MI->getOpcode())) { 1585199481Srdivacky unsigned Reg; 1586199481Srdivacky if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) 1587199481Srdivacky return Reg; 1588199481Srdivacky // Check for post-frame index elimination operations 1589200581Srdivacky const MachineMemOperand *Dummy; 1590200581Srdivacky return hasLoadFromStackSlot(MI, Dummy, FrameIndex); 1591199481Srdivacky } 1592199481Srdivacky return 0; 1593199481Srdivacky} 1594199481Srdivacky 1595199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, 1596199481Srdivacky int &FrameIndex) const { 1597199481Srdivacky if (isFrameStoreOpcode(MI->getOpcode())) 1598212904Sdim if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 && 1599212904Sdim isFrameOperand(MI, 0, FrameIndex)) 1600210299Sed return MI->getOperand(X86::AddrNumOperands).getReg(); 1601199481Srdivacky return 0; 1602199481Srdivacky} 1603199481Srdivacky 1604199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, 1605199481Srdivacky int &FrameIndex) const { 1606199481Srdivacky if (isFrameStoreOpcode(MI->getOpcode())) { 1607199481Srdivacky unsigned Reg; 1608199481Srdivacky if ((Reg = isStoreToStackSlot(MI, FrameIndex))) 1609199481Srdivacky return Reg; 1610199481Srdivacky // Check for post-frame index elimination operations 1611200581Srdivacky const MachineMemOperand *Dummy; 1612200581Srdivacky return hasStoreToStackSlot(MI, Dummy, FrameIndex); 1613193323Sed } 1614193323Sed return 0; 1615193323Sed} 1616193323Sed 1617193323Sed/// regIsPICBase - Return true if register is PIC base (i.e.g defined by 1618193323Sed/// X86::MOVPC32r. 1619193323Sedstatic bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { 1620245431Sdim // Don't waste compile time scanning use-def chains of physregs. 1621245431Sdim if (!TargetRegisterInfo::isVirtualRegister(BaseReg)) 1622245431Sdim return false; 1623193323Sed bool isPICBase = false; 1624193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 1625193323Sed E = MRI.def_end(); I != E; ++I) { 1626193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 1627193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 1628193323Sed return false; 1629193323Sed assert(!isPICBase && "More than one PIC base?"); 1630193323Sed isPICBase = true; 1631193323Sed } 1632193323Sed return isPICBase; 1633193323Sed} 1634193323Sed 1635193323Sedbool 1636198090SrdivackyX86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, 1637198090Srdivacky AliasAnalysis *AA) const { 1638193323Sed switch (MI->getOpcode()) { 1639193323Sed default: break; 1640245431Sdim case X86::MOV8rm: 1641245431Sdim case X86::MOV16rm: 1642245431Sdim case X86::MOV32rm: 1643245431Sdim case X86::MOV64rm: 1644245431Sdim case X86::LD_Fp64m: 1645245431Sdim case X86::MOVSSrm: 1646245431Sdim case X86::MOVSDrm: 1647245431Sdim case X86::MOVAPSrm: 1648245431Sdim case X86::MOVUPSrm: 1649245431Sdim case X86::MOVAPDrm: 1650245431Sdim case X86::MOVDQArm: 1651252723Sdim case X86::MOVDQUrm: 1652245431Sdim case X86::VMOVSSrm: 1653245431Sdim case X86::VMOVSDrm: 1654245431Sdim case X86::VMOVAPSrm: 1655245431Sdim case X86::VMOVUPSrm: 1656245431Sdim case X86::VMOVAPDrm: 1657245431Sdim case X86::VMOVDQArm: 1658252723Sdim case X86::VMOVDQUrm: 1659245431Sdim case X86::VMOVAPSYrm: 1660245431Sdim case X86::VMOVUPSYrm: 1661245431Sdim case X86::VMOVAPDYrm: 1662245431Sdim case X86::VMOVDQAYrm: 1663252723Sdim case X86::VMOVDQUYrm: 1664245431Sdim case X86::MMX_MOVD64rm: 1665245431Sdim case X86::MMX_MOVQ64rm: 1666245431Sdim case X86::FsVMOVAPSrm: 1667245431Sdim case X86::FsVMOVAPDrm: 1668245431Sdim case X86::FsMOVAPSrm: 1669245431Sdim case X86::FsMOVAPDrm: { 1670245431Sdim // Loads from constant pools are trivially rematerializable. 1671245431Sdim if (MI->getOperand(1).isReg() && 1672245431Sdim MI->getOperand(2).isImm() && 1673245431Sdim MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 1674245431Sdim MI->isInvariantLoad(AA)) { 1675245431Sdim unsigned BaseReg = MI->getOperand(1).getReg(); 1676245431Sdim if (BaseReg == 0 || BaseReg == X86::RIP) 1677245431Sdim return true; 1678245431Sdim // Allow re-materialization of PIC load. 1679245431Sdim if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) 1680245431Sdim return false; 1681245431Sdim const MachineFunction &MF = *MI->getParent()->getParent(); 1682245431Sdim const MachineRegisterInfo &MRI = MF.getRegInfo(); 1683245431Sdim return regIsPICBase(BaseReg, MRI); 1684193323Sed } 1685245431Sdim return false; 1686245431Sdim } 1687218893Sdim 1688245431Sdim case X86::LEA32r: 1689245431Sdim case X86::LEA64r: { 1690245431Sdim if (MI->getOperand(2).isImm() && 1691245431Sdim MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 1692245431Sdim !MI->getOperand(4).isReg()) { 1693245431Sdim // lea fi#, lea GV, etc. are all rematerializable. 1694245431Sdim if (!MI->getOperand(1).isReg()) 1695245431Sdim return true; 1696245431Sdim unsigned BaseReg = MI->getOperand(1).getReg(); 1697245431Sdim if (BaseReg == 0) 1698245431Sdim return true; 1699245431Sdim // Allow re-materialization of lea PICBase + x. 1700245431Sdim const MachineFunction &MF = *MI->getParent()->getParent(); 1701245431Sdim const MachineRegisterInfo &MRI = MF.getRegInfo(); 1702245431Sdim return regIsPICBase(BaseReg, MRI); 1703245431Sdim } 1704245431Sdim return false; 1705193323Sed } 1706245431Sdim } 1707193323Sed 1708193323Sed // All other instructions marked M_REMATERIALIZABLE are always trivially 1709193323Sed // rematerializable. 1710193323Sed return true; 1711193323Sed} 1712193323Sed 1713193323Sed/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that 1714193323Sed/// would clobber the EFLAGS condition register. Note the result may be 1715193323Sed/// conservative. If it cannot definitely determine the safety after visiting 1716198090Srdivacky/// a few instructions in each direction it assumes it's not safe. 1717193323Sedstatic bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, 1718193323Sed MachineBasicBlock::iterator I) { 1719206083Srdivacky MachineBasicBlock::iterator E = MBB.end(); 1720206083Srdivacky 1721193323Sed // For compile time consideration, if we are not able to determine the 1722198090Srdivacky // safety after visiting 4 instructions in each direction, we will assume 1723198090Srdivacky // it's not safe. 1724198090Srdivacky MachineBasicBlock::iterator Iter = I; 1725226890Sdim for (unsigned i = 0; Iter != E && i < 4; ++i) { 1726193323Sed bool SeenDef = false; 1727198090Srdivacky for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { 1728198090Srdivacky MachineOperand &MO = Iter->getOperand(j); 1729235633Sdim if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS)) 1730235633Sdim SeenDef = true; 1731193323Sed if (!MO.isReg()) 1732193323Sed continue; 1733193323Sed if (MO.getReg() == X86::EFLAGS) { 1734193323Sed if (MO.isUse()) 1735193323Sed return false; 1736193323Sed SeenDef = true; 1737193323Sed } 1738193323Sed } 1739193323Sed 1740193323Sed if (SeenDef) 1741193323Sed // This instruction defines EFLAGS, no need to look any further. 1742193323Sed return true; 1743198090Srdivacky ++Iter; 1744206083Srdivacky // Skip over DBG_VALUE. 1745206083Srdivacky while (Iter != E && Iter->isDebugValue()) 1746206083Srdivacky ++Iter; 1747226890Sdim } 1748193323Sed 1749226890Sdim // It is safe to clobber EFLAGS at the end of a block of no successor has it 1750226890Sdim // live in. 1751226890Sdim if (Iter == E) { 1752226890Sdim for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), 1753226890Sdim SE = MBB.succ_end(); SI != SE; ++SI) 1754226890Sdim if ((*SI)->isLiveIn(X86::EFLAGS)) 1755226890Sdim return false; 1756226890Sdim return true; 1757193323Sed } 1758193323Sed 1759206083Srdivacky MachineBasicBlock::iterator B = MBB.begin(); 1760198090Srdivacky Iter = I; 1761198090Srdivacky for (unsigned i = 0; i < 4; ++i) { 1762198090Srdivacky // If we make it to the beginning of the block, it's safe to clobber 1763198090Srdivacky // EFLAGS iff EFLAGS is not live-in. 1764206083Srdivacky if (Iter == B) 1765198090Srdivacky return !MBB.isLiveIn(X86::EFLAGS); 1766198090Srdivacky 1767198090Srdivacky --Iter; 1768206083Srdivacky // Skip over DBG_VALUE. 1769206083Srdivacky while (Iter != B && Iter->isDebugValue()) 1770206083Srdivacky --Iter; 1771206083Srdivacky 1772198090Srdivacky bool SawKill = false; 1773198090Srdivacky for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { 1774198090Srdivacky MachineOperand &MO = Iter->getOperand(j); 1775235633Sdim // A register mask may clobber EFLAGS, but we should still look for a 1776235633Sdim // live EFLAGS def. 1777235633Sdim if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS)) 1778235633Sdim SawKill = true; 1779198090Srdivacky if (MO.isReg() && MO.getReg() == X86::EFLAGS) { 1780198090Srdivacky if (MO.isDef()) return MO.isDead(); 1781198090Srdivacky if (MO.isKill()) SawKill = true; 1782198090Srdivacky } 1783198090Srdivacky } 1784198090Srdivacky 1785198090Srdivacky if (SawKill) 1786198090Srdivacky // This instruction kills EFLAGS and doesn't redefine it, so 1787198090Srdivacky // there's no need to look further. 1788198090Srdivacky return true; 1789198090Srdivacky } 1790198090Srdivacky 1791193323Sed // Conservative answer. 1792193323Sed return false; 1793193323Sed} 1794193323Sed 1795193323Sedvoid X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, 1796193323Sed MachineBasicBlock::iterator I, 1797198090Srdivacky unsigned DestReg, unsigned SubIdx, 1798199481Srdivacky const MachineInstr *Orig, 1799210299Sed const TargetRegisterInfo &TRI) const { 1800263509Sdim // MOV32r0 is implemented with a xor which clobbers condition code. 1801263509Sdim // Re-materialize it as movri instructions to avoid side effects. 1802198090Srdivacky unsigned Opc = Orig->getOpcode(); 1803263509Sdim if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) { 1804263509Sdim DebugLoc DL = Orig->getDebugLoc(); 1805263509Sdim BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0)) 1806263509Sdim .addImm(0); 1807263509Sdim } else { 1808193323Sed MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); 1809193323Sed MBB.insert(I, MI); 1810193323Sed } 1811193323Sed 1812198090Srdivacky MachineInstr *NewMI = prior(I); 1813210299Sed NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); 1814193323Sed} 1815193323Sed 1816193323Sed/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that 1817193323Sed/// is not marked dead. 1818193323Sedstatic bool hasLiveCondCodeDef(MachineInstr *MI) { 1819193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1820193323Sed MachineOperand &MO = MI->getOperand(i); 1821193323Sed if (MO.isReg() && MO.isDef() && 1822193323Sed MO.getReg() == X86::EFLAGS && !MO.isDead()) { 1823193323Sed return true; 1824193323Sed } 1825193323Sed } 1826193323Sed return false; 1827193323Sed} 1828193323Sed 1829263509Sdim/// getTruncatedShiftCount - check whether the shift count for a machine operand 1830263509Sdim/// is non-zero. 1831263509Sdiminline static unsigned getTruncatedShiftCount(MachineInstr *MI, 1832263509Sdim unsigned ShiftAmtOperandIdx) { 1833263509Sdim // The shift count is six bits with the REX.W prefix and five bits without. 1834263509Sdim unsigned ShiftCountMask = (MI->getDesc().TSFlags & X86II::REX_W) ? 63 : 31; 1835263509Sdim unsigned Imm = MI->getOperand(ShiftAmtOperandIdx).getImm(); 1836263509Sdim return Imm & ShiftCountMask; 1837263509Sdim} 1838263509Sdim 1839263509Sdim/// isTruncatedShiftCountForLEA - check whether the given shift count is appropriate 1840263509Sdim/// can be represented by a LEA instruction. 1841263509Sdiminline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) { 1842263509Sdim // Left shift instructions can be transformed into load-effective-address 1843263509Sdim // instructions if we can encode them appropriately. 1844263509Sdim // A LEA instruction utilizes a SIB byte to encode it's scale factor. 1845263509Sdim // The SIB.scale field is two bits wide which means that we can encode any 1846263509Sdim // shift amount less than 4. 1847263509Sdim return ShAmt < 4 && ShAmt > 0; 1848263509Sdim} 1849263509Sdim 1850263509Sdimbool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src, 1851263509Sdim unsigned Opc, bool AllowSP, 1852263509Sdim unsigned &NewSrc, bool &isKill, bool &isUndef, 1853263509Sdim MachineOperand &ImplicitOp) const { 1854263509Sdim MachineFunction &MF = *MI->getParent()->getParent(); 1855263509Sdim const TargetRegisterClass *RC; 1856263509Sdim if (AllowSP) { 1857263509Sdim RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass; 1858263509Sdim } else { 1859263509Sdim RC = Opc != X86::LEA32r ? 1860263509Sdim &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass; 1861263509Sdim } 1862263509Sdim unsigned SrcReg = Src.getReg(); 1863263509Sdim 1864263509Sdim // For both LEA64 and LEA32 the register already has essentially the right 1865263509Sdim // type (32-bit or 64-bit) we may just need to forbid SP. 1866263509Sdim if (Opc != X86::LEA64_32r) { 1867263509Sdim NewSrc = SrcReg; 1868263509Sdim isKill = Src.isKill(); 1869263509Sdim isUndef = Src.isUndef(); 1870263509Sdim 1871263509Sdim if (TargetRegisterInfo::isVirtualRegister(NewSrc) && 1872263509Sdim !MF.getRegInfo().constrainRegClass(NewSrc, RC)) 1873263509Sdim return false; 1874263509Sdim 1875263509Sdim return true; 1876263509Sdim } 1877263509Sdim 1878263509Sdim // This is for an LEA64_32r and incoming registers are 32-bit. One way or 1879263509Sdim // another we need to add 64-bit registers to the final MI. 1880263509Sdim if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) { 1881263509Sdim ImplicitOp = Src; 1882263509Sdim ImplicitOp.setImplicit(); 1883263509Sdim 1884263509Sdim NewSrc = getX86SubSuperRegister(Src.getReg(), MVT::i64); 1885263509Sdim MachineBasicBlock::LivenessQueryResult LQR = 1886263509Sdim MI->getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI); 1887263509Sdim 1888263509Sdim switch (LQR) { 1889263509Sdim case MachineBasicBlock::LQR_Unknown: 1890263509Sdim // We can't give sane liveness flags to the instruction, abandon LEA 1891263509Sdim // formation. 1892263509Sdim return false; 1893263509Sdim case MachineBasicBlock::LQR_Live: 1894263509Sdim isKill = MI->killsRegister(SrcReg); 1895263509Sdim isUndef = false; 1896263509Sdim break; 1897263509Sdim default: 1898263509Sdim // The physreg itself is dead, so we have to use it as an <undef>. 1899263509Sdim isKill = false; 1900263509Sdim isUndef = true; 1901263509Sdim break; 1902263509Sdim } 1903263509Sdim } else { 1904263509Sdim // Virtual register of the wrong class, we have to create a temporary 64-bit 1905263509Sdim // vreg to feed into the LEA. 1906263509Sdim NewSrc = MF.getRegInfo().createVirtualRegister(RC); 1907263509Sdim BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1908263509Sdim get(TargetOpcode::COPY)) 1909263509Sdim .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit) 1910263509Sdim .addOperand(Src); 1911263509Sdim 1912263509Sdim // Which is obviously going to be dead after we're done with it. 1913263509Sdim isKill = true; 1914263509Sdim isUndef = false; 1915263509Sdim } 1916263509Sdim 1917263509Sdim // We've set all the parameters without issue. 1918263509Sdim return true; 1919263509Sdim} 1920263509Sdim 1921200581Srdivacky/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when 1922200581Srdivacky/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting 1923200581Srdivacky/// to a 32-bit superregister and then truncating back down to a 16-bit 1924200581Srdivacky/// subregister. 1925200581SrdivackyMachineInstr * 1926200581SrdivackyX86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, 1927200581Srdivacky MachineFunction::iterator &MFI, 1928200581Srdivacky MachineBasicBlock::iterator &MBBI, 1929200581Srdivacky LiveVariables *LV) const { 1930200581Srdivacky MachineInstr *MI = MBBI; 1931200581Srdivacky unsigned Dest = MI->getOperand(0).getReg(); 1932200581Srdivacky unsigned Src = MI->getOperand(1).getReg(); 1933200581Srdivacky bool isDead = MI->getOperand(0).isDead(); 1934200581Srdivacky bool isKill = MI->getOperand(1).isKill(); 1935200581Srdivacky 1936200581Srdivacky MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); 1937200581Srdivacky unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); 1938263509Sdim unsigned Opc, leaInReg; 1939263509Sdim if (TM.getSubtarget<X86Subtarget>().is64Bit()) { 1940263509Sdim Opc = X86::LEA64_32r; 1941263509Sdim leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); 1942263509Sdim } else { 1943263509Sdim Opc = X86::LEA32r; 1944263509Sdim leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); 1945263509Sdim } 1946218893Sdim 1947200581Srdivacky // Build and insert into an implicit UNDEF value. This is OK because 1948218893Sdim // well be shifting and then extracting the lower 16-bits. 1949200581Srdivacky // This has the potential to cause partial register stall. e.g. 1950200581Srdivacky // movw (%rbp,%rcx,2), %dx 1951200581Srdivacky // leal -65(%rdx), %esi 1952200581Srdivacky // But testing has shown this *does* help performance in 64-bit mode (at 1953200581Srdivacky // least on modern x86 machines). 1954200581Srdivacky BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); 1955200581Srdivacky MachineInstr *InsMI = 1956210299Sed BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) 1957210299Sed .addReg(leaInReg, RegState::Define, X86::sub_16bit) 1958210299Sed .addReg(Src, getKillRegState(isKill)); 1959200581Srdivacky 1960200581Srdivacky MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), 1961200581Srdivacky get(Opc), leaOutReg); 1962200581Srdivacky switch (MIOpc) { 1963245431Sdim default: llvm_unreachable("Unreachable!"); 1964200581Srdivacky case X86::SHL16ri: { 1965200581Srdivacky unsigned ShAmt = MI->getOperand(2).getImm(); 1966200581Srdivacky MIB.addReg(0).addImm(1 << ShAmt) 1967210299Sed .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0); 1968200581Srdivacky break; 1969200581Srdivacky } 1970200581Srdivacky case X86::INC16r: 1971200581Srdivacky case X86::INC64_16r: 1972210299Sed addRegOffset(MIB, leaInReg, true, 1); 1973200581Srdivacky break; 1974200581Srdivacky case X86::DEC16r: 1975200581Srdivacky case X86::DEC64_16r: 1976210299Sed addRegOffset(MIB, leaInReg, true, -1); 1977200581Srdivacky break; 1978200581Srdivacky case X86::ADD16ri: 1979200581Srdivacky case X86::ADD16ri8: 1980218893Sdim case X86::ADD16ri_DB: 1981218893Sdim case X86::ADD16ri8_DB: 1982218893Sdim addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); 1983200581Srdivacky break; 1984218893Sdim case X86::ADD16rr: 1985218893Sdim case X86::ADD16rr_DB: { 1986200581Srdivacky unsigned Src2 = MI->getOperand(2).getReg(); 1987200581Srdivacky bool isKill2 = MI->getOperand(2).isKill(); 1988200581Srdivacky unsigned leaInReg2 = 0; 1989200581Srdivacky MachineInstr *InsMI2 = 0; 1990200581Srdivacky if (Src == Src2) { 1991200581Srdivacky // ADD16rr %reg1028<kill>, %reg1028 1992200581Srdivacky // just a single insert_subreg. 1993200581Srdivacky addRegReg(MIB, leaInReg, true, leaInReg, false); 1994200581Srdivacky } else { 1995263509Sdim if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1996263509Sdim leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); 1997263509Sdim else 1998263509Sdim leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); 1999200581Srdivacky // Build and insert into an implicit UNDEF value. This is OK because 2000218893Sdim // well be shifting and then extracting the lower 16-bits. 2001235633Sdim BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2); 2002200581Srdivacky InsMI2 = 2003235633Sdim BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) 2004210299Sed .addReg(leaInReg2, RegState::Define, X86::sub_16bit) 2005210299Sed .addReg(Src2, getKillRegState(isKill2)); 2006200581Srdivacky addRegReg(MIB, leaInReg, true, leaInReg2, true); 2007200581Srdivacky } 2008200581Srdivacky if (LV && isKill2 && InsMI2) 2009200581Srdivacky LV->replaceKillInstruction(Src2, MI, InsMI2); 2010200581Srdivacky break; 2011200581Srdivacky } 2012200581Srdivacky } 2013200581Srdivacky 2014200581Srdivacky MachineInstr *NewMI = MIB; 2015200581Srdivacky MachineInstr *ExtMI = 2016210299Sed BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) 2017200581Srdivacky .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 2018210299Sed .addReg(leaOutReg, RegState::Kill, X86::sub_16bit); 2019200581Srdivacky 2020200581Srdivacky if (LV) { 2021200581Srdivacky // Update live variables 2022200581Srdivacky LV->getVarInfo(leaInReg).Kills.push_back(NewMI); 2023200581Srdivacky LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); 2024200581Srdivacky if (isKill) 2025200581Srdivacky LV->replaceKillInstruction(Src, MI, InsMI); 2026200581Srdivacky if (isDead) 2027200581Srdivacky LV->replaceKillInstruction(Dest, MI, ExtMI); 2028200581Srdivacky } 2029200581Srdivacky 2030200581Srdivacky return ExtMI; 2031200581Srdivacky} 2032200581Srdivacky 2033193323Sed/// convertToThreeAddress - This method must be implemented by targets that 2034193323Sed/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target 2035193323Sed/// may be able to convert a two-address instruction into a true 2036193323Sed/// three-address instruction on demand. This allows the X86 target (for 2037193323Sed/// example) to convert ADD and SHL instructions into LEA instructions if they 2038193323Sed/// would require register copies due to two-addressness. 2039193323Sed/// 2040193323Sed/// This method returns a null pointer if the transformation cannot be 2041193323Sed/// performed, otherwise it returns the new instruction. 2042193323Sed/// 2043193323SedMachineInstr * 2044193323SedX86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 2045193323Sed MachineBasicBlock::iterator &MBBI, 2046193323Sed LiveVariables *LV) const { 2047193323Sed MachineInstr *MI = MBBI; 2048263509Sdim 2049263509Sdim // The following opcodes also sets the condition code register(s). Only 2050263509Sdim // convert them to equivalent lea if the condition code register def's 2051263509Sdim // are dead! 2052263509Sdim if (hasLiveCondCodeDef(MI)) 2053263509Sdim return 0; 2054263509Sdim 2055193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 2056193323Sed // All instructions input are two-addr instructions. Get the known operands. 2057245431Sdim const MachineOperand &Dest = MI->getOperand(0); 2058245431Sdim const MachineOperand &Src = MI->getOperand(1); 2059193323Sed 2060193323Sed MachineInstr *NewMI = NULL; 2061193323Sed // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When 2062193323Sed // we have better subtarget support, enable the 16-bit LEA generation here. 2063200581Srdivacky // 16-bit LEA is also slow on Core2. 2064193323Sed bool DisableLEA16 = true; 2065200581Srdivacky bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2066193323Sed 2067193323Sed unsigned MIOpc = MI->getOpcode(); 2068193323Sed switch (MIOpc) { 2069193323Sed case X86::SHUFPSrri: { 2070193323Sed assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); 2071193323Sed if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; 2072218893Sdim 2073193323Sed unsigned B = MI->getOperand(1).getReg(); 2074193323Sed unsigned C = MI->getOperand(2).getReg(); 2075193323Sed if (B != C) return 0; 2076193323Sed unsigned M = MI->getOperand(3).getImm(); 2077193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) 2078245431Sdim .addOperand(Dest).addOperand(Src).addImm(M); 2079193323Sed break; 2080193323Sed } 2081235633Sdim case X86::SHUFPDrri: { 2082235633Sdim assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!"); 2083235633Sdim if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; 2084235633Sdim 2085235633Sdim unsigned B = MI->getOperand(1).getReg(); 2086235633Sdim unsigned C = MI->getOperand(2).getReg(); 2087235633Sdim if (B != C) return 0; 2088235633Sdim unsigned M = MI->getOperand(3).getImm(); 2089235633Sdim 2090235633Sdim // Convert to PSHUFD mask. 2091235633Sdim M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44; 2092235633Sdim 2093235633Sdim NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) 2094245431Sdim .addOperand(Dest).addOperand(Src).addImm(M); 2095235633Sdim break; 2096235633Sdim } 2097193323Sed case X86::SHL64ri: { 2098193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 2099263509Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 2100263509Sdim if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; 2101193323Sed 2102218893Sdim // LEA can't handle RSP. 2103245431Sdim if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && 2104245431Sdim !MF.getRegInfo().constrainRegClass(Src.getReg(), 2105245431Sdim &X86::GR64_NOSPRegClass)) 2106218893Sdim return 0; 2107218893Sdim 2108193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 2109245431Sdim .addOperand(Dest) 2110245431Sdim .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); 2111193323Sed break; 2112193323Sed } 2113193323Sed case X86::SHL32ri: { 2114193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 2115263509Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 2116263509Sdim if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; 2117193323Sed 2118263509Sdim unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 2119263509Sdim 2120218893Sdim // LEA can't handle ESP. 2121263509Sdim bool isKill, isUndef; 2122263509Sdim unsigned SrcReg; 2123263509Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 2124263509Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, 2125263509Sdim SrcReg, isKill, isUndef, ImplicitOp)) 2126218893Sdim return 0; 2127218893Sdim 2128263509Sdim MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 2129245431Sdim .addOperand(Dest) 2130263509Sdim .addReg(0).addImm(1 << ShAmt) 2131263509Sdim .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) 2132263509Sdim .addImm(0).addReg(0); 2133263509Sdim if (ImplicitOp.getReg() != 0) 2134263509Sdim MIB.addOperand(ImplicitOp); 2135263509Sdim NewMI = MIB; 2136263509Sdim 2137193323Sed break; 2138193323Sed } 2139193323Sed case X86::SHL16ri: { 2140193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 2141263509Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 2142263509Sdim if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; 2143193323Sed 2144200581Srdivacky if (DisableLEA16) 2145200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 2146200581Srdivacky NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 2147245431Sdim .addOperand(Dest) 2148245431Sdim .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); 2149193323Sed break; 2150193323Sed } 2151193323Sed default: { 2152193323Sed 2153193323Sed switch (MIOpc) { 2154193323Sed default: return 0; 2155193323Sed case X86::INC64r: 2156193323Sed case X86::INC32r: 2157193323Sed case X86::INC64_32r: { 2158193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 2159193323Sed unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r 2160193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 2161263509Sdim bool isKill, isUndef; 2162263509Sdim unsigned SrcReg; 2163263509Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 2164263509Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, 2165263509Sdim SrcReg, isKill, isUndef, ImplicitOp)) 2166218893Sdim return 0; 2167218893Sdim 2168263509Sdim MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 2169263509Sdim .addOperand(Dest) 2170263509Sdim .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)); 2171263509Sdim if (ImplicitOp.getReg() != 0) 2172263509Sdim MIB.addOperand(ImplicitOp); 2173263509Sdim 2174263509Sdim NewMI = addOffset(MIB, 1); 2175193323Sed break; 2176193323Sed } 2177193323Sed case X86::INC16r: 2178193323Sed case X86::INC64_16r: 2179200581Srdivacky if (DisableLEA16) 2180200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 2181193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 2182245431Sdim NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 2183245431Sdim .addOperand(Dest).addOperand(Src), 1); 2184193323Sed break; 2185193323Sed case X86::DEC64r: 2186193323Sed case X86::DEC32r: 2187193323Sed case X86::DEC64_32r: { 2188193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 2189193323Sed unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r 2190193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 2191263509Sdim 2192263509Sdim bool isKill, isUndef; 2193263509Sdim unsigned SrcReg; 2194263509Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 2195263509Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, 2196263509Sdim SrcReg, isKill, isUndef, ImplicitOp)) 2197218893Sdim return 0; 2198218893Sdim 2199263509Sdim MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 2200263509Sdim .addOperand(Dest) 2201263509Sdim .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill)); 2202263509Sdim if (ImplicitOp.getReg() != 0) 2203263509Sdim MIB.addOperand(ImplicitOp); 2204263509Sdim 2205263509Sdim NewMI = addOffset(MIB, -1); 2206263509Sdim 2207193323Sed break; 2208193323Sed } 2209193323Sed case X86::DEC16r: 2210193323Sed case X86::DEC64_16r: 2211200581Srdivacky if (DisableLEA16) 2212200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 2213193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 2214245431Sdim NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 2215245431Sdim .addOperand(Dest).addOperand(Src), -1); 2216193323Sed break; 2217193323Sed case X86::ADD64rr: 2218218893Sdim case X86::ADD64rr_DB: 2219218893Sdim case X86::ADD32rr: 2220218893Sdim case X86::ADD32rr_DB: { 2221193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 2222218893Sdim unsigned Opc; 2223263509Sdim if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) 2224218893Sdim Opc = X86::LEA64r; 2225263509Sdim else 2226218893Sdim Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 2227218893Sdim 2228263509Sdim bool isKill, isUndef; 2229263509Sdim unsigned SrcReg; 2230263509Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 2231263509Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, 2232263509Sdim SrcReg, isKill, isUndef, ImplicitOp)) 2233263509Sdim return 0; 2234218893Sdim 2235263509Sdim const MachineOperand &Src2 = MI->getOperand(2); 2236263509Sdim bool isKill2, isUndef2; 2237263509Sdim unsigned SrcReg2; 2238263509Sdim MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false); 2239263509Sdim if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false, 2240263509Sdim SrcReg2, isKill2, isUndef2, ImplicitOp2)) 2241218893Sdim return 0; 2242218893Sdim 2243263509Sdim MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 2244263509Sdim .addOperand(Dest); 2245263509Sdim if (ImplicitOp.getReg() != 0) 2246263509Sdim MIB.addOperand(ImplicitOp); 2247263509Sdim if (ImplicitOp2.getReg() != 0) 2248263509Sdim MIB.addOperand(ImplicitOp2); 2249245431Sdim 2250263509Sdim NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2); 2251263509Sdim 2252245431Sdim // Preserve undefness of the operands. 2253245431Sdim NewMI->getOperand(1).setIsUndef(isUndef); 2254245431Sdim NewMI->getOperand(3).setIsUndef(isUndef2); 2255245431Sdim 2256263509Sdim if (LV && Src2.isKill()) 2257263509Sdim LV->replaceKillInstruction(SrcReg2, MI, NewMI); 2258193323Sed break; 2259193323Sed } 2260218893Sdim case X86::ADD16rr: 2261218893Sdim case X86::ADD16rr_DB: { 2262200581Srdivacky if (DisableLEA16) 2263200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 2264193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 2265193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 2266193323Sed bool isKill2 = MI->getOperand(2).isKill(); 2267193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 2268245431Sdim .addOperand(Dest), 2269245431Sdim Src.getReg(), Src.isKill(), Src2, isKill2); 2270245431Sdim 2271245431Sdim // Preserve undefness of the operands. 2272245431Sdim bool isUndef = MI->getOperand(1).isUndef(); 2273245431Sdim bool isUndef2 = MI->getOperand(2).isUndef(); 2274245431Sdim NewMI->getOperand(1).setIsUndef(isUndef); 2275245431Sdim NewMI->getOperand(3).setIsUndef(isUndef2); 2276245431Sdim 2277193323Sed if (LV && isKill2) 2278193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 2279193323Sed break; 2280193323Sed } 2281193323Sed case X86::ADD64ri32: 2282193323Sed case X86::ADD64ri8: 2283218893Sdim case X86::ADD64ri32_DB: 2284218893Sdim case X86::ADD64ri8_DB: 2285193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 2286245431Sdim NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 2287245431Sdim .addOperand(Dest).addOperand(Src), 2288245431Sdim MI->getOperand(2).getImm()); 2289193323Sed break; 2290193323Sed case X86::ADD32ri: 2291218893Sdim case X86::ADD32ri8: 2292218893Sdim case X86::ADD32ri_DB: 2293218893Sdim case X86::ADD32ri8_DB: { 2294193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 2295200581Srdivacky unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 2296263509Sdim 2297263509Sdim bool isKill, isUndef; 2298263509Sdim unsigned SrcReg; 2299263509Sdim MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); 2300263509Sdim if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, 2301263509Sdim SrcReg, isKill, isUndef, ImplicitOp)) 2302263509Sdim return 0; 2303263509Sdim 2304263509Sdim MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 2305263509Sdim .addOperand(Dest) 2306263509Sdim .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill)); 2307263509Sdim if (ImplicitOp.getReg() != 0) 2308263509Sdim MIB.addOperand(ImplicitOp); 2309263509Sdim 2310263509Sdim NewMI = addOffset(MIB, MI->getOperand(2).getImm()); 2311193323Sed break; 2312200581Srdivacky } 2313193323Sed case X86::ADD16ri: 2314193323Sed case X86::ADD16ri8: 2315218893Sdim case X86::ADD16ri_DB: 2316218893Sdim case X86::ADD16ri8_DB: 2317200581Srdivacky if (DisableLEA16) 2318200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 2319193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 2320245431Sdim NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 2321245431Sdim .addOperand(Dest).addOperand(Src), 2322245431Sdim MI->getOperand(2).getImm()); 2323193323Sed break; 2324193323Sed } 2325193323Sed } 2326193323Sed } 2327193323Sed 2328193323Sed if (!NewMI) return 0; 2329193323Sed 2330193323Sed if (LV) { // Update live variables 2331245431Sdim if (Src.isKill()) 2332245431Sdim LV->replaceKillInstruction(Src.getReg(), MI, NewMI); 2333245431Sdim if (Dest.isDead()) 2334245431Sdim LV->replaceKillInstruction(Dest.getReg(), MI, NewMI); 2335193323Sed } 2336193323Sed 2337218893Sdim MFI->insert(MBBI, NewMI); // Insert the new inst 2338193323Sed return NewMI; 2339193323Sed} 2340193323Sed 2341193323Sed/// commuteInstruction - We have a few instructions that must be hacked on to 2342193323Sed/// commute them. 2343193323Sed/// 2344193323SedMachineInstr * 2345193323SedX86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { 2346193323Sed switch (MI->getOpcode()) { 2347193323Sed case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) 2348193323Sed case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) 2349193323Sed case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I) 2350193323Sed case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I) 2351193323Sed case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I) 2352193323Sed case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I) 2353193323Sed unsigned Opc; 2354193323Sed unsigned Size; 2355193323Sed switch (MI->getOpcode()) { 2356198090Srdivacky default: llvm_unreachable("Unreachable!"); 2357193323Sed case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; 2358193323Sed case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; 2359193323Sed case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; 2360193323Sed case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break; 2361193323Sed case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break; 2362193323Sed case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break; 2363193323Sed } 2364193323Sed unsigned Amt = MI->getOperand(3).getImm(); 2365193323Sed if (NewMI) { 2366193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 2367193323Sed MI = MF.CloneMachineInstr(MI); 2368193323Sed NewMI = false; 2369193323Sed } 2370193323Sed MI->setDesc(get(Opc)); 2371193323Sed MI->getOperand(3).setImm(Size-Amt); 2372252723Sdim return TargetInstrInfo::commuteInstruction(MI, NewMI); 2373193323Sed } 2374245431Sdim case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: 2375245431Sdim case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: 2376245431Sdim case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: 2377245431Sdim case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: 2378245431Sdim case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr: 2379245431Sdim case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: 2380245431Sdim case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: 2381245431Sdim case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: 2382245431Sdim case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: 2383245431Sdim case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: 2384245431Sdim case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: 2385245431Sdim case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: 2386245431Sdim case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: 2387245431Sdim case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: 2388245431Sdim case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: 2389245431Sdim case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: { 2390245431Sdim unsigned Opc; 2391193323Sed switch (MI->getOpcode()) { 2392245431Sdim default: llvm_unreachable("Unreachable!"); 2393193323Sed case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; 2394193323Sed case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; 2395193323Sed case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; 2396193323Sed case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break; 2397193323Sed case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break; 2398193323Sed case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break; 2399193323Sed case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break; 2400193323Sed case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break; 2401193323Sed case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break; 2402193323Sed case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; 2403193323Sed case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; 2404193323Sed case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; 2405193323Sed case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break; 2406193323Sed case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break; 2407193323Sed case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break; 2408193323Sed case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break; 2409193323Sed case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break; 2410193323Sed case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break; 2411193323Sed case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break; 2412193323Sed case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break; 2413193323Sed case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break; 2414193323Sed case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break; 2415193323Sed case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break; 2416193323Sed case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break; 2417193323Sed case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break; 2418193323Sed case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break; 2419193323Sed case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break; 2420193323Sed case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break; 2421193323Sed case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break; 2422193323Sed case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break; 2423193323Sed case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break; 2424193323Sed case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break; 2425193323Sed case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break; 2426193323Sed case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break; 2427193323Sed case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break; 2428193323Sed case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break; 2429193323Sed case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break; 2430193323Sed case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break; 2431193323Sed case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break; 2432193323Sed case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break; 2433193323Sed case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; 2434193323Sed case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; 2435193323Sed case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break; 2436193323Sed case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break; 2437193323Sed case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break; 2438193323Sed case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break; 2439193323Sed case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break; 2440193323Sed case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break; 2441193323Sed } 2442193323Sed if (NewMI) { 2443193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 2444193323Sed MI = MF.CloneMachineInstr(MI); 2445193323Sed NewMI = false; 2446193323Sed } 2447193323Sed MI->setDesc(get(Opc)); 2448193323Sed // Fallthrough intended. 2449193323Sed } 2450193323Sed default: 2451252723Sdim return TargetInstrInfo::commuteInstruction(MI, NewMI); 2452193323Sed } 2453193323Sed} 2454193323Sed 2455245431Sdimstatic X86::CondCode getCondFromBranchOpc(unsigned BrOpc) { 2456193323Sed switch (BrOpc) { 2457193323Sed default: return X86::COND_INVALID; 2458203954Srdivacky case X86::JE_4: return X86::COND_E; 2459203954Srdivacky case X86::JNE_4: return X86::COND_NE; 2460203954Srdivacky case X86::JL_4: return X86::COND_L; 2461203954Srdivacky case X86::JLE_4: return X86::COND_LE; 2462203954Srdivacky case X86::JG_4: return X86::COND_G; 2463203954Srdivacky case X86::JGE_4: return X86::COND_GE; 2464203954Srdivacky case X86::JB_4: return X86::COND_B; 2465203954Srdivacky case X86::JBE_4: return X86::COND_BE; 2466203954Srdivacky case X86::JA_4: return X86::COND_A; 2467203954Srdivacky case X86::JAE_4: return X86::COND_AE; 2468203954Srdivacky case X86::JS_4: return X86::COND_S; 2469203954Srdivacky case X86::JNS_4: return X86::COND_NS; 2470203954Srdivacky case X86::JP_4: return X86::COND_P; 2471203954Srdivacky case X86::JNP_4: return X86::COND_NP; 2472203954Srdivacky case X86::JO_4: return X86::COND_O; 2473203954Srdivacky case X86::JNO_4: return X86::COND_NO; 2474193323Sed } 2475193323Sed} 2476193323Sed 2477245431Sdim/// getCondFromSETOpc - return condition code of a SET opcode. 2478245431Sdimstatic X86::CondCode getCondFromSETOpc(unsigned Opc) { 2479245431Sdim switch (Opc) { 2480245431Sdim default: return X86::COND_INVALID; 2481245431Sdim case X86::SETAr: case X86::SETAm: return X86::COND_A; 2482245431Sdim case X86::SETAEr: case X86::SETAEm: return X86::COND_AE; 2483245431Sdim case X86::SETBr: case X86::SETBm: return X86::COND_B; 2484245431Sdim case X86::SETBEr: case X86::SETBEm: return X86::COND_BE; 2485245431Sdim case X86::SETEr: case X86::SETEm: return X86::COND_E; 2486245431Sdim case X86::SETGr: case X86::SETGm: return X86::COND_G; 2487245431Sdim case X86::SETGEr: case X86::SETGEm: return X86::COND_GE; 2488245431Sdim case X86::SETLr: case X86::SETLm: return X86::COND_L; 2489245431Sdim case X86::SETLEr: case X86::SETLEm: return X86::COND_LE; 2490245431Sdim case X86::SETNEr: case X86::SETNEm: return X86::COND_NE; 2491245431Sdim case X86::SETNOr: case X86::SETNOm: return X86::COND_NO; 2492245431Sdim case X86::SETNPr: case X86::SETNPm: return X86::COND_NP; 2493245431Sdim case X86::SETNSr: case X86::SETNSm: return X86::COND_NS; 2494245431Sdim case X86::SETOr: case X86::SETOm: return X86::COND_O; 2495245431Sdim case X86::SETPr: case X86::SETPm: return X86::COND_P; 2496245431Sdim case X86::SETSr: case X86::SETSm: return X86::COND_S; 2497245431Sdim } 2498245431Sdim} 2499245431Sdim 2500245431Sdim/// getCondFromCmovOpc - return condition code of a CMov opcode. 2501245431SdimX86::CondCode X86::getCondFromCMovOpc(unsigned Opc) { 2502245431Sdim switch (Opc) { 2503245431Sdim default: return X86::COND_INVALID; 2504245431Sdim case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm: 2505245431Sdim case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr: 2506245431Sdim return X86::COND_A; 2507245431Sdim case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm: 2508245431Sdim case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr: 2509245431Sdim return X86::COND_AE; 2510245431Sdim case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm: 2511245431Sdim case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr: 2512245431Sdim return X86::COND_B; 2513245431Sdim case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm: 2514245431Sdim case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr: 2515245431Sdim return X86::COND_BE; 2516245431Sdim case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm: 2517245431Sdim case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr: 2518245431Sdim return X86::COND_E; 2519245431Sdim case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm: 2520245431Sdim case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr: 2521245431Sdim return X86::COND_G; 2522245431Sdim case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm: 2523245431Sdim case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr: 2524245431Sdim return X86::COND_GE; 2525245431Sdim case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm: 2526245431Sdim case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr: 2527245431Sdim return X86::COND_L; 2528245431Sdim case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm: 2529245431Sdim case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr: 2530245431Sdim return X86::COND_LE; 2531245431Sdim case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm: 2532245431Sdim case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr: 2533245431Sdim return X86::COND_NE; 2534245431Sdim case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm: 2535245431Sdim case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr: 2536245431Sdim return X86::COND_NO; 2537245431Sdim case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm: 2538245431Sdim case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr: 2539245431Sdim return X86::COND_NP; 2540245431Sdim case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm: 2541245431Sdim case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr: 2542245431Sdim return X86::COND_NS; 2543245431Sdim case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm: 2544245431Sdim case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr: 2545245431Sdim return X86::COND_O; 2546245431Sdim case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm: 2547245431Sdim case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr: 2548245431Sdim return X86::COND_P; 2549245431Sdim case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm: 2550245431Sdim case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr: 2551245431Sdim return X86::COND_S; 2552245431Sdim } 2553245431Sdim} 2554245431Sdim 2555193323Sedunsigned X86::GetCondBranchFromCond(X86::CondCode CC) { 2556193323Sed switch (CC) { 2557198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 2558203954Srdivacky case X86::COND_E: return X86::JE_4; 2559203954Srdivacky case X86::COND_NE: return X86::JNE_4; 2560203954Srdivacky case X86::COND_L: return X86::JL_4; 2561203954Srdivacky case X86::COND_LE: return X86::JLE_4; 2562203954Srdivacky case X86::COND_G: return X86::JG_4; 2563203954Srdivacky case X86::COND_GE: return X86::JGE_4; 2564203954Srdivacky case X86::COND_B: return X86::JB_4; 2565203954Srdivacky case X86::COND_BE: return X86::JBE_4; 2566203954Srdivacky case X86::COND_A: return X86::JA_4; 2567203954Srdivacky case X86::COND_AE: return X86::JAE_4; 2568203954Srdivacky case X86::COND_S: return X86::JS_4; 2569203954Srdivacky case X86::COND_NS: return X86::JNS_4; 2570203954Srdivacky case X86::COND_P: return X86::JP_4; 2571203954Srdivacky case X86::COND_NP: return X86::JNP_4; 2572203954Srdivacky case X86::COND_O: return X86::JO_4; 2573203954Srdivacky case X86::COND_NO: return X86::JNO_4; 2574193323Sed } 2575193323Sed} 2576193323Sed 2577193323Sed/// GetOppositeBranchCondition - Return the inverse of the specified condition, 2578193323Sed/// e.g. turning COND_E to COND_NE. 2579193323SedX86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { 2580193323Sed switch (CC) { 2581198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 2582193323Sed case X86::COND_E: return X86::COND_NE; 2583193323Sed case X86::COND_NE: return X86::COND_E; 2584193323Sed case X86::COND_L: return X86::COND_GE; 2585193323Sed case X86::COND_LE: return X86::COND_G; 2586193323Sed case X86::COND_G: return X86::COND_LE; 2587193323Sed case X86::COND_GE: return X86::COND_L; 2588193323Sed case X86::COND_B: return X86::COND_AE; 2589193323Sed case X86::COND_BE: return X86::COND_A; 2590193323Sed case X86::COND_A: return X86::COND_BE; 2591193323Sed case X86::COND_AE: return X86::COND_B; 2592193323Sed case X86::COND_S: return X86::COND_NS; 2593193323Sed case X86::COND_NS: return X86::COND_S; 2594193323Sed case X86::COND_P: return X86::COND_NP; 2595193323Sed case X86::COND_NP: return X86::COND_P; 2596193323Sed case X86::COND_O: return X86::COND_NO; 2597193323Sed case X86::COND_NO: return X86::COND_O; 2598193323Sed } 2599193323Sed} 2600193323Sed 2601245431Sdim/// getSwappedCondition - assume the flags are set by MI(a,b), return 2602245431Sdim/// the condition code if we modify the instructions such that flags are 2603245431Sdim/// set by MI(b,a). 2604245431Sdimstatic X86::CondCode getSwappedCondition(X86::CondCode CC) { 2605245431Sdim switch (CC) { 2606245431Sdim default: return X86::COND_INVALID; 2607245431Sdim case X86::COND_E: return X86::COND_E; 2608245431Sdim case X86::COND_NE: return X86::COND_NE; 2609245431Sdim case X86::COND_L: return X86::COND_G; 2610245431Sdim case X86::COND_LE: return X86::COND_GE; 2611245431Sdim case X86::COND_G: return X86::COND_L; 2612245431Sdim case X86::COND_GE: return X86::COND_LE; 2613245431Sdim case X86::COND_B: return X86::COND_A; 2614245431Sdim case X86::COND_BE: return X86::COND_AE; 2615245431Sdim case X86::COND_A: return X86::COND_B; 2616245431Sdim case X86::COND_AE: return X86::COND_BE; 2617245431Sdim } 2618245431Sdim} 2619245431Sdim 2620245431Sdim/// getSETFromCond - Return a set opcode for the given condition and 2621245431Sdim/// whether it has memory operand. 2622245431Sdimstatic unsigned getSETFromCond(X86::CondCode CC, 2623245431Sdim bool HasMemoryOperand) { 2624245431Sdim static const uint16_t Opc[16][2] = { 2625245431Sdim { X86::SETAr, X86::SETAm }, 2626245431Sdim { X86::SETAEr, X86::SETAEm }, 2627245431Sdim { X86::SETBr, X86::SETBm }, 2628245431Sdim { X86::SETBEr, X86::SETBEm }, 2629245431Sdim { X86::SETEr, X86::SETEm }, 2630245431Sdim { X86::SETGr, X86::SETGm }, 2631245431Sdim { X86::SETGEr, X86::SETGEm }, 2632245431Sdim { X86::SETLr, X86::SETLm }, 2633245431Sdim { X86::SETLEr, X86::SETLEm }, 2634245431Sdim { X86::SETNEr, X86::SETNEm }, 2635245431Sdim { X86::SETNOr, X86::SETNOm }, 2636245431Sdim { X86::SETNPr, X86::SETNPm }, 2637245431Sdim { X86::SETNSr, X86::SETNSm }, 2638245431Sdim { X86::SETOr, X86::SETOm }, 2639245431Sdim { X86::SETPr, X86::SETPm }, 2640245431Sdim { X86::SETSr, X86::SETSm } 2641245431Sdim }; 2642245431Sdim 2643245431Sdim assert(CC < 16 && "Can only handle standard cond codes"); 2644245431Sdim return Opc[CC][HasMemoryOperand ? 1 : 0]; 2645245431Sdim} 2646245431Sdim 2647245431Sdim/// getCMovFromCond - Return a cmov opcode for the given condition, 2648245431Sdim/// register size in bytes, and operand type. 2649245431Sdimstatic unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, 2650245431Sdim bool HasMemoryOperand) { 2651245431Sdim static const uint16_t Opc[32][3] = { 2652245431Sdim { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, 2653245431Sdim { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, 2654245431Sdim { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, 2655245431Sdim { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr }, 2656245431Sdim { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr }, 2657245431Sdim { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr }, 2658245431Sdim { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr }, 2659245431Sdim { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr }, 2660245431Sdim { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr }, 2661245431Sdim { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr }, 2662245431Sdim { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr }, 2663245431Sdim { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr }, 2664245431Sdim { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr }, 2665245431Sdim { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr }, 2666245431Sdim { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr }, 2667245431Sdim { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }, 2668245431Sdim { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm }, 2669245431Sdim { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm }, 2670245431Sdim { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm }, 2671245431Sdim { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm }, 2672245431Sdim { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm }, 2673245431Sdim { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm }, 2674245431Sdim { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm }, 2675245431Sdim { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm }, 2676245431Sdim { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm }, 2677245431Sdim { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm }, 2678245431Sdim { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm }, 2679245431Sdim { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm }, 2680245431Sdim { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm }, 2681245431Sdim { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm }, 2682245431Sdim { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm }, 2683245431Sdim { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm } 2684245431Sdim }; 2685245431Sdim 2686245431Sdim assert(CC < 16 && "Can only handle standard cond codes"); 2687245431Sdim unsigned Idx = HasMemoryOperand ? 16+CC : CC; 2688245431Sdim switch(RegBytes) { 2689245431Sdim default: llvm_unreachable("Illegal register size!"); 2690245431Sdim case 2: return Opc[Idx][0]; 2691245431Sdim case 4: return Opc[Idx][1]; 2692245431Sdim case 8: return Opc[Idx][2]; 2693245431Sdim } 2694245431Sdim} 2695245431Sdim 2696193323Sedbool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { 2697235633Sdim if (!MI->isTerminator()) return false; 2698218893Sdim 2699193323Sed // Conditional branch is a special case. 2700235633Sdim if (MI->isBranch() && !MI->isBarrier()) 2701193323Sed return true; 2702235633Sdim if (!MI->isPredicable()) 2703193323Sed return true; 2704193323Sed return !isPredicated(MI); 2705193323Sed} 2706193323Sed 2707218893Sdimbool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 2708193323Sed MachineBasicBlock *&TBB, 2709193323Sed MachineBasicBlock *&FBB, 2710193323Sed SmallVectorImpl<MachineOperand> &Cond, 2711193323Sed bool AllowModify) const { 2712193323Sed // Start from the bottom of the block and work up, examining the 2713193323Sed // terminator instructions. 2714193323Sed MachineBasicBlock::iterator I = MBB.end(); 2715207618Srdivacky MachineBasicBlock::iterator UnCondBrIter = MBB.end(); 2716193323Sed while (I != MBB.begin()) { 2717193323Sed --I; 2718206083Srdivacky if (I->isDebugValue()) 2719206083Srdivacky continue; 2720200581Srdivacky 2721200581Srdivacky // Working from the bottom, when we see a non-terminator instruction, we're 2722200581Srdivacky // done. 2723212904Sdim if (!isUnpredicatedTerminator(I)) 2724193323Sed break; 2725200581Srdivacky 2726200581Srdivacky // A terminator that isn't a branch can't easily be handled by this 2727200581Srdivacky // analysis. 2728235633Sdim if (!I->isBranch()) 2729193323Sed return true; 2730200581Srdivacky 2731193323Sed // Handle unconditional branches. 2732203954Srdivacky if (I->getOpcode() == X86::JMP_4) { 2733207618Srdivacky UnCondBrIter = I; 2734207618Srdivacky 2735193323Sed if (!AllowModify) { 2736193323Sed TBB = I->getOperand(0).getMBB(); 2737193323Sed continue; 2738193323Sed } 2739193323Sed 2740193323Sed // If the block has any instructions after a JMP, delete them. 2741200581Srdivacky while (llvm::next(I) != MBB.end()) 2742200581Srdivacky llvm::next(I)->eraseFromParent(); 2743200581Srdivacky 2744193323Sed Cond.clear(); 2745193323Sed FBB = 0; 2746200581Srdivacky 2747193323Sed // Delete the JMP if it's equivalent to a fall-through. 2748193323Sed if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { 2749193323Sed TBB = 0; 2750193323Sed I->eraseFromParent(); 2751193323Sed I = MBB.end(); 2752207618Srdivacky UnCondBrIter = MBB.end(); 2753193323Sed continue; 2754193323Sed } 2755200581Srdivacky 2756207618Srdivacky // TBB is used to indicate the unconditional destination. 2757193323Sed TBB = I->getOperand(0).getMBB(); 2758193323Sed continue; 2759193323Sed } 2760200581Srdivacky 2761193323Sed // Handle conditional branches. 2762245431Sdim X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode()); 2763193323Sed if (BranchCode == X86::COND_INVALID) 2764193323Sed return true; // Can't handle indirect branch. 2765200581Srdivacky 2766193323Sed // Working from the bottom, handle the first conditional branch. 2767193323Sed if (Cond.empty()) { 2768207618Srdivacky MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); 2769207618Srdivacky if (AllowModify && UnCondBrIter != MBB.end() && 2770207618Srdivacky MBB.isLayoutSuccessor(TargetBB)) { 2771207618Srdivacky // If we can modify the code and it ends in something like: 2772207618Srdivacky // 2773207618Srdivacky // jCC L1 2774207618Srdivacky // jmp L2 2775207618Srdivacky // L1: 2776207618Srdivacky // ... 2777207618Srdivacky // L2: 2778207618Srdivacky // 2779207618Srdivacky // Then we can change this to: 2780207618Srdivacky // 2781207618Srdivacky // jnCC L2 2782207618Srdivacky // L1: 2783207618Srdivacky // ... 2784207618Srdivacky // L2: 2785207618Srdivacky // 2786207618Srdivacky // Which is a bit more efficient. 2787207618Srdivacky // We conditionally jump to the fall-through block. 2788207618Srdivacky BranchCode = GetOppositeBranchCondition(BranchCode); 2789207618Srdivacky unsigned JNCC = GetCondBranchFromCond(BranchCode); 2790207618Srdivacky MachineBasicBlock::iterator OldInst = I; 2791207618Srdivacky 2792207618Srdivacky BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC)) 2793207618Srdivacky .addMBB(UnCondBrIter->getOperand(0).getMBB()); 2794207618Srdivacky BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4)) 2795207618Srdivacky .addMBB(TargetBB); 2796207618Srdivacky 2797207618Srdivacky OldInst->eraseFromParent(); 2798207618Srdivacky UnCondBrIter->eraseFromParent(); 2799207618Srdivacky 2800207618Srdivacky // Restart the analysis. 2801207618Srdivacky UnCondBrIter = MBB.end(); 2802207618Srdivacky I = MBB.end(); 2803207618Srdivacky continue; 2804207618Srdivacky } 2805207618Srdivacky 2806193323Sed FBB = TBB; 2807193323Sed TBB = I->getOperand(0).getMBB(); 2808193323Sed Cond.push_back(MachineOperand::CreateImm(BranchCode)); 2809193323Sed continue; 2810193323Sed } 2811200581Srdivacky 2812200581Srdivacky // Handle subsequent conditional branches. Only handle the case where all 2813200581Srdivacky // conditional branches branch to the same destination and their condition 2814200581Srdivacky // opcodes fit one of the special multi-branch idioms. 2815193323Sed assert(Cond.size() == 1); 2816193323Sed assert(TBB); 2817200581Srdivacky 2818200581Srdivacky // Only handle the case where all conditional branches branch to the same 2819200581Srdivacky // destination. 2820193323Sed if (TBB != I->getOperand(0).getMBB()) 2821193323Sed return true; 2822200581Srdivacky 2823200581Srdivacky // If the conditions are the same, we can leave them alone. 2824193323Sed X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); 2825193323Sed if (OldBranchCode == BranchCode) 2826193323Sed continue; 2827200581Srdivacky 2828200581Srdivacky // If they differ, see if they fit one of the known patterns. Theoretically, 2829200581Srdivacky // we could handle more patterns here, but we shouldn't expect to see them 2830200581Srdivacky // if instruction selection has done a reasonable job. 2831193323Sed if ((OldBranchCode == X86::COND_NP && 2832193323Sed BranchCode == X86::COND_E) || 2833193323Sed (OldBranchCode == X86::COND_E && 2834193323Sed BranchCode == X86::COND_NP)) 2835193323Sed BranchCode = X86::COND_NP_OR_E; 2836193323Sed else if ((OldBranchCode == X86::COND_P && 2837193323Sed BranchCode == X86::COND_NE) || 2838193323Sed (OldBranchCode == X86::COND_NE && 2839193323Sed BranchCode == X86::COND_P)) 2840193323Sed BranchCode = X86::COND_NE_OR_P; 2841193323Sed else 2842193323Sed return true; 2843200581Srdivacky 2844193323Sed // Update the MachineOperand. 2845193323Sed Cond[0].setImm(BranchCode); 2846193323Sed } 2847193323Sed 2848193323Sed return false; 2849193323Sed} 2850193323Sed 2851193323Sedunsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 2852193323Sed MachineBasicBlock::iterator I = MBB.end(); 2853193323Sed unsigned Count = 0; 2854193323Sed 2855193323Sed while (I != MBB.begin()) { 2856193323Sed --I; 2857206083Srdivacky if (I->isDebugValue()) 2858206083Srdivacky continue; 2859203954Srdivacky if (I->getOpcode() != X86::JMP_4 && 2860245431Sdim getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) 2861193323Sed break; 2862193323Sed // Remove the branch. 2863193323Sed I->eraseFromParent(); 2864193323Sed I = MBB.end(); 2865193323Sed ++Count; 2866193323Sed } 2867218893Sdim 2868193323Sed return Count; 2869193323Sed} 2870193323Sed 2871193323Sedunsigned 2872193323SedX86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 2873193323Sed MachineBasicBlock *FBB, 2874210299Sed const SmallVectorImpl<MachineOperand> &Cond, 2875210299Sed DebugLoc DL) const { 2876193323Sed // Shouldn't be a fall through. 2877193323Sed assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 2878193323Sed assert((Cond.size() == 1 || Cond.size() == 0) && 2879193323Sed "X86 branch conditions have one component!"); 2880193323Sed 2881193323Sed if (Cond.empty()) { 2882193323Sed // Unconditional branch? 2883193323Sed assert(!FBB && "Unconditional branch with multiple successors!"); 2884210299Sed BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB); 2885193323Sed return 1; 2886193323Sed } 2887193323Sed 2888193323Sed // Conditional branch. 2889193323Sed unsigned Count = 0; 2890193323Sed X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); 2891193323Sed switch (CC) { 2892193323Sed case X86::COND_NP_OR_E: 2893193323Sed // Synthesize NP_OR_E with two branches. 2894210299Sed BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB); 2895193323Sed ++Count; 2896210299Sed BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB); 2897193323Sed ++Count; 2898193323Sed break; 2899193323Sed case X86::COND_NE_OR_P: 2900193323Sed // Synthesize NE_OR_P with two branches. 2901210299Sed BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB); 2902193323Sed ++Count; 2903210299Sed BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB); 2904193323Sed ++Count; 2905193323Sed break; 2906193323Sed default: { 2907193323Sed unsigned Opc = GetCondBranchFromCond(CC); 2908210299Sed BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); 2909193323Sed ++Count; 2910193323Sed } 2911193323Sed } 2912193323Sed if (FBB) { 2913193323Sed // Two-way Conditional branch. Insert the second branch. 2914210299Sed BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB); 2915193323Sed ++Count; 2916193323Sed } 2917193323Sed return Count; 2918193323Sed} 2919193323Sed 2920245431Sdimbool X86InstrInfo:: 2921245431SdimcanInsertSelect(const MachineBasicBlock &MBB, 2922245431Sdim const SmallVectorImpl<MachineOperand> &Cond, 2923245431Sdim unsigned TrueReg, unsigned FalseReg, 2924245431Sdim int &CondCycles, int &TrueCycles, int &FalseCycles) const { 2925245431Sdim // Not all subtargets have cmov instructions. 2926245431Sdim if (!TM.getSubtarget<X86Subtarget>().hasCMov()) 2927245431Sdim return false; 2928245431Sdim if (Cond.size() != 1) 2929245431Sdim return false; 2930245431Sdim // We cannot do the composite conditions, at least not in SSA form. 2931245431Sdim if ((X86::CondCode)Cond[0].getImm() > X86::COND_S) 2932245431Sdim return false; 2933245431Sdim 2934245431Sdim // Check register classes. 2935245431Sdim const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2936245431Sdim const TargetRegisterClass *RC = 2937245431Sdim RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); 2938245431Sdim if (!RC) 2939245431Sdim return false; 2940245431Sdim 2941245431Sdim // We have cmov instructions for 16, 32, and 64 bit general purpose registers. 2942245431Sdim if (X86::GR16RegClass.hasSubClassEq(RC) || 2943245431Sdim X86::GR32RegClass.hasSubClassEq(RC) || 2944245431Sdim X86::GR64RegClass.hasSubClassEq(RC)) { 2945245431Sdim // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy 2946245431Sdim // Bridge. Probably Ivy Bridge as well. 2947245431Sdim CondCycles = 2; 2948245431Sdim TrueCycles = 2; 2949245431Sdim FalseCycles = 2; 2950245431Sdim return true; 2951245431Sdim } 2952245431Sdim 2953245431Sdim // Can't do vectors. 2954245431Sdim return false; 2955245431Sdim} 2956245431Sdim 2957245431Sdimvoid X86InstrInfo::insertSelect(MachineBasicBlock &MBB, 2958245431Sdim MachineBasicBlock::iterator I, DebugLoc DL, 2959245431Sdim unsigned DstReg, 2960245431Sdim const SmallVectorImpl<MachineOperand> &Cond, 2961245431Sdim unsigned TrueReg, unsigned FalseReg) const { 2962245431Sdim MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2963245431Sdim assert(Cond.size() == 1 && "Invalid Cond array"); 2964245431Sdim unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), 2965245431Sdim MRI.getRegClass(DstReg)->getSize(), 2966245431Sdim false/*HasMemoryOperand*/); 2967245431Sdim BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); 2968245431Sdim} 2969245431Sdim 2970193323Sed/// isHReg - Test if the given register is a physical h register. 2971193323Sedstatic bool isHReg(unsigned Reg) { 2972193323Sed return X86::GR8_ABCD_HRegClass.contains(Reg); 2973193323Sed} 2974193323Sed 2975212904Sdim// Try and copy between VR128/VR64 and GR64 registers. 2976226890Sdimstatic unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, 2977263509Sdim const X86Subtarget& Subtarget) { 2978263509Sdim 2979263509Sdim 2980212904Sdim // SrcReg(VR128) -> DestReg(GR64) 2981212904Sdim // SrcReg(VR64) -> DestReg(GR64) 2982212904Sdim // SrcReg(GR64) -> DestReg(VR128) 2983212904Sdim // SrcReg(GR64) -> DestReg(VR64) 2984212904Sdim 2985263509Sdim bool HasAVX = Subtarget.hasAVX(); 2986263509Sdim bool HasAVX512 = Subtarget.hasAVX512(); 2987212904Sdim if (X86::GR64RegClass.contains(DestReg)) { 2988263509Sdim if (X86::VR128XRegClass.contains(SrcReg)) 2989212904Sdim // Copy from a VR128 register to a GR64 register. 2990263509Sdim return HasAVX512 ? X86::VMOVPQIto64Zrr: (HasAVX ? X86::VMOVPQIto64rr : 2991263509Sdim X86::MOVPQIto64rr); 2992245431Sdim if (X86::VR64RegClass.contains(SrcReg)) 2993212904Sdim // Copy from a VR64 register to a GR64 register. 2994212904Sdim return X86::MOVSDto64rr; 2995212904Sdim } else if (X86::GR64RegClass.contains(SrcReg)) { 2996212904Sdim // Copy from a GR64 register to a VR128 register. 2997263509Sdim if (X86::VR128XRegClass.contains(DestReg)) 2998263509Sdim return HasAVX512 ? X86::VMOV64toPQIZrr: (HasAVX ? X86::VMOV64toPQIrr : 2999263509Sdim X86::MOV64toPQIrr); 3000212904Sdim // Copy from a GR64 register to a VR64 register. 3001245431Sdim if (X86::VR64RegClass.contains(DestReg)) 3002212904Sdim return X86::MOV64toSDrr; 3003212904Sdim } 3004212904Sdim 3005226890Sdim // SrcReg(FR32) -> DestReg(GR32) 3006226890Sdim // SrcReg(GR32) -> DestReg(FR32) 3007226890Sdim 3008263509Sdim if (X86::GR32RegClass.contains(DestReg) && X86::FR32XRegClass.contains(SrcReg)) 3009245431Sdim // Copy from a FR32 register to a GR32 register. 3010263509Sdim return HasAVX512 ? X86::VMOVSS2DIZrr : (HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr); 3011226890Sdim 3012263509Sdim if (X86::FR32XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) 3013245431Sdim // Copy from a GR32 register to a FR32 register. 3014263509Sdim return HasAVX512 ? X86::VMOVDI2SSZrr : (HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr); 3015263509Sdim return 0; 3016263509Sdim} 3017226890Sdim 3018263509Sdimstatic 3019263509Sdimunsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { 3020263509Sdim if (X86::VR128XRegClass.contains(DestReg, SrcReg) || 3021263509Sdim X86::VR256XRegClass.contains(DestReg, SrcReg) || 3022263509Sdim X86::VR512RegClass.contains(DestReg, SrcReg)) { 3023263509Sdim DestReg = get512BitSuperRegister(DestReg); 3024263509Sdim SrcReg = get512BitSuperRegister(SrcReg); 3025263509Sdim return X86::VMOVAPSZrr; 3026263509Sdim } 3027263509Sdim if ((X86::VK8RegClass.contains(DestReg) || 3028263509Sdim X86::VK16RegClass.contains(DestReg)) && 3029263509Sdim (X86::VK8RegClass.contains(SrcReg) || 3030263509Sdim X86::VK16RegClass.contains(SrcReg))) 3031263509Sdim return X86::KMOVWkk; 3032212904Sdim return 0; 3033212904Sdim} 3034212904Sdim 3035210299Sedvoid X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 3036210299Sed MachineBasicBlock::iterator MI, DebugLoc DL, 3037210299Sed unsigned DestReg, unsigned SrcReg, 3038210299Sed bool KillSrc) const { 3039210299Sed // First deal with the normal symmetric copies. 3040226890Sdim bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); 3041263509Sdim bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512(); 3042263509Sdim unsigned Opc = 0; 3043210299Sed if (X86::GR64RegClass.contains(DestReg, SrcReg)) 3044210299Sed Opc = X86::MOV64rr; 3045210299Sed else if (X86::GR32RegClass.contains(DestReg, SrcReg)) 3046210299Sed Opc = X86::MOV32rr; 3047210299Sed else if (X86::GR16RegClass.contains(DestReg, SrcReg)) 3048210299Sed Opc = X86::MOV16rr; 3049210299Sed else if (X86::GR8RegClass.contains(DestReg, SrcReg)) { 3050210299Sed // Copying to or from a physical H register on x86-64 requires a NOREX 3051210299Sed // move. Otherwise use a normal move. 3052210299Sed if ((isHReg(DestReg) || isHReg(SrcReg)) && 3053226890Sdim TM.getSubtarget<X86Subtarget>().is64Bit()) { 3054210299Sed Opc = X86::MOV8rr_NOREX; 3055226890Sdim // Both operands must be encodable without an REX prefix. 3056226890Sdim assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) && 3057226890Sdim "8-bit H register can not be copied outside GR8_NOREX"); 3058226890Sdim } else 3059210299Sed Opc = X86::MOV8rr; 3060263509Sdim } 3061263509Sdim else if (X86::VR64RegClass.contains(DestReg, SrcReg)) 3062263509Sdim Opc = X86::MMX_MOVQ64rr; 3063263509Sdim else if (HasAVX512) 3064263509Sdim Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg); 3065263509Sdim else if (X86::VR128RegClass.contains(DestReg, SrcReg)) 3066226890Sdim Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr; 3067224145Sdim else if (X86::VR256RegClass.contains(DestReg, SrcReg)) 3068224145Sdim Opc = X86::VMOVAPSYrr; 3069263509Sdim if (!Opc) 3070263509Sdim Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget<X86Subtarget>()); 3071193323Sed 3072210299Sed if (Opc) { 3073210299Sed BuildMI(MBB, MI, DL, get(Opc), DestReg) 3074210299Sed .addReg(SrcReg, getKillRegState(KillSrc)); 3075210299Sed return; 3076193323Sed } 3077198090Srdivacky 3078193323Sed // Moving EFLAGS to / from another register requires a push and a pop. 3079252723Sdim // Notice that we have to adjust the stack if we don't want to clobber the 3080252723Sdim // first frame index. See X86FrameLowering.cpp - colobbersTheStack. 3081210299Sed if (SrcReg == X86::EFLAGS) { 3082210299Sed if (X86::GR64RegClass.contains(DestReg)) { 3083208599Srdivacky BuildMI(MBB, MI, DL, get(X86::PUSHF64)); 3084193323Sed BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); 3085210299Sed return; 3086245431Sdim } 3087245431Sdim if (X86::GR32RegClass.contains(DestReg)) { 3088208599Srdivacky BuildMI(MBB, MI, DL, get(X86::PUSHF32)); 3089193323Sed BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); 3090210299Sed return; 3091193323Sed } 3092210299Sed } 3093210299Sed if (DestReg == X86::EFLAGS) { 3094210299Sed if (X86::GR64RegClass.contains(SrcReg)) { 3095210299Sed BuildMI(MBB, MI, DL, get(X86::PUSH64r)) 3096210299Sed .addReg(SrcReg, getKillRegState(KillSrc)); 3097208599Srdivacky BuildMI(MBB, MI, DL, get(X86::POPF64)); 3098210299Sed return; 3099245431Sdim } 3100245431Sdim if (X86::GR32RegClass.contains(SrcReg)) { 3101210299Sed BuildMI(MBB, MI, DL, get(X86::PUSH32r)) 3102210299Sed .addReg(SrcReg, getKillRegState(KillSrc)); 3103208599Srdivacky BuildMI(MBB, MI, DL, get(X86::POPF32)); 3104210299Sed return; 3105193323Sed } 3106193323Sed } 3107193323Sed 3108210299Sed DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) 3109210299Sed << " to " << RI.getName(DestReg) << '\n'); 3110210299Sed llvm_unreachable("Cannot emit physreg copy instruction"); 3111193323Sed} 3112193323Sed 3113210299Sedstatic unsigned getLoadStoreRegOpcode(unsigned Reg, 3114210299Sed const TargetRegisterClass *RC, 3115210299Sed bool isStackAligned, 3116210299Sed const TargetMachine &TM, 3117210299Sed bool load) { 3118263509Sdim if (TM.getSubtarget<X86Subtarget>().hasAVX512()) { 3119263509Sdim if (X86::VK8RegClass.hasSubClassEq(RC) || 3120263509Sdim X86::VK16RegClass.hasSubClassEq(RC)) 3121263509Sdim return load ? X86::KMOVWkm : X86::KMOVWmk; 3122263509Sdim if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC)) 3123263509Sdim return load ? X86::VMOVSSZrm : X86::VMOVSSZmr; 3124263509Sdim if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC)) 3125263509Sdim return load ? X86::VMOVSDZrm : X86::VMOVSDZmr; 3126263509Sdim if (X86::VR512RegClass.hasSubClassEq(RC)) 3127263509Sdim return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; 3128263509Sdim } 3129263509Sdim 3130226890Sdim bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); 3131223017Sdim switch (RC->getSize()) { 3132210299Sed default: 3133223017Sdim llvm_unreachable("Unknown spill size"); 3134223017Sdim case 1: 3135223017Sdim assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); 3136223017Sdim if (TM.getSubtarget<X86Subtarget>().is64Bit()) 3137223017Sdim // Copying to or from a physical H register on x86-64 requires a NOREX 3138223017Sdim // move. Otherwise use a normal move. 3139223017Sdim if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC)) 3140223017Sdim return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; 3141223017Sdim return load ? X86::MOV8rm : X86::MOV8mr; 3142223017Sdim case 2: 3143223017Sdim assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass"); 3144210299Sed return load ? X86::MOV16rm : X86::MOV16mr; 3145223017Sdim case 4: 3146223017Sdim if (X86::GR32RegClass.hasSubClassEq(RC)) 3147223017Sdim return load ? X86::MOV32rm : X86::MOV32mr; 3148223017Sdim if (X86::FR32RegClass.hasSubClassEq(RC)) 3149226890Sdim return load ? 3150226890Sdim (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) : 3151226890Sdim (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); 3152223017Sdim if (X86::RFP32RegClass.hasSubClassEq(RC)) 3153223017Sdim return load ? X86::LD_Fp32m : X86::ST_Fp32m; 3154223017Sdim llvm_unreachable("Unknown 4-byte regclass"); 3155223017Sdim case 8: 3156223017Sdim if (X86::GR64RegClass.hasSubClassEq(RC)) 3157223017Sdim return load ? X86::MOV64rm : X86::MOV64mr; 3158223017Sdim if (X86::FR64RegClass.hasSubClassEq(RC)) 3159226890Sdim return load ? 3160226890Sdim (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) : 3161226890Sdim (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr); 3162223017Sdim if (X86::VR64RegClass.hasSubClassEq(RC)) 3163223017Sdim return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; 3164223017Sdim if (X86::RFP64RegClass.hasSubClassEq(RC)) 3165223017Sdim return load ? X86::LD_Fp64m : X86::ST_Fp64m; 3166223017Sdim llvm_unreachable("Unknown 8-byte regclass"); 3167223017Sdim case 10: 3168223017Sdim assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); 3169210299Sed return load ? X86::LD_Fp80m : X86::ST_FpP80m; 3170226890Sdim case 16: { 3171263509Sdim assert((X86::VR128RegClass.hasSubClassEq(RC) || 3172263509Sdim X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass"); 3173193323Sed // If stack is realigned we can use aligned stores. 3174210299Sed if (isStackAligned) 3175226890Sdim return load ? 3176226890Sdim (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) : 3177226890Sdim (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr); 3178210299Sed else 3179226890Sdim return load ? 3180226890Sdim (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) : 3181226890Sdim (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); 3182226890Sdim } 3183224145Sdim case 32: 3184263509Sdim assert((X86::VR256RegClass.hasSubClassEq(RC) || 3185263509Sdim X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass"); 3186224145Sdim // If stack is realigned we can use aligned stores. 3187224145Sdim if (isStackAligned) 3188224145Sdim return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr; 3189224145Sdim else 3190224145Sdim return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr; 3191263509Sdim case 64: 3192263509Sdim assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass"); 3193263509Sdim if (isStackAligned) 3194263509Sdim return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; 3195263509Sdim else 3196263509Sdim return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; 3197193323Sed } 3198210299Sed} 3199193323Sed 3200210299Sedstatic unsigned getStoreRegOpcode(unsigned SrcReg, 3201210299Sed const TargetRegisterClass *RC, 3202210299Sed bool isStackAligned, 3203210299Sed TargetMachine &TM) { 3204210299Sed return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false); 3205193323Sed} 3206193323Sed 3207210299Sed 3208210299Sedstatic unsigned getLoadRegOpcode(unsigned DestReg, 3209210299Sed const TargetRegisterClass *RC, 3210210299Sed bool isStackAligned, 3211210299Sed const TargetMachine &TM) { 3212210299Sed return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true); 3213210299Sed} 3214210299Sed 3215193323Sedvoid X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 3216193323Sed MachineBasicBlock::iterator MI, 3217193323Sed unsigned SrcReg, bool isKill, int FrameIdx, 3218208599Srdivacky const TargetRegisterClass *RC, 3219208599Srdivacky const TargetRegisterInfo *TRI) const { 3220193323Sed const MachineFunction &MF = *MBB.getParent(); 3221212904Sdim assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && 3222212904Sdim "Stack slot too small for store"); 3223263509Sdim unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); 3224226890Sdim bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || 3225224145Sdim RI.canRealignStack(MF); 3226193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 3227203954Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 3228193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) 3229193323Sed .addReg(SrcReg, getKillRegState(isKill)); 3230193323Sed} 3231193323Sed 3232193323Sedvoid X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, 3233193323Sed bool isKill, 3234193323Sed SmallVectorImpl<MachineOperand> &Addr, 3235193323Sed const TargetRegisterClass *RC, 3236198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 3237198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 3238193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 3239263509Sdim unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); 3240226890Sdim bool isAligned = MMOBegin != MMOEnd && 3241226890Sdim (*MMOBegin)->getAlignment() >= Alignment; 3242193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 3243206124Srdivacky DebugLoc DL; 3244193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 3245193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 3246193323Sed MIB.addOperand(Addr[i]); 3247193323Sed MIB.addReg(SrcReg, getKillRegState(isKill)); 3248198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 3249193323Sed NewMIs.push_back(MIB); 3250193323Sed} 3251193323Sed 3252193323Sed 3253193323Sedvoid X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 3254193323Sed MachineBasicBlock::iterator MI, 3255193323Sed unsigned DestReg, int FrameIdx, 3256208599Srdivacky const TargetRegisterClass *RC, 3257208599Srdivacky const TargetRegisterInfo *TRI) const { 3258193323Sed const MachineFunction &MF = *MBB.getParent(); 3259263509Sdim unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); 3260226890Sdim bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || 3261224145Sdim RI.canRealignStack(MF); 3262193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 3263203954Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 3264193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); 3265193323Sed} 3266193323Sed 3267193323Sedvoid X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, 3268193323Sed SmallVectorImpl<MachineOperand> &Addr, 3269193323Sed const TargetRegisterClass *RC, 3270198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 3271198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 3272193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 3273263509Sdim unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); 3274226890Sdim bool isAligned = MMOBegin != MMOEnd && 3275226890Sdim (*MMOBegin)->getAlignment() >= Alignment; 3276193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 3277206124Srdivacky DebugLoc DL; 3278193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); 3279193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 3280193323Sed MIB.addOperand(Addr[i]); 3281198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 3282193323Sed NewMIs.push_back(MIB); 3283193323Sed} 3284193323Sed 3285245431Sdimbool X86InstrInfo:: 3286245431SdimanalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, 3287245431Sdim int &CmpMask, int &CmpValue) const { 3288245431Sdim switch (MI->getOpcode()) { 3289245431Sdim default: break; 3290245431Sdim case X86::CMP64ri32: 3291245431Sdim case X86::CMP64ri8: 3292245431Sdim case X86::CMP32ri: 3293245431Sdim case X86::CMP32ri8: 3294245431Sdim case X86::CMP16ri: 3295245431Sdim case X86::CMP16ri8: 3296245431Sdim case X86::CMP8ri: 3297245431Sdim SrcReg = MI->getOperand(0).getReg(); 3298245431Sdim SrcReg2 = 0; 3299245431Sdim CmpMask = ~0; 3300245431Sdim CmpValue = MI->getOperand(1).getImm(); 3301245431Sdim return true; 3302245431Sdim // A SUB can be used to perform comparison. 3303245431Sdim case X86::SUB64rm: 3304245431Sdim case X86::SUB32rm: 3305245431Sdim case X86::SUB16rm: 3306245431Sdim case X86::SUB8rm: 3307245431Sdim SrcReg = MI->getOperand(1).getReg(); 3308245431Sdim SrcReg2 = 0; 3309245431Sdim CmpMask = ~0; 3310245431Sdim CmpValue = 0; 3311245431Sdim return true; 3312245431Sdim case X86::SUB64rr: 3313245431Sdim case X86::SUB32rr: 3314245431Sdim case X86::SUB16rr: 3315245431Sdim case X86::SUB8rr: 3316245431Sdim SrcReg = MI->getOperand(1).getReg(); 3317245431Sdim SrcReg2 = MI->getOperand(2).getReg(); 3318245431Sdim CmpMask = ~0; 3319245431Sdim CmpValue = 0; 3320245431Sdim return true; 3321245431Sdim case X86::SUB64ri32: 3322245431Sdim case X86::SUB64ri8: 3323245431Sdim case X86::SUB32ri: 3324245431Sdim case X86::SUB32ri8: 3325245431Sdim case X86::SUB16ri: 3326245431Sdim case X86::SUB16ri8: 3327245431Sdim case X86::SUB8ri: 3328245431Sdim SrcReg = MI->getOperand(1).getReg(); 3329245431Sdim SrcReg2 = 0; 3330245431Sdim CmpMask = ~0; 3331245431Sdim CmpValue = MI->getOperand(2).getImm(); 3332245431Sdim return true; 3333245431Sdim case X86::CMP64rr: 3334245431Sdim case X86::CMP32rr: 3335245431Sdim case X86::CMP16rr: 3336245431Sdim case X86::CMP8rr: 3337245431Sdim SrcReg = MI->getOperand(0).getReg(); 3338245431Sdim SrcReg2 = MI->getOperand(1).getReg(); 3339245431Sdim CmpMask = ~0; 3340245431Sdim CmpValue = 0; 3341245431Sdim return true; 3342245431Sdim case X86::TEST8rr: 3343245431Sdim case X86::TEST16rr: 3344245431Sdim case X86::TEST32rr: 3345245431Sdim case X86::TEST64rr: 3346245431Sdim SrcReg = MI->getOperand(0).getReg(); 3347245431Sdim if (MI->getOperand(1).getReg() != SrcReg) return false; 3348245431Sdim // Compare against zero. 3349245431Sdim SrcReg2 = 0; 3350245431Sdim CmpMask = ~0; 3351245431Sdim CmpValue = 0; 3352245431Sdim return true; 3353245431Sdim } 3354245431Sdim return false; 3355245431Sdim} 3356245431Sdim 3357245431Sdim/// isRedundantFlagInstr - check whether the first instruction, whose only 3358245431Sdim/// purpose is to update flags, can be made redundant. 3359245431Sdim/// CMPrr can be made redundant by SUBrr if the operands are the same. 3360245431Sdim/// This function can be extended later on. 3361245431Sdim/// SrcReg, SrcRegs: register operands for FlagI. 3362245431Sdim/// ImmValue: immediate for FlagI if it takes an immediate. 3363245431Sdiminline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg, 3364245431Sdim unsigned SrcReg2, int ImmValue, 3365245431Sdim MachineInstr *OI) { 3366245431Sdim if (((FlagI->getOpcode() == X86::CMP64rr && 3367245431Sdim OI->getOpcode() == X86::SUB64rr) || 3368245431Sdim (FlagI->getOpcode() == X86::CMP32rr && 3369245431Sdim OI->getOpcode() == X86::SUB32rr)|| 3370245431Sdim (FlagI->getOpcode() == X86::CMP16rr && 3371245431Sdim OI->getOpcode() == X86::SUB16rr)|| 3372245431Sdim (FlagI->getOpcode() == X86::CMP8rr && 3373245431Sdim OI->getOpcode() == X86::SUB8rr)) && 3374245431Sdim ((OI->getOperand(1).getReg() == SrcReg && 3375245431Sdim OI->getOperand(2).getReg() == SrcReg2) || 3376245431Sdim (OI->getOperand(1).getReg() == SrcReg2 && 3377245431Sdim OI->getOperand(2).getReg() == SrcReg))) 3378245431Sdim return true; 3379245431Sdim 3380245431Sdim if (((FlagI->getOpcode() == X86::CMP64ri32 && 3381245431Sdim OI->getOpcode() == X86::SUB64ri32) || 3382245431Sdim (FlagI->getOpcode() == X86::CMP64ri8 && 3383245431Sdim OI->getOpcode() == X86::SUB64ri8) || 3384245431Sdim (FlagI->getOpcode() == X86::CMP32ri && 3385245431Sdim OI->getOpcode() == X86::SUB32ri) || 3386245431Sdim (FlagI->getOpcode() == X86::CMP32ri8 && 3387245431Sdim OI->getOpcode() == X86::SUB32ri8) || 3388245431Sdim (FlagI->getOpcode() == X86::CMP16ri && 3389245431Sdim OI->getOpcode() == X86::SUB16ri) || 3390245431Sdim (FlagI->getOpcode() == X86::CMP16ri8 && 3391245431Sdim OI->getOpcode() == X86::SUB16ri8) || 3392245431Sdim (FlagI->getOpcode() == X86::CMP8ri && 3393245431Sdim OI->getOpcode() == X86::SUB8ri)) && 3394245431Sdim OI->getOperand(1).getReg() == SrcReg && 3395245431Sdim OI->getOperand(2).getImm() == ImmValue) 3396245431Sdim return true; 3397245431Sdim return false; 3398245431Sdim} 3399245431Sdim 3400245431Sdim/// isDefConvertible - check whether the definition can be converted 3401245431Sdim/// to remove a comparison against zero. 3402245431Sdiminline static bool isDefConvertible(MachineInstr *MI) { 3403245431Sdim switch (MI->getOpcode()) { 3404245431Sdim default: return false; 3405263509Sdim 3406263509Sdim // The shift instructions only modify ZF if their shift count is non-zero. 3407263509Sdim // N.B.: The processor truncates the shift count depending on the encoding. 3408263509Sdim case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri: 3409263509Sdim case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri: 3410263509Sdim return getTruncatedShiftCount(MI, 2) != 0; 3411263509Sdim 3412263509Sdim // Some left shift instructions can be turned into LEA instructions but only 3413263509Sdim // if their flags aren't used. Avoid transforming such instructions. 3414263509Sdim case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{ 3415263509Sdim unsigned ShAmt = getTruncatedShiftCount(MI, 2); 3416263509Sdim if (isTruncatedShiftCountForLEA(ShAmt)) return false; 3417263509Sdim return ShAmt != 0; 3418263509Sdim } 3419263509Sdim 3420263509Sdim case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8: 3421263509Sdim case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8: 3422263509Sdim return getTruncatedShiftCount(MI, 3) != 0; 3423263509Sdim 3424245431Sdim case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri: 3425245431Sdim case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8: 3426245431Sdim case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr: 3427245431Sdim case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm: 3428245431Sdim case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm: 3429252723Sdim case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r: 3430245431Sdim case X86::DEC64_32r: case X86::DEC64_16r: 3431245431Sdim case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri: 3432245431Sdim case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8: 3433245431Sdim case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr: 3434245431Sdim case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: 3435245431Sdim case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: 3436252723Sdim case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: 3437245431Sdim case X86::INC64_32r: case X86::INC64_16r: 3438245431Sdim case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: 3439245431Sdim case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: 3440245431Sdim case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: 3441245431Sdim case X86::AND16rr: case X86::AND8rr: case X86::AND64rm: 3442245431Sdim case X86::AND32rm: case X86::AND16rm: case X86::AND8rm: 3443245431Sdim case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri: 3444245431Sdim case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8: 3445245431Sdim case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr: 3446245431Sdim case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm: 3447245431Sdim case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm: 3448245431Sdim case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri: 3449245431Sdim case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8: 3450245431Sdim case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: 3451245431Sdim case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: 3452245431Sdim case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: 3453263509Sdim case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r: 3454263509Sdim case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1: 3455263509Sdim case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1: 3456263509Sdim case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1: 3457263509Sdim case X86::ADC32ri: case X86::ADC32ri8: 3458263509Sdim case X86::ADC32rr: case X86::ADC64ri32: 3459263509Sdim case X86::ADC64ri8: case X86::ADC64rr: 3460263509Sdim case X86::SBB32ri: case X86::SBB32ri8: 3461263509Sdim case X86::SBB32rr: case X86::SBB64ri32: 3462263509Sdim case X86::SBB64ri8: case X86::SBB64rr: 3463252723Sdim case X86::ANDN32rr: case X86::ANDN32rm: 3464252723Sdim case X86::ANDN64rr: case X86::ANDN64rm: 3465263509Sdim case X86::BEXTR32rr: case X86::BEXTR64rr: 3466263509Sdim case X86::BEXTR32rm: case X86::BEXTR64rm: 3467263509Sdim case X86::BLSI32rr: case X86::BLSI32rm: 3468263509Sdim case X86::BLSI64rr: case X86::BLSI64rm: 3469263509Sdim case X86::BLSMSK32rr:case X86::BLSMSK32rm: 3470263509Sdim case X86::BLSMSK64rr:case X86::BLSMSK64rm: 3471263509Sdim case X86::BLSR32rr: case X86::BLSR32rm: 3472263509Sdim case X86::BLSR64rr: case X86::BLSR64rm: 3473263509Sdim case X86::BZHI32rr: case X86::BZHI32rm: 3474263509Sdim case X86::BZHI64rr: case X86::BZHI64rm: 3475263509Sdim case X86::LZCNT16rr: case X86::LZCNT16rm: 3476263509Sdim case X86::LZCNT32rr: case X86::LZCNT32rm: 3477263509Sdim case X86::LZCNT64rr: case X86::LZCNT64rm: 3478263509Sdim case X86::POPCNT16rr:case X86::POPCNT16rm: 3479263509Sdim case X86::POPCNT32rr:case X86::POPCNT32rm: 3480263509Sdim case X86::POPCNT64rr:case X86::POPCNT64rm: 3481263509Sdim case X86::TZCNT16rr: case X86::TZCNT16rm: 3482263509Sdim case X86::TZCNT32rr: case X86::TZCNT32rm: 3483263509Sdim case X86::TZCNT64rr: case X86::TZCNT64rm: 3484245431Sdim return true; 3485245431Sdim } 3486245431Sdim} 3487245431Sdim 3488245431Sdim/// optimizeCompareInstr - Check if there exists an earlier instruction that 3489245431Sdim/// operates on the same source operands and sets flags in the same way as 3490245431Sdim/// Compare; remove Compare if possible. 3491245431Sdimbool X86InstrInfo:: 3492245431SdimoptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, 3493245431Sdim int CmpMask, int CmpValue, 3494245431Sdim const MachineRegisterInfo *MRI) const { 3495245431Sdim // Check whether we can replace SUB with CMP. 3496245431Sdim unsigned NewOpcode = 0; 3497245431Sdim switch (CmpInstr->getOpcode()) { 3498245431Sdim default: break; 3499245431Sdim case X86::SUB64ri32: 3500245431Sdim case X86::SUB64ri8: 3501245431Sdim case X86::SUB32ri: 3502245431Sdim case X86::SUB32ri8: 3503245431Sdim case X86::SUB16ri: 3504245431Sdim case X86::SUB16ri8: 3505245431Sdim case X86::SUB8ri: 3506245431Sdim case X86::SUB64rm: 3507245431Sdim case X86::SUB32rm: 3508245431Sdim case X86::SUB16rm: 3509245431Sdim case X86::SUB8rm: 3510245431Sdim case X86::SUB64rr: 3511245431Sdim case X86::SUB32rr: 3512245431Sdim case X86::SUB16rr: 3513245431Sdim case X86::SUB8rr: { 3514245431Sdim if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) 3515245431Sdim return false; 3516245431Sdim // There is no use of the destination register, we can replace SUB with CMP. 3517245431Sdim switch (CmpInstr->getOpcode()) { 3518245431Sdim default: llvm_unreachable("Unreachable!"); 3519245431Sdim case X86::SUB64rm: NewOpcode = X86::CMP64rm; break; 3520245431Sdim case X86::SUB32rm: NewOpcode = X86::CMP32rm; break; 3521245431Sdim case X86::SUB16rm: NewOpcode = X86::CMP16rm; break; 3522245431Sdim case X86::SUB8rm: NewOpcode = X86::CMP8rm; break; 3523245431Sdim case X86::SUB64rr: NewOpcode = X86::CMP64rr; break; 3524245431Sdim case X86::SUB32rr: NewOpcode = X86::CMP32rr; break; 3525245431Sdim case X86::SUB16rr: NewOpcode = X86::CMP16rr; break; 3526245431Sdim case X86::SUB8rr: NewOpcode = X86::CMP8rr; break; 3527245431Sdim case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break; 3528245431Sdim case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break; 3529245431Sdim case X86::SUB32ri: NewOpcode = X86::CMP32ri; break; 3530245431Sdim case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break; 3531245431Sdim case X86::SUB16ri: NewOpcode = X86::CMP16ri; break; 3532245431Sdim case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break; 3533245431Sdim case X86::SUB8ri: NewOpcode = X86::CMP8ri; break; 3534245431Sdim } 3535245431Sdim CmpInstr->setDesc(get(NewOpcode)); 3536245431Sdim CmpInstr->RemoveOperand(0); 3537245431Sdim // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. 3538245431Sdim if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || 3539245431Sdim NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) 3540245431Sdim return false; 3541245431Sdim } 3542245431Sdim } 3543245431Sdim 3544245431Sdim // Get the unique definition of SrcReg. 3545245431Sdim MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 3546245431Sdim if (!MI) return false; 3547245431Sdim 3548245431Sdim // CmpInstr is the first instruction of the BB. 3549245431Sdim MachineBasicBlock::iterator I = CmpInstr, Def = MI; 3550245431Sdim 3551245431Sdim // If we are comparing against zero, check whether we can use MI to update 3552245431Sdim // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize. 3553245431Sdim bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0); 3554245431Sdim if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() || 3555245431Sdim !isDefConvertible(MI))) 3556245431Sdim return false; 3557245431Sdim 3558245431Sdim // We are searching for an earlier instruction that can make CmpInstr 3559245431Sdim // redundant and that instruction will be saved in Sub. 3560245431Sdim MachineInstr *Sub = NULL; 3561245431Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 3562245431Sdim 3563245431Sdim // We iterate backward, starting from the instruction before CmpInstr and 3564245431Sdim // stop when reaching the definition of a source register or done with the BB. 3565245431Sdim // RI points to the instruction before CmpInstr. 3566245431Sdim // If the definition is in this basic block, RE points to the definition; 3567245431Sdim // otherwise, RE is the rend of the basic block. 3568245431Sdim MachineBasicBlock::reverse_iterator 3569245431Sdim RI = MachineBasicBlock::reverse_iterator(I), 3570245431Sdim RE = CmpInstr->getParent() == MI->getParent() ? 3571245431Sdim MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ : 3572245431Sdim CmpInstr->getParent()->rend(); 3573245431Sdim MachineInstr *Movr0Inst = 0; 3574245431Sdim for (; RI != RE; ++RI) { 3575245431Sdim MachineInstr *Instr = &*RI; 3576245431Sdim // Check whether CmpInstr can be made redundant by the current instruction. 3577245431Sdim if (!IsCmpZero && 3578245431Sdim isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) { 3579245431Sdim Sub = Instr; 3580245431Sdim break; 3581245431Sdim } 3582245431Sdim 3583245431Sdim if (Instr->modifiesRegister(X86::EFLAGS, TRI) || 3584245431Sdim Instr->readsRegister(X86::EFLAGS, TRI)) { 3585245431Sdim // This instruction modifies or uses EFLAGS. 3586245431Sdim 3587245431Sdim // MOV32r0 etc. are implemented with xor which clobbers condition code. 3588245431Sdim // They are safe to move up, if the definition to EFLAGS is dead and 3589245431Sdim // earlier instructions do not read or write EFLAGS. 3590263509Sdim if (!Movr0Inst && Instr->getOpcode() == X86::MOV32r0 && 3591245431Sdim Instr->registerDefIsDead(X86::EFLAGS, TRI)) { 3592245431Sdim Movr0Inst = Instr; 3593245431Sdim continue; 3594245431Sdim } 3595245431Sdim 3596245431Sdim // We can't remove CmpInstr. 3597245431Sdim return false; 3598245431Sdim } 3599245431Sdim } 3600245431Sdim 3601245431Sdim // Return false if no candidates exist. 3602245431Sdim if (!IsCmpZero && !Sub) 3603245431Sdim return false; 3604245431Sdim 3605245431Sdim bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && 3606245431Sdim Sub->getOperand(2).getReg() == SrcReg); 3607245431Sdim 3608245431Sdim // Scan forward from the instruction after CmpInstr for uses of EFLAGS. 3609245431Sdim // It is safe to remove CmpInstr if EFLAGS is redefined or killed. 3610245431Sdim // If we are done with the basic block, we need to check whether EFLAGS is 3611245431Sdim // live-out. 3612245431Sdim bool IsSafe = false; 3613245431Sdim SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate; 3614245431Sdim MachineBasicBlock::iterator E = CmpInstr->getParent()->end(); 3615245431Sdim for (++I; I != E; ++I) { 3616245431Sdim const MachineInstr &Instr = *I; 3617245431Sdim bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI); 3618245431Sdim bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI); 3619245431Sdim // We should check the usage if this instruction uses and updates EFLAGS. 3620245431Sdim if (!UseEFLAGS && ModifyEFLAGS) { 3621245431Sdim // It is safe to remove CmpInstr if EFLAGS is updated again. 3622245431Sdim IsSafe = true; 3623245431Sdim break; 3624245431Sdim } 3625245431Sdim if (!UseEFLAGS && !ModifyEFLAGS) 3626245431Sdim continue; 3627245431Sdim 3628245431Sdim // EFLAGS is used by this instruction. 3629245431Sdim X86::CondCode OldCC; 3630245431Sdim bool OpcIsSET = false; 3631245431Sdim if (IsCmpZero || IsSwapped) { 3632245431Sdim // We decode the condition code from opcode. 3633245431Sdim if (Instr.isBranch()) 3634245431Sdim OldCC = getCondFromBranchOpc(Instr.getOpcode()); 3635245431Sdim else { 3636245431Sdim OldCC = getCondFromSETOpc(Instr.getOpcode()); 3637245431Sdim if (OldCC != X86::COND_INVALID) 3638245431Sdim OpcIsSET = true; 3639245431Sdim else 3640245431Sdim OldCC = X86::getCondFromCMovOpc(Instr.getOpcode()); 3641245431Sdim } 3642245431Sdim if (OldCC == X86::COND_INVALID) return false; 3643245431Sdim } 3644245431Sdim if (IsCmpZero) { 3645245431Sdim switch (OldCC) { 3646245431Sdim default: break; 3647245431Sdim case X86::COND_A: case X86::COND_AE: 3648245431Sdim case X86::COND_B: case X86::COND_BE: 3649245431Sdim case X86::COND_G: case X86::COND_GE: 3650245431Sdim case X86::COND_L: case X86::COND_LE: 3651245431Sdim case X86::COND_O: case X86::COND_NO: 3652245431Sdim // CF and OF are used, we can't perform this optimization. 3653245431Sdim return false; 3654245431Sdim } 3655245431Sdim } else if (IsSwapped) { 3656245431Sdim // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs 3657245431Sdim // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 3658245431Sdim // We swap the condition code and synthesize the new opcode. 3659245431Sdim X86::CondCode NewCC = getSwappedCondition(OldCC); 3660245431Sdim if (NewCC == X86::COND_INVALID) return false; 3661245431Sdim 3662245431Sdim // Synthesize the new opcode. 3663245431Sdim bool HasMemoryOperand = Instr.hasOneMemOperand(); 3664245431Sdim unsigned NewOpc; 3665245431Sdim if (Instr.isBranch()) 3666245431Sdim NewOpc = GetCondBranchFromCond(NewCC); 3667245431Sdim else if(OpcIsSET) 3668245431Sdim NewOpc = getSETFromCond(NewCC, HasMemoryOperand); 3669245431Sdim else { 3670245431Sdim unsigned DstReg = Instr.getOperand(0).getReg(); 3671245431Sdim NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(), 3672245431Sdim HasMemoryOperand); 3673245431Sdim } 3674245431Sdim 3675245431Sdim // Push the MachineInstr to OpsToUpdate. 3676245431Sdim // If it is safe to remove CmpInstr, the condition code of these 3677245431Sdim // instructions will be modified. 3678245431Sdim OpsToUpdate.push_back(std::make_pair(&*I, NewOpc)); 3679245431Sdim } 3680245431Sdim if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { 3681245431Sdim // It is safe to remove CmpInstr if EFLAGS is updated again or killed. 3682245431Sdim IsSafe = true; 3683245431Sdim break; 3684245431Sdim } 3685245431Sdim } 3686245431Sdim 3687245431Sdim // If EFLAGS is not killed nor re-defined, we should check whether it is 3688245431Sdim // live-out. If it is live-out, do not optimize. 3689245431Sdim if ((IsCmpZero || IsSwapped) && !IsSafe) { 3690245431Sdim MachineBasicBlock *MBB = CmpInstr->getParent(); 3691245431Sdim for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 3692245431Sdim SE = MBB->succ_end(); SI != SE; ++SI) 3693245431Sdim if ((*SI)->isLiveIn(X86::EFLAGS)) 3694245431Sdim return false; 3695245431Sdim } 3696245431Sdim 3697245431Sdim // The instruction to be updated is either Sub or MI. 3698245431Sdim Sub = IsCmpZero ? MI : Sub; 3699263509Sdim // Move Movr0Inst to the appropriate place before Sub. 3700245431Sdim if (Movr0Inst) { 3701263509Sdim // Look backwards until we find a def that doesn't use the current EFLAGS. 3702263509Sdim Def = Sub; 3703263509Sdim MachineBasicBlock::reverse_iterator 3704263509Sdim InsertI = MachineBasicBlock::reverse_iterator(++Def), 3705263509Sdim InsertE = Sub->getParent()->rend(); 3706263509Sdim for (; InsertI != InsertE; ++InsertI) { 3707263509Sdim MachineInstr *Instr = &*InsertI; 3708263509Sdim if (!Instr->readsRegister(X86::EFLAGS, TRI) && 3709263509Sdim Instr->modifiesRegister(X86::EFLAGS, TRI)) { 3710263509Sdim Sub->getParent()->remove(Movr0Inst); 3711263509Sdim Instr->getParent()->insert(MachineBasicBlock::iterator(Instr), 3712263509Sdim Movr0Inst); 3713263509Sdim break; 3714263509Sdim } 3715263509Sdim } 3716263509Sdim if (InsertI == InsertE) 3717263509Sdim return false; 3718245431Sdim } 3719245431Sdim 3720245431Sdim // Make sure Sub instruction defines EFLAGS and mark the def live. 3721263509Sdim unsigned i = 0, e = Sub->getNumOperands(); 3722263509Sdim for (; i != e; ++i) { 3723263509Sdim MachineOperand &MO = Sub->getOperand(i); 3724263509Sdim if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) { 3725263509Sdim MO.setIsDead(false); 3726263509Sdim break; 3727263509Sdim } 3728263509Sdim } 3729263509Sdim assert(i != e && "Unable to locate a def EFLAGS operand"); 3730263509Sdim 3731245431Sdim CmpInstr->eraseFromParent(); 3732245431Sdim 3733245431Sdim // Modify the condition code of instructions in OpsToUpdate. 3734245431Sdim for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++) 3735245431Sdim OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second)); 3736245431Sdim return true; 3737245431Sdim} 3738245431Sdim 3739245431Sdim/// optimizeLoadInstr - Try to remove the load by folding it to a register 3740245431Sdim/// operand at the use. We fold the load instructions if load defines a virtual 3741245431Sdim/// register, the virtual register is used once in the same BB, and the 3742245431Sdim/// instructions in-between do not load or store, and have no side effects. 3743245431SdimMachineInstr* X86InstrInfo:: 3744245431SdimoptimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, 3745245431Sdim unsigned &FoldAsLoadDefReg, 3746245431Sdim MachineInstr *&DefMI) const { 3747245431Sdim if (FoldAsLoadDefReg == 0) 3748245431Sdim return 0; 3749245431Sdim // To be conservative, if there exists another load, clear the load candidate. 3750245431Sdim if (MI->mayLoad()) { 3751245431Sdim FoldAsLoadDefReg = 0; 3752245431Sdim return 0; 3753245431Sdim } 3754245431Sdim 3755245431Sdim // Check whether we can move DefMI here. 3756245431Sdim DefMI = MRI->getVRegDef(FoldAsLoadDefReg); 3757245431Sdim assert(DefMI); 3758245431Sdim bool SawStore = false; 3759245431Sdim if (!DefMI->isSafeToMove(this, 0, SawStore)) 3760245431Sdim return 0; 3761245431Sdim 3762245431Sdim // We try to commute MI if possible. 3763245431Sdim unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1; 3764245431Sdim for (unsigned Idx = 0; Idx < IdxEnd; Idx++) { 3765245431Sdim // Collect information about virtual register operands of MI. 3766245431Sdim unsigned SrcOperandId = 0; 3767245431Sdim bool FoundSrcOperand = false; 3768245431Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 3769245431Sdim MachineOperand &MO = MI->getOperand(i); 3770245431Sdim if (!MO.isReg()) 3771245431Sdim continue; 3772245431Sdim unsigned Reg = MO.getReg(); 3773245431Sdim if (Reg != FoldAsLoadDefReg) 3774245431Sdim continue; 3775245431Sdim // Do not fold if we have a subreg use or a def or multiple uses. 3776245431Sdim if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) 3777245431Sdim return 0; 3778245431Sdim 3779245431Sdim SrcOperandId = i; 3780245431Sdim FoundSrcOperand = true; 3781245431Sdim } 3782245431Sdim if (!FoundSrcOperand) return 0; 3783245431Sdim 3784245431Sdim // Check whether we can fold the def into SrcOperandId. 3785245431Sdim SmallVector<unsigned, 8> Ops; 3786245431Sdim Ops.push_back(SrcOperandId); 3787245431Sdim MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI); 3788245431Sdim if (FoldMI) { 3789245431Sdim FoldAsLoadDefReg = 0; 3790245431Sdim return FoldMI; 3791245431Sdim } 3792245431Sdim 3793245431Sdim if (Idx == 1) { 3794245431Sdim // MI was changed but it didn't help, commute it back! 3795245431Sdim commuteInstruction(MI, false); 3796245431Sdim return 0; 3797245431Sdim } 3798245431Sdim 3799245431Sdim // Check whether we can commute MI and enable folding. 3800245431Sdim if (MI->isCommutable()) { 3801245431Sdim MachineInstr *NewMI = commuteInstruction(MI, false); 3802245431Sdim // Unable to commute. 3803245431Sdim if (!NewMI) return 0; 3804245431Sdim if (NewMI != MI) { 3805245431Sdim // New instruction. It doesn't need to be kept. 3806245431Sdim NewMI->eraseFromParent(); 3807245431Sdim return 0; 3808245431Sdim } 3809245431Sdim } 3810245431Sdim } 3811245431Sdim return 0; 3812245431Sdim} 3813245431Sdim 3814226890Sdim/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr 3815226890Sdim/// instruction with two undef reads of the register being defined. This is 3816226890Sdim/// used for mapping: 3817226890Sdim/// %xmm4 = V_SET0 3818226890Sdim/// to: 3819226890Sdim/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef> 3820226890Sdim/// 3821252723Sdimstatic bool Expand2AddrUndef(MachineInstrBuilder &MIB, 3822252723Sdim const MCInstrDesc &Desc) { 3823226890Sdim assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); 3824252723Sdim unsigned Reg = MIB->getOperand(0).getReg(); 3825252723Sdim MIB->setDesc(Desc); 3826226890Sdim 3827226890Sdim // MachineInstr::addOperand() will insert explicit operands before any 3828226890Sdim // implicit operands. 3829252723Sdim MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); 3830226890Sdim // But we don't trust that. 3831252723Sdim assert(MIB->getOperand(1).getReg() == Reg && 3832252723Sdim MIB->getOperand(2).getReg() == Reg && "Misplaced operand"); 3833226890Sdim return true; 3834226890Sdim} 3835226890Sdim 3836226890Sdimbool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { 3837226890Sdim bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); 3838252723Sdim MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); 3839226890Sdim switch (MI->getOpcode()) { 3840245431Sdim case X86::SETB_C8r: 3841252723Sdim return Expand2AddrUndef(MIB, get(X86::SBB8rr)); 3842245431Sdim case X86::SETB_C16r: 3843252723Sdim return Expand2AddrUndef(MIB, get(X86::SBB16rr)); 3844245431Sdim case X86::SETB_C32r: 3845252723Sdim return Expand2AddrUndef(MIB, get(X86::SBB32rr)); 3846245431Sdim case X86::SETB_C64r: 3847252723Sdim return Expand2AddrUndef(MIB, get(X86::SBB64rr)); 3848226890Sdim case X86::V_SET0: 3849235633Sdim case X86::FsFLD0SS: 3850235633Sdim case X86::FsFLD0SD: 3851252723Sdim return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); 3852245431Sdim case X86::AVX_SET0: 3853245431Sdim assert(HasAVX && "AVX not supported"); 3854252723Sdim return Expand2AddrUndef(MIB, get(X86::VXORPSYrr)); 3855263509Sdim case X86::AVX512_512_SET0: 3856263509Sdim return Expand2AddrUndef(MIB, get(X86::VPXORDZrr)); 3857245431Sdim case X86::V_SETALLONES: 3858252723Sdim return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); 3859245431Sdim case X86::AVX2_SETALLONES: 3860252723Sdim return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); 3861226890Sdim case X86::TEST8ri_NOREX: 3862226890Sdim MI->setDesc(get(X86::TEST8ri)); 3863226890Sdim return true; 3864263509Sdim case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr)); 3865263509Sdim case X86::KSET1B: 3866263509Sdim case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr)); 3867226890Sdim } 3868226890Sdim return false; 3869226890Sdim} 3870226890Sdim 3871193323Sedstatic MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, 3872193323Sed const SmallVectorImpl<MachineOperand> &MOs, 3873193323Sed MachineInstr *MI, 3874193323Sed const TargetInstrInfo &TII) { 3875193323Sed // Create the base instruction with the memory operand as the first part. 3876252723Sdim // Omit the implicit operands, something BuildMI can't do. 3877193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 3878193323Sed MI->getDebugLoc(), true); 3879252723Sdim MachineInstrBuilder MIB(MF, NewMI); 3880193323Sed unsigned NumAddrOps = MOs.size(); 3881193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 3882193323Sed MIB.addOperand(MOs[i]); 3883193323Sed if (NumAddrOps < 4) // FrameIndex only 3884193323Sed addOffset(MIB, 0); 3885218893Sdim 3886193323Sed // Loop over the rest of the ri operands, converting them over. 3887193323Sed unsigned NumOps = MI->getDesc().getNumOperands()-2; 3888193323Sed for (unsigned i = 0; i != NumOps; ++i) { 3889193323Sed MachineOperand &MO = MI->getOperand(i+2); 3890193323Sed MIB.addOperand(MO); 3891193323Sed } 3892193323Sed for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { 3893193323Sed MachineOperand &MO = MI->getOperand(i); 3894193323Sed MIB.addOperand(MO); 3895193323Sed } 3896193323Sed return MIB; 3897193323Sed} 3898193323Sed 3899193323Sedstatic MachineInstr *FuseInst(MachineFunction &MF, 3900193323Sed unsigned Opcode, unsigned OpNo, 3901193323Sed const SmallVectorImpl<MachineOperand> &MOs, 3902193323Sed MachineInstr *MI, const TargetInstrInfo &TII) { 3903252723Sdim // Omit the implicit operands, something BuildMI can't do. 3904193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 3905193323Sed MI->getDebugLoc(), true); 3906252723Sdim MachineInstrBuilder MIB(MF, NewMI); 3907218893Sdim 3908193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 3909193323Sed MachineOperand &MO = MI->getOperand(i); 3910193323Sed if (i == OpNo) { 3911193323Sed assert(MO.isReg() && "Expected to fold into reg operand!"); 3912193323Sed unsigned NumAddrOps = MOs.size(); 3913193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 3914193323Sed MIB.addOperand(MOs[i]); 3915193323Sed if (NumAddrOps < 4) // FrameIndex only 3916193323Sed addOffset(MIB, 0); 3917193323Sed } else { 3918193323Sed MIB.addOperand(MO); 3919193323Sed } 3920193323Sed } 3921193323Sed return MIB; 3922193323Sed} 3923193323Sed 3924193323Sedstatic MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 3925193323Sed const SmallVectorImpl<MachineOperand> &MOs, 3926193323Sed MachineInstr *MI) { 3927193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 3928193323Sed MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); 3929193323Sed 3930193323Sed unsigned NumAddrOps = MOs.size(); 3931193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 3932193323Sed MIB.addOperand(MOs[i]); 3933193323Sed if (NumAddrOps < 4) // FrameIndex only 3934193323Sed addOffset(MIB, 0); 3935193323Sed return MIB.addImm(0); 3936193323Sed} 3937193323Sed 3938193323SedMachineInstr* 3939193323SedX86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 3940193323Sed MachineInstr *MI, unsigned i, 3941198090Srdivacky const SmallVectorImpl<MachineOperand> &MOs, 3942198090Srdivacky unsigned Size, unsigned Align) const { 3943218893Sdim const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; 3944252723Sdim bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect(); 3945193323Sed bool isTwoAddrFold = false; 3946252723Sdim 3947252723Sdim // Atom favors register form of call. So, we do not fold loads into calls 3948252723Sdim // when X86Subtarget is Atom. 3949252723Sdim if (isCallRegIndirect && 3950252723Sdim (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) { 3951252723Sdim return NULL; 3952252723Sdim } 3953252723Sdim 3954193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 3955193323Sed bool isTwoAddr = NumOps > 1 && 3956224145Sdim MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; 3957193323Sed 3958221345Sdim // FIXME: AsmPrinter doesn't know how to handle 3959221345Sdim // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. 3960221345Sdim if (MI->getOpcode() == X86::ADD32ri && 3961221345Sdim MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) 3962221345Sdim return NULL; 3963221345Sdim 3964193323Sed MachineInstr *NewMI = NULL; 3965193323Sed // Folding a memory location into the two-address part of a two-address 3966193323Sed // instruction is different than folding it other places. It requires 3967193323Sed // replacing the *two* registers with the memory location. 3968193323Sed if (isTwoAddr && NumOps >= 2 && i < 2 && 3969193323Sed MI->getOperand(0).isReg() && 3970193323Sed MI->getOperand(1).isReg() && 3971218893Sdim MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 3972193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 3973193323Sed isTwoAddrFold = true; 3974193323Sed } else if (i == 0) { // If operand 0 3975263509Sdim if (MI->getOpcode() == X86::MOV32r0) { 3976263509Sdim NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); 3977263509Sdim if (NewMI) 3978263509Sdim return NewMI; 3979245431Sdim } 3980218893Sdim 3981193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 3982193323Sed } else if (i == 1) { 3983193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 3984193323Sed } else if (i == 2) { 3985193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 3986245431Sdim } else if (i == 3) { 3987245431Sdim OpcodeTablePtr = &RegOp2MemOpTable3; 3988193323Sed } 3989218893Sdim 3990193323Sed // If table selected... 3991193323Sed if (OpcodeTablePtr) { 3992193323Sed // Find the Opcode to fuse 3993218893Sdim DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = 3994218893Sdim OpcodeTablePtr->find(MI->getOpcode()); 3995193323Sed if (I != OpcodeTablePtr->end()) { 3996198090Srdivacky unsigned Opcode = I->second.first; 3997226890Sdim unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; 3998198090Srdivacky if (Align < MinAlign) 3999198090Srdivacky return NULL; 4000198090Srdivacky bool NarrowToMOV32rm = false; 4001198090Srdivacky if (Size) { 4002245431Sdim unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize(); 4003198090Srdivacky if (Size < RCSize) { 4004198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 4005198090Srdivacky // narrower than the load width, then it's not. 4006198090Srdivacky if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) 4007198090Srdivacky return NULL; 4008198090Srdivacky // If this is a 64-bit load, but the spill slot is 32, then we can do 4009198090Srdivacky // a 32-bit load which is implicitly zero-extended. This likely is due 4010198090Srdivacky // to liveintervalanalysis remat'ing a load from stack slot. 4011198090Srdivacky if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) 4012198090Srdivacky return NULL; 4013198090Srdivacky Opcode = X86::MOV32rm; 4014198090Srdivacky NarrowToMOV32rm = true; 4015198090Srdivacky } 4016198090Srdivacky } 4017198090Srdivacky 4018193323Sed if (isTwoAddrFold) 4019198090Srdivacky NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); 4020193323Sed else 4021198090Srdivacky NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this); 4022198090Srdivacky 4023198090Srdivacky if (NarrowToMOV32rm) { 4024198090Srdivacky // If this is the special case where we use a MOV32rm to load a 32-bit 4025198090Srdivacky // value and zero-extend the top bits. Change the destination register 4026198090Srdivacky // to a 32-bit one. 4027198090Srdivacky unsigned DstReg = NewMI->getOperand(0).getReg(); 4028198090Srdivacky if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 4029198090Srdivacky NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, 4030208599Srdivacky X86::sub_32bit)); 4031198090Srdivacky else 4032208599Srdivacky NewMI->getOperand(0).setSubReg(X86::sub_32bit); 4033198090Srdivacky } 4034193323Sed return NewMI; 4035193323Sed } 4036193323Sed } 4037218893Sdim 4038218893Sdim // No fusion 4039210299Sed if (PrintFailedFusing && !MI->isCopy()) 4040202375Srdivacky dbgs() << "We failed to fuse operand " << i << " in " << *MI; 4041193323Sed return NULL; 4042193323Sed} 4043193323Sed 4044226890Sdim/// hasPartialRegUpdate - Return true for all instructions that only update 4045226890Sdim/// the first 32 or 64-bits of the destination register and leave the rest 4046226890Sdim/// unmodified. This can be used to avoid folding loads if the instructions 4047226890Sdim/// only update part of the destination register, and the non-updated part is 4048226890Sdim/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these 4049226890Sdim/// instructions breaks the partial register dependency and it can improve 4050226890Sdim/// performance. e.g.: 4051226890Sdim/// 4052226890Sdim/// movss (%rdi), %xmm0 4053226890Sdim/// cvtss2sd %xmm0, %xmm0 4054226890Sdim/// 4055226890Sdim/// Instead of 4056226890Sdim/// cvtss2sd (%rdi), %xmm0 4057226890Sdim/// 4058226890Sdim/// FIXME: This should be turned into a TSFlags. 4059226890Sdim/// 4060226890Sdimstatic bool hasPartialRegUpdate(unsigned Opcode) { 4061226890Sdim switch (Opcode) { 4062235633Sdim case X86::CVTSI2SSrr: 4063235633Sdim case X86::CVTSI2SS64rr: 4064235633Sdim case X86::CVTSI2SDrr: 4065235633Sdim case X86::CVTSI2SD64rr: 4066226890Sdim case X86::CVTSD2SSrr: 4067226890Sdim case X86::Int_CVTSD2SSrr: 4068226890Sdim case X86::CVTSS2SDrr: 4069226890Sdim case X86::Int_CVTSS2SDrr: 4070226890Sdim case X86::RCPSSr: 4071226890Sdim case X86::RCPSSr_Int: 4072226890Sdim case X86::ROUNDSDr: 4073235633Sdim case X86::ROUNDSDr_Int: 4074226890Sdim case X86::ROUNDSSr: 4075235633Sdim case X86::ROUNDSSr_Int: 4076226890Sdim case X86::RSQRTSSr: 4077226890Sdim case X86::RSQRTSSr_Int: 4078226890Sdim case X86::SQRTSSr: 4079226890Sdim case X86::SQRTSSr_Int: 4080226890Sdim return true; 4081226890Sdim } 4082193323Sed 4083226890Sdim return false; 4084226890Sdim} 4085226890Sdim 4086235633Sdim/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle 4087235633Sdim/// instructions we would like before a partial register update. 4088235633Sdimunsigned X86InstrInfo:: 4089235633SdimgetPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, 4090235633Sdim const TargetRegisterInfo *TRI) const { 4091235633Sdim if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode())) 4092235633Sdim return 0; 4093235633Sdim 4094235633Sdim // If MI is marked as reading Reg, the partial register update is wanted. 4095235633Sdim const MachineOperand &MO = MI->getOperand(0); 4096235633Sdim unsigned Reg = MO.getReg(); 4097235633Sdim if (TargetRegisterInfo::isVirtualRegister(Reg)) { 4098235633Sdim if (MO.readsReg() || MI->readsVirtualRegister(Reg)) 4099235633Sdim return 0; 4100235633Sdim } else { 4101235633Sdim if (MI->readsRegister(Reg, TRI)) 4102235633Sdim return 0; 4103235633Sdim } 4104235633Sdim 4105235633Sdim // If any of the preceding 16 instructions are reading Reg, insert a 4106235633Sdim // dependency breaking instruction. The magic number is based on a few 4107235633Sdim // Nehalem experiments. 4108235633Sdim return 16; 4109235633Sdim} 4110235633Sdim 4111263509Sdim// Return true for any instruction the copies the high bits of the first source 4112263509Sdim// operand into the unused high bits of the destination operand. 4113263509Sdimstatic bool hasUndefRegUpdate(unsigned Opcode) { 4114263509Sdim switch (Opcode) { 4115263509Sdim case X86::VCVTSI2SSrr: 4116263509Sdim case X86::Int_VCVTSI2SSrr: 4117263509Sdim case X86::VCVTSI2SS64rr: 4118263509Sdim case X86::Int_VCVTSI2SS64rr: 4119263509Sdim case X86::VCVTSI2SDrr: 4120263509Sdim case X86::Int_VCVTSI2SDrr: 4121263509Sdim case X86::VCVTSI2SD64rr: 4122263509Sdim case X86::Int_VCVTSI2SD64rr: 4123263509Sdim case X86::VCVTSD2SSrr: 4124263509Sdim case X86::Int_VCVTSD2SSrr: 4125263509Sdim case X86::VCVTSS2SDrr: 4126263509Sdim case X86::Int_VCVTSS2SDrr: 4127263509Sdim case X86::VRCPSSr: 4128263509Sdim case X86::VROUNDSDr: 4129263509Sdim case X86::VROUNDSDr_Int: 4130263509Sdim case X86::VROUNDSSr: 4131263509Sdim case X86::VROUNDSSr_Int: 4132263509Sdim case X86::VRSQRTSSr: 4133263509Sdim case X86::VSQRTSSr: 4134263509Sdim 4135263509Sdim // AVX-512 4136263509Sdim case X86::VCVTSD2SSZrr: 4137263509Sdim case X86::VCVTSS2SDZrr: 4138263509Sdim return true; 4139263509Sdim } 4140263509Sdim 4141263509Sdim return false; 4142263509Sdim} 4143263509Sdim 4144263509Sdim/// Inform the ExeDepsFix pass how many idle instructions we would like before 4145263509Sdim/// certain undef register reads. 4146263509Sdim/// 4147263509Sdim/// This catches the VCVTSI2SD family of instructions: 4148263509Sdim/// 4149263509Sdim/// vcvtsi2sdq %rax, %xmm0<undef>, %xmm14 4150263509Sdim/// 4151263509Sdim/// We should to be careful *not* to catch VXOR idioms which are presumably 4152263509Sdim/// handled specially in the pipeline: 4153263509Sdim/// 4154263509Sdim/// vxorps %xmm1<undef>, %xmm1<undef>, %xmm1 4155263509Sdim/// 4156263509Sdim/// Like getPartialRegUpdateClearance, this makes a strong assumption that the 4157263509Sdim/// high bits that are passed-through are not live. 4158263509Sdimunsigned X86InstrInfo:: 4159263509SdimgetUndefRegClearance(const MachineInstr *MI, unsigned &OpNum, 4160263509Sdim const TargetRegisterInfo *TRI) const { 4161263509Sdim if (!hasUndefRegUpdate(MI->getOpcode())) 4162263509Sdim return 0; 4163263509Sdim 4164263509Sdim // Set the OpNum parameter to the first source operand. 4165263509Sdim OpNum = 1; 4166263509Sdim 4167263509Sdim const MachineOperand &MO = MI->getOperand(OpNum); 4168263509Sdim if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 4169263509Sdim // Use the same magic number as getPartialRegUpdateClearance. 4170263509Sdim return 16; 4171263509Sdim } 4172263509Sdim return 0; 4173263509Sdim} 4174263509Sdim 4175235633Sdimvoid X86InstrInfo:: 4176235633SdimbreakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, 4177235633Sdim const TargetRegisterInfo *TRI) const { 4178235633Sdim unsigned Reg = MI->getOperand(OpNum).getReg(); 4179263509Sdim // If MI kills this register, the false dependence is already broken. 4180263509Sdim if (MI->killsRegister(Reg, TRI)) 4181263509Sdim return; 4182235633Sdim if (X86::VR128RegClass.contains(Reg)) { 4183235633Sdim // These instructions are all floating point domain, so xorps is the best 4184235633Sdim // choice. 4185235633Sdim bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); 4186235633Sdim unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr; 4187235633Sdim BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg) 4188235633Sdim .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); 4189235633Sdim } else if (X86::VR256RegClass.contains(Reg)) { 4190235633Sdim // Use vxorps to clear the full ymm register. 4191235633Sdim // It wants to read and write the xmm sub-register. 4192235633Sdim unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); 4193235633Sdim BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg) 4194235633Sdim .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef) 4195235633Sdim .addReg(Reg, RegState::ImplicitDefine); 4196235633Sdim } else 4197235633Sdim return; 4198235633Sdim MI->addRegisterKilled(Reg, TRI, true); 4199235633Sdim} 4200235633Sdim 4201263509Sdimstatic MachineInstr* foldPatchpoint(MachineFunction &MF, 4202263509Sdim MachineInstr *MI, 4203263509Sdim const SmallVectorImpl<unsigned> &Ops, 4204263509Sdim int FrameIndex, 4205263509Sdim const TargetInstrInfo &TII) { 4206263509Sdim unsigned StartIdx = 0; 4207263509Sdim switch (MI->getOpcode()) { 4208263509Sdim case TargetOpcode::STACKMAP: 4209263509Sdim StartIdx = 2; // Skip ID, nShadowBytes. 4210263509Sdim break; 4211263509Sdim case TargetOpcode::PATCHPOINT: { 4212263509Sdim // For PatchPoint, the call args are not foldable. 4213263509Sdim PatchPointOpers opers(MI); 4214263509Sdim StartIdx = opers.getVarIdx(); 4215263509Sdim break; 4216263509Sdim } 4217263509Sdim default: 4218263509Sdim llvm_unreachable("unexpected stackmap opcode"); 4219263509Sdim } 4220263509Sdim 4221263509Sdim // Return false if any operands requested for folding are not foldable (not 4222263509Sdim // part of the stackmap's live values). 4223263509Sdim for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end(); 4224263509Sdim I != E; ++I) { 4225263509Sdim if (*I < StartIdx) 4226263509Sdim return 0; 4227263509Sdim } 4228263509Sdim 4229263509Sdim MachineInstr *NewMI = 4230263509Sdim MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); 4231263509Sdim MachineInstrBuilder MIB(MF, NewMI); 4232263509Sdim 4233263509Sdim // No need to fold return, the meta data, and function arguments 4234263509Sdim for (unsigned i = 0; i < StartIdx; ++i) 4235263509Sdim MIB.addOperand(MI->getOperand(i)); 4236263509Sdim 4237263509Sdim for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) { 4238263509Sdim MachineOperand &MO = MI->getOperand(i); 4239263509Sdim if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { 4240263509Sdim assert(MO.getReg() && "patchpoint can only fold a vreg operand"); 4241263509Sdim // Compute the spill slot size and offset. 4242263509Sdim const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg()); 4243263509Sdim unsigned SpillSize; 4244263509Sdim unsigned SpillOffset; 4245263509Sdim bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, 4246263509Sdim SpillOffset, &MF.getTarget()); 4247263509Sdim if (!Valid) 4248263509Sdim report_fatal_error("cannot spill patchpoint subregister operand"); 4249263509Sdim 4250263509Sdim MIB.addOperand(MachineOperand::CreateImm(StackMaps::IndirectMemRefOp)); 4251263509Sdim MIB.addOperand(MachineOperand::CreateImm(SpillSize)); 4252263509Sdim MIB.addOperand(MachineOperand::CreateFI(FrameIndex)); 4253263509Sdim addOffset(MIB, SpillOffset); 4254263509Sdim } 4255263509Sdim else 4256263509Sdim MIB.addOperand(MO); 4257263509Sdim } 4258263509Sdim return NewMI; 4259263509Sdim} 4260263509Sdim 4261263509SdimMachineInstr* 4262263509SdimX86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, 4263263509Sdim const SmallVectorImpl<unsigned> &Ops, 4264263509Sdim int FrameIndex) const { 4265263509Sdim // Special case stack map and patch point intrinsics. 4266263509Sdim if (MI->getOpcode() == TargetOpcode::STACKMAP 4267263509Sdim || MI->getOpcode() == TargetOpcode::PATCHPOINT) { 4268263509Sdim return foldPatchpoint(MF, MI, Ops, FrameIndex, *this); 4269263509Sdim } 4270218893Sdim // Check switch flag 4271193323Sed if (NoFusing) return NULL; 4272193323Sed 4273226890Sdim // Unless optimizing for size, don't fold to avoid partial 4274226890Sdim // register update stalls 4275252723Sdim if (!MF.getFunction()->getAttributes(). 4276252723Sdim hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && 4277226890Sdim hasPartialRegUpdate(MI->getOpcode())) 4278226890Sdim return 0; 4279201360Srdivacky 4280193323Sed const MachineFrameInfo *MFI = MF.getFrameInfo(); 4281198090Srdivacky unsigned Size = MFI->getObjectSize(FrameIndex); 4282193323Sed unsigned Alignment = MFI->getObjectAlignment(FrameIndex); 4283256178Sdim // If the function stack isn't realigned we don't want to fold instructions 4284256178Sdim // that need increased alignment. 4285256178Sdim if (!RI.needsStackRealignment(MF)) 4286256178Sdim Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment()); 4287193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 4288193323Sed unsigned NewOpc = 0; 4289198090Srdivacky unsigned RCSize = 0; 4290193323Sed switch (MI->getOpcode()) { 4291193323Sed default: return NULL; 4292198090Srdivacky case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; 4293208599Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; 4294208599Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; 4295208599Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break; 4296193323Sed } 4297198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 4298198090Srdivacky // narrower than the load width, then it's not. 4299198090Srdivacky if (Size < RCSize) 4300198090Srdivacky return NULL; 4301193323Sed // Change to CMPXXri r, 0 first. 4302193323Sed MI->setDesc(get(NewOpc)); 4303193323Sed MI->getOperand(1).ChangeToImmediate(0); 4304193323Sed } else if (Ops.size() != 1) 4305193323Sed return NULL; 4306193323Sed 4307193323Sed SmallVector<MachineOperand,4> MOs; 4308193323Sed MOs.push_back(MachineOperand::CreateFI(FrameIndex)); 4309198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); 4310193323Sed} 4311193323Sed 4312193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 4313193323Sed MachineInstr *MI, 4314198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 4315193323Sed MachineInstr *LoadMI) const { 4316263509Sdim // If loading from a FrameIndex, fold directly from the FrameIndex. 4317263509Sdim unsigned NumOps = LoadMI->getDesc().getNumOperands(); 4318263509Sdim int FrameIndex; 4319263509Sdim if (isLoadFromStackSlot(LoadMI, FrameIndex)) 4320263509Sdim return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); 4321263509Sdim 4322218893Sdim // Check switch flag 4323193323Sed if (NoFusing) return NULL; 4324193323Sed 4325226890Sdim // Unless optimizing for size, don't fold to avoid partial 4326226890Sdim // register update stalls 4327252723Sdim if (!MF.getFunction()->getAttributes(). 4328252723Sdim hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && 4329226890Sdim hasPartialRegUpdate(MI->getOpcode())) 4330226890Sdim return 0; 4331201360Srdivacky 4332193323Sed // Determine the alignment of the load. 4333193323Sed unsigned Alignment = 0; 4334193323Sed if (LoadMI->hasOneMemOperand()) 4335198090Srdivacky Alignment = (*LoadMI->memoperands_begin())->getAlignment(); 4336198090Srdivacky else 4337198090Srdivacky switch (LoadMI->getOpcode()) { 4338235633Sdim case X86::AVX2_SETALLONES: 4339245431Sdim case X86::AVX_SET0: 4340212904Sdim Alignment = 32; 4341212904Sdim break; 4342226890Sdim case X86::V_SET0: 4343198090Srdivacky case X86::V_SETALLONES: 4344198090Srdivacky Alignment = 16; 4345198090Srdivacky break; 4346198090Srdivacky case X86::FsFLD0SD: 4347198090Srdivacky Alignment = 8; 4348198090Srdivacky break; 4349198090Srdivacky case X86::FsFLD0SS: 4350198090Srdivacky Alignment = 4; 4351198090Srdivacky break; 4352198090Srdivacky default: 4353223017Sdim return 0; 4354193323Sed } 4355193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 4356193323Sed unsigned NewOpc = 0; 4357193323Sed switch (MI->getOpcode()) { 4358193323Sed default: return NULL; 4359193323Sed case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 4360208599Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; 4361208599Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; 4362208599Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri8; break; 4363193323Sed } 4364193323Sed // Change to CMPXXri r, 0 first. 4365193323Sed MI->setDesc(get(NewOpc)); 4366193323Sed MI->getOperand(1).ChangeToImmediate(0); 4367193323Sed } else if (Ops.size() != 1) 4368193323Sed return NULL; 4369193323Sed 4370212904Sdim // Make sure the subregisters match. 4371212904Sdim // Otherwise we risk changing the size of the load. 4372212904Sdim if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg()) 4373212904Sdim return NULL; 4374212904Sdim 4375210299Sed SmallVector<MachineOperand,X86::AddrNumOperands> MOs; 4376198090Srdivacky switch (LoadMI->getOpcode()) { 4377226890Sdim case X86::V_SET0: 4378198090Srdivacky case X86::V_SETALLONES: 4379235633Sdim case X86::AVX2_SETALLONES: 4380245431Sdim case X86::AVX_SET0: 4381198090Srdivacky case X86::FsFLD0SD: 4382235633Sdim case X86::FsFLD0SS: { 4383226890Sdim // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. 4384193323Sed // Create a constant-pool entry and operands to load from it. 4385193323Sed 4386204961Srdivacky // Medium and large mode can't fold loads this way. 4387204961Srdivacky if (TM.getCodeModel() != CodeModel::Small && 4388204961Srdivacky TM.getCodeModel() != CodeModel::Kernel) 4389204961Srdivacky return NULL; 4390204961Srdivacky 4391193323Sed // x86-32 PIC requires a PIC base register for constant pools. 4392193323Sed unsigned PICBase = 0; 4393198090Srdivacky if (TM.getRelocationModel() == Reloc::PIC_) { 4394198090Srdivacky if (TM.getSubtarget<X86Subtarget>().is64Bit()) 4395198090Srdivacky PICBase = X86::RIP; 4396198090Srdivacky else 4397210299Sed // FIXME: PICBase = getGlobalBaseReg(&MF); 4398198090Srdivacky // This doesn't work for several reasons. 4399198090Srdivacky // 1. GlobalBaseReg may have been spilled. 4400198090Srdivacky // 2. It may not be live at MI. 4401198090Srdivacky return NULL; 4402198090Srdivacky } 4403193323Sed 4404198090Srdivacky // Create a constant-pool entry. 4405193323Sed MachineConstantPool &MCP = *MF.getConstantPool(); 4406226890Sdim Type *Ty; 4407212904Sdim unsigned Opc = LoadMI->getOpcode(); 4408235633Sdim if (Opc == X86::FsFLD0SS) 4409198090Srdivacky Ty = Type::getFloatTy(MF.getFunction()->getContext()); 4410235633Sdim else if (Opc == X86::FsFLD0SD) 4411198090Srdivacky Ty = Type::getDoubleTy(MF.getFunction()->getContext()); 4412245431Sdim else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0) 4413235633Sdim Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); 4414198090Srdivacky else 4415198090Srdivacky Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); 4416226890Sdim 4417245431Sdim bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES); 4418226890Sdim const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : 4419226890Sdim Constant::getNullValue(Ty); 4420198090Srdivacky unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); 4421193323Sed 4422193323Sed // Create operands to load from the constant pool entry. 4423193323Sed MOs.push_back(MachineOperand::CreateReg(PICBase, false)); 4424193323Sed MOs.push_back(MachineOperand::CreateImm(1)); 4425193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 4426193323Sed MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); 4427193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 4428198090Srdivacky break; 4429198090Srdivacky } 4430198090Srdivacky default: { 4431252723Sdim if ((LoadMI->getOpcode() == X86::MOVSSrm || 4432252723Sdim LoadMI->getOpcode() == X86::VMOVSSrm) && 4433252723Sdim MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize() 4434252723Sdim > 4) 4435252723Sdim // These instructions only load 32 bits, we can't fold them if the 4436252723Sdim // destination register is wider than 32 bits (4 bytes). 4437252723Sdim return NULL; 4438252723Sdim if ((LoadMI->getOpcode() == X86::MOVSDrm || 4439252723Sdim LoadMI->getOpcode() == X86::VMOVSDrm) && 4440252723Sdim MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize() 4441252723Sdim > 8) 4442252723Sdim // These instructions only load 64 bits, we can't fold them if the 4443252723Sdim // destination register is wider than 64 bits (8 bytes). 4444252723Sdim return NULL; 4445252723Sdim 4446193323Sed // Folding a normal load. Just copy the load's address operands. 4447210299Sed for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) 4448193323Sed MOs.push_back(LoadMI->getOperand(i)); 4449198090Srdivacky break; 4450193323Sed } 4451198090Srdivacky } 4452198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); 4453193323Sed} 4454193323Sed 4455193323Sed 4456193323Sedbool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, 4457193323Sed const SmallVectorImpl<unsigned> &Ops) const { 4458218893Sdim // Check switch flag 4459193323Sed if (NoFusing) return 0; 4460193323Sed 4461193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 4462193323Sed switch (MI->getOpcode()) { 4463193323Sed default: return false; 4464218893Sdim case X86::TEST8rr: 4465193323Sed case X86::TEST16rr: 4466193323Sed case X86::TEST32rr: 4467193323Sed case X86::TEST64rr: 4468193323Sed return true; 4469221345Sdim case X86::ADD32ri: 4470221345Sdim // FIXME: AsmPrinter doesn't know how to handle 4471221345Sdim // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. 4472221345Sdim if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) 4473221345Sdim return false; 4474221345Sdim break; 4475193323Sed } 4476193323Sed } 4477193323Sed 4478193323Sed if (Ops.size() != 1) 4479193323Sed return false; 4480193323Sed 4481193323Sed unsigned OpNum = Ops[0]; 4482193323Sed unsigned Opc = MI->getOpcode(); 4483193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 4484193323Sed bool isTwoAddr = NumOps > 1 && 4485224145Sdim MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; 4486193323Sed 4487193323Sed // Folding a memory location into the two-address part of a two-address 4488193323Sed // instruction is different than folding it other places. It requires 4489193323Sed // replacing the *two* registers with the memory location. 4490218893Sdim const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; 4491218893Sdim if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 4492193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 4493193323Sed } else if (OpNum == 0) { // If operand 0 4494263509Sdim if (Opc == X86::MOV32r0) 4495263509Sdim return true; 4496263509Sdim 4497193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 4498193323Sed } else if (OpNum == 1) { 4499193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 4500193323Sed } else if (OpNum == 2) { 4501193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 4502245431Sdim } else if (OpNum == 3) { 4503245431Sdim OpcodeTablePtr = &RegOp2MemOpTable3; 4504193323Sed } 4505218893Sdim 4506218893Sdim if (OpcodeTablePtr && OpcodeTablePtr->count(Opc)) 4507218893Sdim return true; 4508252723Sdim return TargetInstrInfo::canFoldMemoryOperand(MI, Ops); 4509193323Sed} 4510193323Sed 4511193323Sedbool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, 4512193323Sed unsigned Reg, bool UnfoldLoad, bool UnfoldStore, 4513193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 4514218893Sdim DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = 4515218893Sdim MemOp2RegOpTable.find(MI->getOpcode()); 4516193323Sed if (I == MemOp2RegOpTable.end()) 4517193323Sed return false; 4518193323Sed unsigned Opc = I->second.first; 4519226890Sdim unsigned Index = I->second.second & TB_INDEX_MASK; 4520226890Sdim bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; 4521226890Sdim bool FoldedStore = I->second.second & TB_FOLDED_STORE; 4522193323Sed if (UnfoldLoad && !FoldedLoad) 4523193323Sed return false; 4524193323Sed UnfoldLoad &= FoldedLoad; 4525193323Sed if (UnfoldStore && !FoldedStore) 4526193323Sed return false; 4527193323Sed UnfoldStore &= FoldedStore; 4528193323Sed 4529224145Sdim const MCInstrDesc &MCID = get(Opc); 4530245431Sdim const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); 4531210299Sed if (!MI->hasOneMemOperand() && 4532210299Sed RC == &X86::VR128RegClass && 4533210299Sed !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) 4534210299Sed // Without memoperands, loadRegFromAddr and storeRegToStackSlot will 4535210299Sed // conservatively assume the address is unaligned. That's bad for 4536210299Sed // performance. 4537210299Sed return false; 4538210299Sed SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps; 4539193323Sed SmallVector<MachineOperand,2> BeforeOps; 4540193323Sed SmallVector<MachineOperand,2> AfterOps; 4541193323Sed SmallVector<MachineOperand,4> ImpOps; 4542193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 4543193323Sed MachineOperand &Op = MI->getOperand(i); 4544210299Sed if (i >= Index && i < Index + X86::AddrNumOperands) 4545193323Sed AddrOps.push_back(Op); 4546193323Sed else if (Op.isReg() && Op.isImplicit()) 4547193323Sed ImpOps.push_back(Op); 4548193323Sed else if (i < Index) 4549193323Sed BeforeOps.push_back(Op); 4550193323Sed else if (i > Index) 4551193323Sed AfterOps.push_back(Op); 4552193323Sed } 4553193323Sed 4554193323Sed // Emit the load instruction. 4555193323Sed if (UnfoldLoad) { 4556198090Srdivacky std::pair<MachineInstr::mmo_iterator, 4557198090Srdivacky MachineInstr::mmo_iterator> MMOs = 4558198090Srdivacky MF.extractLoadMemRefs(MI->memoperands_begin(), 4559198090Srdivacky MI->memoperands_end()); 4560198090Srdivacky loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); 4561193323Sed if (UnfoldStore) { 4562193323Sed // Address operands cannot be marked isKill. 4563210299Sed for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) { 4564193323Sed MachineOperand &MO = NewMIs[0]->getOperand(i); 4565193323Sed if (MO.isReg()) 4566193323Sed MO.setIsKill(false); 4567193323Sed } 4568193323Sed } 4569193323Sed } 4570193323Sed 4571193323Sed // Emit the data processing instruction. 4572224145Sdim MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true); 4573252723Sdim MachineInstrBuilder MIB(MF, DataMI); 4574218893Sdim 4575193323Sed if (FoldedStore) 4576193323Sed MIB.addReg(Reg, RegState::Define); 4577193323Sed for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) 4578193323Sed MIB.addOperand(BeforeOps[i]); 4579193323Sed if (FoldedLoad) 4580193323Sed MIB.addReg(Reg); 4581193323Sed for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) 4582193323Sed MIB.addOperand(AfterOps[i]); 4583193323Sed for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { 4584193323Sed MachineOperand &MO = ImpOps[i]; 4585193323Sed MIB.addReg(MO.getReg(), 4586193323Sed getDefRegState(MO.isDef()) | 4587193323Sed RegState::Implicit | 4588193323Sed getKillRegState(MO.isKill()) | 4589195340Sed getDeadRegState(MO.isDead()) | 4590195340Sed getUndefRegState(MO.isUndef())); 4591193323Sed } 4592193323Sed // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 4593193323Sed switch (DataMI->getOpcode()) { 4594193323Sed default: break; 4595193323Sed case X86::CMP64ri32: 4596208599Srdivacky case X86::CMP64ri8: 4597193323Sed case X86::CMP32ri: 4598208599Srdivacky case X86::CMP32ri8: 4599193323Sed case X86::CMP16ri: 4600208599Srdivacky case X86::CMP16ri8: 4601193323Sed case X86::CMP8ri: { 4602193323Sed MachineOperand &MO0 = DataMI->getOperand(0); 4603193323Sed MachineOperand &MO1 = DataMI->getOperand(1); 4604193323Sed if (MO1.getImm() == 0) { 4605245431Sdim unsigned NewOpc; 4606193323Sed switch (DataMI->getOpcode()) { 4607245431Sdim default: llvm_unreachable("Unreachable!"); 4608208599Srdivacky case X86::CMP64ri8: 4609193323Sed case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; 4610208599Srdivacky case X86::CMP32ri8: 4611193323Sed case X86::CMP32ri: NewOpc = X86::TEST32rr; break; 4612208599Srdivacky case X86::CMP16ri8: 4613193323Sed case X86::CMP16ri: NewOpc = X86::TEST16rr; break; 4614193323Sed case X86::CMP8ri: NewOpc = X86::TEST8rr; break; 4615193323Sed } 4616193323Sed DataMI->setDesc(get(NewOpc)); 4617193323Sed MO1.ChangeToRegister(MO0.getReg(), false); 4618193323Sed } 4619193323Sed } 4620193323Sed } 4621193323Sed NewMIs.push_back(DataMI); 4622193323Sed 4623193323Sed // Emit the store instruction. 4624193323Sed if (UnfoldStore) { 4625245431Sdim const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF); 4626198090Srdivacky std::pair<MachineInstr::mmo_iterator, 4627198090Srdivacky MachineInstr::mmo_iterator> MMOs = 4628198090Srdivacky MF.extractStoreMemRefs(MI->memoperands_begin(), 4629198090Srdivacky MI->memoperands_end()); 4630198090Srdivacky storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); 4631193323Sed } 4632193323Sed 4633193323Sed return true; 4634193323Sed} 4635193323Sed 4636193323Sedbool 4637193323SedX86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 4638193323Sed SmallVectorImpl<SDNode*> &NewNodes) const { 4639193323Sed if (!N->isMachineOpcode()) 4640193323Sed return false; 4641193323Sed 4642218893Sdim DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = 4643218893Sdim MemOp2RegOpTable.find(N->getMachineOpcode()); 4644193323Sed if (I == MemOp2RegOpTable.end()) 4645193323Sed return false; 4646193323Sed unsigned Opc = I->second.first; 4647226890Sdim unsigned Index = I->second.second & TB_INDEX_MASK; 4648226890Sdim bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; 4649226890Sdim bool FoldedStore = I->second.second & TB_FOLDED_STORE; 4650224145Sdim const MCInstrDesc &MCID = get(Opc); 4651245431Sdim MachineFunction &MF = DAG.getMachineFunction(); 4652245431Sdim const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); 4653224145Sdim unsigned NumDefs = MCID.NumDefs; 4654193323Sed std::vector<SDValue> AddrOps; 4655193323Sed std::vector<SDValue> BeforeOps; 4656193323Sed std::vector<SDValue> AfterOps; 4657263509Sdim SDLoc dl(N); 4658193323Sed unsigned NumOps = N->getNumOperands(); 4659193323Sed for (unsigned i = 0; i != NumOps-1; ++i) { 4660193323Sed SDValue Op = N->getOperand(i); 4661210299Sed if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands) 4662193323Sed AddrOps.push_back(Op); 4663193323Sed else if (i < Index-NumDefs) 4664193323Sed BeforeOps.push_back(Op); 4665193323Sed else if (i > Index-NumDefs) 4666193323Sed AfterOps.push_back(Op); 4667193323Sed } 4668193323Sed SDValue Chain = N->getOperand(NumOps-1); 4669193323Sed AddrOps.push_back(Chain); 4670193323Sed 4671193323Sed // Emit the load instruction. 4672193323Sed SDNode *Load = 0; 4673193323Sed if (FoldedLoad) { 4674198090Srdivacky EVT VT = *RC->vt_begin(); 4675199481Srdivacky std::pair<MachineInstr::mmo_iterator, 4676199481Srdivacky MachineInstr::mmo_iterator> MMOs = 4677199481Srdivacky MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 4678199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 4679210299Sed if (!(*MMOs.first) && 4680210299Sed RC == &X86::VR128RegClass && 4681210299Sed !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) 4682210299Sed // Do not introduce a slow unaligned load. 4683210299Sed return false; 4684226890Sdim unsigned Alignment = RC->getSize() == 32 ? 32 : 16; 4685226890Sdim bool isAligned = (*MMOs.first) && 4686226890Sdim (*MMOs.first)->getAlignment() >= Alignment; 4687198090Srdivacky Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, 4688252723Sdim VT, MVT::Other, AddrOps); 4689193323Sed NewNodes.push_back(Load); 4690198090Srdivacky 4691198090Srdivacky // Preserve memory reference information. 4692198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 4693193323Sed } 4694193323Sed 4695193323Sed // Emit the data processing instruction. 4696198090Srdivacky std::vector<EVT> VTs; 4697193323Sed const TargetRegisterClass *DstRC = 0; 4698224145Sdim if (MCID.getNumDefs() > 0) { 4699245431Sdim DstRC = getRegClass(MCID, 0, &RI, MF); 4700193323Sed VTs.push_back(*DstRC->vt_begin()); 4701193323Sed } 4702193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 4703198090Srdivacky EVT VT = N->getValueType(i); 4704224145Sdim if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs()) 4705193323Sed VTs.push_back(VT); 4706193323Sed } 4707193323Sed if (Load) 4708193323Sed BeforeOps.push_back(SDValue(Load, 0)); 4709193323Sed std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); 4710252723Sdim SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps); 4711193323Sed NewNodes.push_back(NewNode); 4712193323Sed 4713193323Sed // Emit the store instruction. 4714193323Sed if (FoldedStore) { 4715193323Sed AddrOps.pop_back(); 4716193323Sed AddrOps.push_back(SDValue(NewNode, 0)); 4717193323Sed AddrOps.push_back(Chain); 4718199481Srdivacky std::pair<MachineInstr::mmo_iterator, 4719199481Srdivacky MachineInstr::mmo_iterator> MMOs = 4720199481Srdivacky MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 4721199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 4722210299Sed if (!(*MMOs.first) && 4723210299Sed RC == &X86::VR128RegClass && 4724210299Sed !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) 4725210299Sed // Do not introduce a slow unaligned store. 4726210299Sed return false; 4727226890Sdim unsigned Alignment = RC->getSize() == 32 ? 32 : 16; 4728226890Sdim bool isAligned = (*MMOs.first) && 4729226890Sdim (*MMOs.first)->getAlignment() >= Alignment; 4730198090Srdivacky SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, 4731198090Srdivacky isAligned, TM), 4732252723Sdim dl, MVT::Other, AddrOps); 4733193323Sed NewNodes.push_back(Store); 4734198090Srdivacky 4735198090Srdivacky // Preserve memory reference information. 4736198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 4737193323Sed } 4738193323Sed 4739193323Sed return true; 4740193323Sed} 4741193323Sed 4742193323Sedunsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, 4743198892Srdivacky bool UnfoldLoad, bool UnfoldStore, 4744198892Srdivacky unsigned *LoadRegIndex) const { 4745218893Sdim DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = 4746218893Sdim MemOp2RegOpTable.find(Opc); 4747193323Sed if (I == MemOp2RegOpTable.end()) 4748193323Sed return 0; 4749226890Sdim bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; 4750226890Sdim bool FoldedStore = I->second.second & TB_FOLDED_STORE; 4751193323Sed if (UnfoldLoad && !FoldedLoad) 4752193323Sed return 0; 4753193323Sed if (UnfoldStore && !FoldedStore) 4754193323Sed return 0; 4755198892Srdivacky if (LoadRegIndex) 4756226890Sdim *LoadRegIndex = I->second.second & TB_INDEX_MASK; 4757193323Sed return I->second.first; 4758193323Sed} 4759193323Sed 4760202878Srdivackybool 4761202878SrdivackyX86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 4762202878Srdivacky int64_t &Offset1, int64_t &Offset2) const { 4763202878Srdivacky if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) 4764202878Srdivacky return false; 4765202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 4766202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 4767202878Srdivacky switch (Opc1) { 4768202878Srdivacky default: return false; 4769202878Srdivacky case X86::MOV8rm: 4770202878Srdivacky case X86::MOV16rm: 4771202878Srdivacky case X86::MOV32rm: 4772202878Srdivacky case X86::MOV64rm: 4773202878Srdivacky case X86::LD_Fp32m: 4774202878Srdivacky case X86::LD_Fp64m: 4775202878Srdivacky case X86::LD_Fp80m: 4776202878Srdivacky case X86::MOVSSrm: 4777202878Srdivacky case X86::MOVSDrm: 4778202878Srdivacky case X86::MMX_MOVD64rm: 4779202878Srdivacky case X86::MMX_MOVQ64rm: 4780202878Srdivacky case X86::FsMOVAPSrm: 4781202878Srdivacky case X86::FsMOVAPDrm: 4782202878Srdivacky case X86::MOVAPSrm: 4783202878Srdivacky case X86::MOVUPSrm: 4784202878Srdivacky case X86::MOVAPDrm: 4785202878Srdivacky case X86::MOVDQArm: 4786202878Srdivacky case X86::MOVDQUrm: 4787226890Sdim // AVX load instructions 4788226890Sdim case X86::VMOVSSrm: 4789226890Sdim case X86::VMOVSDrm: 4790226890Sdim case X86::FsVMOVAPSrm: 4791226890Sdim case X86::FsVMOVAPDrm: 4792226890Sdim case X86::VMOVAPSrm: 4793226890Sdim case X86::VMOVUPSrm: 4794226890Sdim case X86::VMOVAPDrm: 4795226890Sdim case X86::VMOVDQArm: 4796226890Sdim case X86::VMOVDQUrm: 4797224145Sdim case X86::VMOVAPSYrm: 4798224145Sdim case X86::VMOVUPSYrm: 4799224145Sdim case X86::VMOVAPDYrm: 4800224145Sdim case X86::VMOVDQAYrm: 4801224145Sdim case X86::VMOVDQUYrm: 4802202878Srdivacky break; 4803202878Srdivacky } 4804202878Srdivacky switch (Opc2) { 4805202878Srdivacky default: return false; 4806202878Srdivacky case X86::MOV8rm: 4807202878Srdivacky case X86::MOV16rm: 4808202878Srdivacky case X86::MOV32rm: 4809202878Srdivacky case X86::MOV64rm: 4810202878Srdivacky case X86::LD_Fp32m: 4811202878Srdivacky case X86::LD_Fp64m: 4812202878Srdivacky case X86::LD_Fp80m: 4813202878Srdivacky case X86::MOVSSrm: 4814202878Srdivacky case X86::MOVSDrm: 4815202878Srdivacky case X86::MMX_MOVD64rm: 4816202878Srdivacky case X86::MMX_MOVQ64rm: 4817202878Srdivacky case X86::FsMOVAPSrm: 4818202878Srdivacky case X86::FsMOVAPDrm: 4819202878Srdivacky case X86::MOVAPSrm: 4820202878Srdivacky case X86::MOVUPSrm: 4821202878Srdivacky case X86::MOVAPDrm: 4822202878Srdivacky case X86::MOVDQArm: 4823202878Srdivacky case X86::MOVDQUrm: 4824226890Sdim // AVX load instructions 4825226890Sdim case X86::VMOVSSrm: 4826226890Sdim case X86::VMOVSDrm: 4827226890Sdim case X86::FsVMOVAPSrm: 4828226890Sdim case X86::FsVMOVAPDrm: 4829226890Sdim case X86::VMOVAPSrm: 4830226890Sdim case X86::VMOVUPSrm: 4831226890Sdim case X86::VMOVAPDrm: 4832226890Sdim case X86::VMOVDQArm: 4833226890Sdim case X86::VMOVDQUrm: 4834224145Sdim case X86::VMOVAPSYrm: 4835224145Sdim case X86::VMOVUPSYrm: 4836224145Sdim case X86::VMOVAPDYrm: 4837224145Sdim case X86::VMOVDQAYrm: 4838224145Sdim case X86::VMOVDQUYrm: 4839202878Srdivacky break; 4840202878Srdivacky } 4841202878Srdivacky 4842202878Srdivacky // Check if chain operands and base addresses match. 4843202878Srdivacky if (Load1->getOperand(0) != Load2->getOperand(0) || 4844202878Srdivacky Load1->getOperand(5) != Load2->getOperand(5)) 4845202878Srdivacky return false; 4846202878Srdivacky // Segment operands should match as well. 4847202878Srdivacky if (Load1->getOperand(4) != Load2->getOperand(4)) 4848202878Srdivacky return false; 4849202878Srdivacky // Scale should be 1, Index should be Reg0. 4850202878Srdivacky if (Load1->getOperand(1) == Load2->getOperand(1) && 4851202878Srdivacky Load1->getOperand(2) == Load2->getOperand(2)) { 4852202878Srdivacky if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1) 4853202878Srdivacky return false; 4854202878Srdivacky 4855202878Srdivacky // Now let's examine the displacements. 4856202878Srdivacky if (isa<ConstantSDNode>(Load1->getOperand(3)) && 4857202878Srdivacky isa<ConstantSDNode>(Load2->getOperand(3))) { 4858202878Srdivacky Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue(); 4859202878Srdivacky Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue(); 4860202878Srdivacky return true; 4861202878Srdivacky } 4862202878Srdivacky } 4863202878Srdivacky return false; 4864202878Srdivacky} 4865202878Srdivacky 4866202878Srdivackybool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, 4867202878Srdivacky int64_t Offset1, int64_t Offset2, 4868202878Srdivacky unsigned NumLoads) const { 4869202878Srdivacky assert(Offset2 > Offset1); 4870202878Srdivacky if ((Offset2 - Offset1) / 8 > 64) 4871202878Srdivacky return false; 4872202878Srdivacky 4873202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 4874202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 4875202878Srdivacky if (Opc1 != Opc2) 4876202878Srdivacky return false; // FIXME: overly conservative? 4877202878Srdivacky 4878202878Srdivacky switch (Opc1) { 4879202878Srdivacky default: break; 4880202878Srdivacky case X86::LD_Fp32m: 4881202878Srdivacky case X86::LD_Fp64m: 4882202878Srdivacky case X86::LD_Fp80m: 4883202878Srdivacky case X86::MMX_MOVD64rm: 4884202878Srdivacky case X86::MMX_MOVQ64rm: 4885202878Srdivacky return false; 4886202878Srdivacky } 4887202878Srdivacky 4888202878Srdivacky EVT VT = Load1->getValueType(0); 4889202878Srdivacky switch (VT.getSimpleVT().SimpleTy) { 4890210299Sed default: 4891202878Srdivacky // XMM registers. In 64-bit mode we can be a bit more aggressive since we 4892202878Srdivacky // have 16 of them to play with. 4893202878Srdivacky if (TM.getSubtargetImpl()->is64Bit()) { 4894202878Srdivacky if (NumLoads >= 3) 4895202878Srdivacky return false; 4896210299Sed } else if (NumLoads) { 4897202878Srdivacky return false; 4898210299Sed } 4899202878Srdivacky break; 4900202878Srdivacky case MVT::i8: 4901202878Srdivacky case MVT::i16: 4902202878Srdivacky case MVT::i32: 4903202878Srdivacky case MVT::i64: 4904202878Srdivacky case MVT::f32: 4905202878Srdivacky case MVT::f64: 4906202878Srdivacky if (NumLoads) 4907202878Srdivacky return false; 4908210299Sed break; 4909202878Srdivacky } 4910202878Srdivacky 4911202878Srdivacky return true; 4912202878Srdivacky} 4913202878Srdivacky 4914263509Sdimbool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, 4915263509Sdim MachineInstr *Second) const { 4916263509Sdim // Check if this processor supports macro-fusion. Since this is a minor 4917263509Sdim // heuristic, we haven't specifically reserved a feature. hasAVX is a decent 4918263509Sdim // proxy for SandyBridge+. 4919263509Sdim if (!TM.getSubtarget<X86Subtarget>().hasAVX()) 4920263509Sdim return false; 4921202878Srdivacky 4922263509Sdim enum { 4923263509Sdim FuseTest, 4924263509Sdim FuseCmp, 4925263509Sdim FuseInc 4926263509Sdim } FuseKind; 4927263509Sdim 4928263509Sdim switch(Second->getOpcode()) { 4929263509Sdim default: 4930263509Sdim return false; 4931263509Sdim case X86::JE_4: 4932263509Sdim case X86::JNE_4: 4933263509Sdim case X86::JL_4: 4934263509Sdim case X86::JLE_4: 4935263509Sdim case X86::JG_4: 4936263509Sdim case X86::JGE_4: 4937263509Sdim FuseKind = FuseInc; 4938263509Sdim break; 4939263509Sdim case X86::JB_4: 4940263509Sdim case X86::JBE_4: 4941263509Sdim case X86::JA_4: 4942263509Sdim case X86::JAE_4: 4943263509Sdim FuseKind = FuseCmp; 4944263509Sdim break; 4945263509Sdim case X86::JS_4: 4946263509Sdim case X86::JNS_4: 4947263509Sdim case X86::JP_4: 4948263509Sdim case X86::JNP_4: 4949263509Sdim case X86::JO_4: 4950263509Sdim case X86::JNO_4: 4951263509Sdim FuseKind = FuseTest; 4952263509Sdim break; 4953263509Sdim } 4954263509Sdim switch (First->getOpcode()) { 4955263509Sdim default: 4956263509Sdim return false; 4957263509Sdim case X86::TEST8rr: 4958263509Sdim case X86::TEST16rr: 4959263509Sdim case X86::TEST32rr: 4960263509Sdim case X86::TEST64rr: 4961263509Sdim case X86::TEST8ri: 4962263509Sdim case X86::TEST16ri: 4963263509Sdim case X86::TEST32ri: 4964263509Sdim case X86::TEST32i32: 4965263509Sdim case X86::TEST64i32: 4966263509Sdim case X86::TEST64ri32: 4967263509Sdim case X86::TEST8rm: 4968263509Sdim case X86::TEST16rm: 4969263509Sdim case X86::TEST32rm: 4970263509Sdim case X86::TEST64rm: 4971263509Sdim case X86::AND16i16: 4972263509Sdim case X86::AND16ri: 4973263509Sdim case X86::AND16ri8: 4974263509Sdim case X86::AND16rm: 4975263509Sdim case X86::AND16rr: 4976263509Sdim case X86::AND32i32: 4977263509Sdim case X86::AND32ri: 4978263509Sdim case X86::AND32ri8: 4979263509Sdim case X86::AND32rm: 4980263509Sdim case X86::AND32rr: 4981263509Sdim case X86::AND64i32: 4982263509Sdim case X86::AND64ri32: 4983263509Sdim case X86::AND64ri8: 4984263509Sdim case X86::AND64rm: 4985263509Sdim case X86::AND64rr: 4986263509Sdim case X86::AND8i8: 4987263509Sdim case X86::AND8ri: 4988263509Sdim case X86::AND8rm: 4989263509Sdim case X86::AND8rr: 4990263509Sdim return true; 4991263509Sdim case X86::CMP16i16: 4992263509Sdim case X86::CMP16ri: 4993263509Sdim case X86::CMP16ri8: 4994263509Sdim case X86::CMP16rm: 4995263509Sdim case X86::CMP16rr: 4996263509Sdim case X86::CMP32i32: 4997263509Sdim case X86::CMP32ri: 4998263509Sdim case X86::CMP32ri8: 4999263509Sdim case X86::CMP32rm: 5000263509Sdim case X86::CMP32rr: 5001263509Sdim case X86::CMP64i32: 5002263509Sdim case X86::CMP64ri32: 5003263509Sdim case X86::CMP64ri8: 5004263509Sdim case X86::CMP64rm: 5005263509Sdim case X86::CMP64rr: 5006263509Sdim case X86::CMP8i8: 5007263509Sdim case X86::CMP8ri: 5008263509Sdim case X86::CMP8rm: 5009263509Sdim case X86::CMP8rr: 5010263509Sdim case X86::ADD16i16: 5011263509Sdim case X86::ADD16ri: 5012263509Sdim case X86::ADD16ri8: 5013263509Sdim case X86::ADD16ri8_DB: 5014263509Sdim case X86::ADD16ri_DB: 5015263509Sdim case X86::ADD16rm: 5016263509Sdim case X86::ADD16rr: 5017263509Sdim case X86::ADD16rr_DB: 5018263509Sdim case X86::ADD32i32: 5019263509Sdim case X86::ADD32ri: 5020263509Sdim case X86::ADD32ri8: 5021263509Sdim case X86::ADD32ri8_DB: 5022263509Sdim case X86::ADD32ri_DB: 5023263509Sdim case X86::ADD32rm: 5024263509Sdim case X86::ADD32rr: 5025263509Sdim case X86::ADD32rr_DB: 5026263509Sdim case X86::ADD64i32: 5027263509Sdim case X86::ADD64ri32: 5028263509Sdim case X86::ADD64ri32_DB: 5029263509Sdim case X86::ADD64ri8: 5030263509Sdim case X86::ADD64ri8_DB: 5031263509Sdim case X86::ADD64rm: 5032263509Sdim case X86::ADD64rr: 5033263509Sdim case X86::ADD64rr_DB: 5034263509Sdim case X86::ADD8i8: 5035263509Sdim case X86::ADD8mi: 5036263509Sdim case X86::ADD8mr: 5037263509Sdim case X86::ADD8ri: 5038263509Sdim case X86::ADD8rm: 5039263509Sdim case X86::ADD8rr: 5040263509Sdim case X86::SUB16i16: 5041263509Sdim case X86::SUB16ri: 5042263509Sdim case X86::SUB16ri8: 5043263509Sdim case X86::SUB16rm: 5044263509Sdim case X86::SUB16rr: 5045263509Sdim case X86::SUB32i32: 5046263509Sdim case X86::SUB32ri: 5047263509Sdim case X86::SUB32ri8: 5048263509Sdim case X86::SUB32rm: 5049263509Sdim case X86::SUB32rr: 5050263509Sdim case X86::SUB64i32: 5051263509Sdim case X86::SUB64ri32: 5052263509Sdim case X86::SUB64ri8: 5053263509Sdim case X86::SUB64rm: 5054263509Sdim case X86::SUB64rr: 5055263509Sdim case X86::SUB8i8: 5056263509Sdim case X86::SUB8ri: 5057263509Sdim case X86::SUB8rm: 5058263509Sdim case X86::SUB8rr: 5059263509Sdim return FuseKind == FuseCmp || FuseKind == FuseInc; 5060263509Sdim case X86::INC16r: 5061263509Sdim case X86::INC32r: 5062263509Sdim case X86::INC64_16r: 5063263509Sdim case X86::INC64_32r: 5064263509Sdim case X86::INC64r: 5065263509Sdim case X86::INC8r: 5066263509Sdim case X86::DEC16r: 5067263509Sdim case X86::DEC32r: 5068263509Sdim case X86::DEC64_16r: 5069263509Sdim case X86::DEC64_32r: 5070263509Sdim case X86::DEC64r: 5071263509Sdim case X86::DEC8r: 5072263509Sdim return FuseKind == FuseInc; 5073263509Sdim } 5074263509Sdim} 5075263509Sdim 5076193323Sedbool X86InstrInfo:: 5077193323SedReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 5078193323Sed assert(Cond.size() == 1 && "Invalid X86 branch condition!"); 5079193323Sed X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); 5080193323Sed if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) 5081193323Sed return true; 5082193323Sed Cond[0].setImm(GetOppositeBranchCondition(CC)); 5083193323Sed return false; 5084193323Sed} 5085193323Sed 5086193323Sedbool X86InstrInfo:: 5087193323SedisSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 5088193323Sed // FIXME: Return false for x87 stack register classes for now. We can't 5089193323Sed // allow any loads of these registers before FpGet_ST0_80. 5090193323Sed return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || 5091193323Sed RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); 5092193323Sed} 5093193323Sed 5094193323Sed/// getGlobalBaseReg - Return a virtual register initialized with the 5095193323Sed/// the global base register value. Output instructions required to 5096193323Sed/// initialize the register in the function entry block, if necessary. 5097193323Sed/// 5098210299Sed/// TODO: Eliminate this and move the code to X86MachineFunctionInfo. 5099210299Sed/// 5100193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 5101193323Sed assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && 5102193323Sed "X86-64 PIC uses RIP relative addressing"); 5103193323Sed 5104193323Sed X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 5105193323Sed unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 5106193323Sed if (GlobalBaseReg != 0) 5107193323Sed return GlobalBaseReg; 5108193323Sed 5109210299Sed // Create the register. The code to initialize it is inserted 5110210299Sed // later, by the CGBR pass (below). 5111193323Sed MachineRegisterInfo &RegInfo = MF->getRegInfo(); 5112245431Sdim GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); 5113193323Sed X86FI->setGlobalBaseReg(GlobalBaseReg); 5114193323Sed return GlobalBaseReg; 5115193323Sed} 5116206083Srdivacky 5117206083Srdivacky// These are the replaceable SSE instructions. Some of these have Int variants 5118206083Srdivacky// that we don't include here. We don't want to replace instructions selected 5119206083Srdivacky// by intrinsics. 5120235633Sdimstatic const uint16_t ReplaceableInstrs[][3] = { 5121212904Sdim //PackedSingle PackedDouble PackedInt 5122206083Srdivacky { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, 5123206083Srdivacky { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, 5124206083Srdivacky { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, 5125206083Srdivacky { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, 5126206083Srdivacky { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, 5127206083Srdivacky { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, 5128206083Srdivacky { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, 5129206083Srdivacky { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, 5130206083Srdivacky { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm }, 5131206083Srdivacky { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, 5132206083Srdivacky { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, 5133206083Srdivacky { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, 5134206083Srdivacky { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, 5135206083Srdivacky { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, 5136212904Sdim // AVX 128-bit support 5137212904Sdim { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, 5138212904Sdim { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, 5139212904Sdim { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, 5140212904Sdim { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, 5141212904Sdim { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, 5142212904Sdim { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, 5143212904Sdim { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, 5144212904Sdim { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, 5145212904Sdim { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, 5146212904Sdim { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, 5147212904Sdim { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, 5148212904Sdim { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, 5149212904Sdim { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, 5150212904Sdim { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, 5151224145Sdim // AVX 256-bit support 5152224145Sdim { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, 5153224145Sdim { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, 5154224145Sdim { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr }, 5155224145Sdim { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr }, 5156224145Sdim { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, 5157235633Sdim { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr } 5158206083Srdivacky}; 5159206083Srdivacky 5160235633Sdimstatic const uint16_t ReplaceableInstrsAVX2[][3] = { 5161235633Sdim //PackedSingle PackedDouble PackedInt 5162235633Sdim { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm }, 5163235633Sdim { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr }, 5164235633Sdim { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm }, 5165235633Sdim { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr }, 5166235633Sdim { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm }, 5167235633Sdim { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, 5168235633Sdim { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, 5169235633Sdim { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, 5170235633Sdim { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr }, 5171235633Sdim { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr }, 5172235633Sdim { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm }, 5173235633Sdim { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr }, 5174235633Sdim { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm }, 5175235633Sdim { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr } 5176235633Sdim}; 5177235633Sdim 5178206083Srdivacky// FIXME: Some shuffle and unpack instructions have equivalents in different 5179206083Srdivacky// domains, but they require a bit more work than just switching opcodes. 5180206083Srdivacky 5181235633Sdimstatic const uint16_t *lookup(unsigned opcode, unsigned domain) { 5182206083Srdivacky for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) 5183206083Srdivacky if (ReplaceableInstrs[i][domain-1] == opcode) 5184206083Srdivacky return ReplaceableInstrs[i]; 5185206083Srdivacky return 0; 5186206083Srdivacky} 5187206083Srdivacky 5188235633Sdimstatic const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { 5189235633Sdim for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) 5190235633Sdim if (ReplaceableInstrsAVX2[i][domain-1] == opcode) 5191235633Sdim return ReplaceableInstrsAVX2[i]; 5192235633Sdim return 0; 5193235633Sdim} 5194235633Sdim 5195206083Srdivackystd::pair<uint16_t, uint16_t> 5196226890SdimX86InstrInfo::getExecutionDomain(const MachineInstr *MI) const { 5197206083Srdivacky uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 5198235633Sdim bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2(); 5199235633Sdim uint16_t validDomains = 0; 5200235633Sdim if (domain && lookup(MI->getOpcode(), domain)) 5201235633Sdim validDomains = 0xe; 5202235633Sdim else if (domain && lookupAVX2(MI->getOpcode(), domain)) 5203235633Sdim validDomains = hasAVX2 ? 0xe : 0x6; 5204235633Sdim return std::make_pair(domain, validDomains); 5205206083Srdivacky} 5206206083Srdivacky 5207226890Sdimvoid X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { 5208206083Srdivacky assert(Domain>0 && Domain<4 && "Invalid execution domain"); 5209206083Srdivacky uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 5210206083Srdivacky assert(dom && "Not an SSE instruction"); 5211235633Sdim const uint16_t *table = lookup(MI->getOpcode(), dom); 5212235633Sdim if (!table) { // try the other table 5213235633Sdim assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) && 5214235633Sdim "256-bit vector operations only available in AVX2"); 5215235633Sdim table = lookupAVX2(MI->getOpcode(), dom); 5216235633Sdim } 5217206083Srdivacky assert(table && "Cannot change domain"); 5218206083Srdivacky MI->setDesc(get(table[Domain-1])); 5219206083Srdivacky} 5220207618Srdivacky 5221207618Srdivacky/// getNoopForMachoTarget - Return the noop instruction to use for a noop. 5222207618Srdivackyvoid X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { 5223207618Srdivacky NopInst.setOpcode(X86::NOOP); 5224207618Srdivacky} 5225207618Srdivacky 5226221345Sdimbool X86InstrInfo::isHighLatencyDef(int opc) const { 5227221345Sdim switch (opc) { 5228218893Sdim default: return false; 5229218893Sdim case X86::DIVSDrm: 5230218893Sdim case X86::DIVSDrm_Int: 5231218893Sdim case X86::DIVSDrr: 5232218893Sdim case X86::DIVSDrr_Int: 5233218893Sdim case X86::DIVSSrm: 5234218893Sdim case X86::DIVSSrm_Int: 5235218893Sdim case X86::DIVSSrr: 5236218893Sdim case X86::DIVSSrr_Int: 5237218893Sdim case X86::SQRTPDm: 5238218893Sdim case X86::SQRTPDr: 5239218893Sdim case X86::SQRTPSm: 5240218893Sdim case X86::SQRTPSr: 5241218893Sdim case X86::SQRTSDm: 5242218893Sdim case X86::SQRTSDm_Int: 5243218893Sdim case X86::SQRTSDr: 5244218893Sdim case X86::SQRTSDr_Int: 5245218893Sdim case X86::SQRTSSm: 5246218893Sdim case X86::SQRTSSm_Int: 5247218893Sdim case X86::SQRTSSr: 5248218893Sdim case X86::SQRTSSr_Int: 5249226890Sdim // AVX instructions with high latency 5250226890Sdim case X86::VDIVSDrm: 5251226890Sdim case X86::VDIVSDrm_Int: 5252226890Sdim case X86::VDIVSDrr: 5253226890Sdim case X86::VDIVSDrr_Int: 5254226890Sdim case X86::VDIVSSrm: 5255226890Sdim case X86::VDIVSSrm_Int: 5256226890Sdim case X86::VDIVSSrr: 5257226890Sdim case X86::VDIVSSrr_Int: 5258226890Sdim case X86::VSQRTPDm: 5259226890Sdim case X86::VSQRTPDr: 5260226890Sdim case X86::VSQRTPSm: 5261226890Sdim case X86::VSQRTPSr: 5262226890Sdim case X86::VSQRTSDm: 5263226890Sdim case X86::VSQRTSDm_Int: 5264226890Sdim case X86::VSQRTSDr: 5265226890Sdim case X86::VSQRTSSm: 5266226890Sdim case X86::VSQRTSSm_Int: 5267226890Sdim case X86::VSQRTSSr: 5268263509Sdim case X86::VSQRTPDZrm: 5269263509Sdim case X86::VSQRTPDZrr: 5270263509Sdim case X86::VSQRTPSZrm: 5271263509Sdim case X86::VSQRTPSZrr: 5272263509Sdim case X86::VSQRTSDZm: 5273263509Sdim case X86::VSQRTSDZm_Int: 5274263509Sdim case X86::VSQRTSDZr: 5275263509Sdim case X86::VSQRTSSZm_Int: 5276263509Sdim case X86::VSQRTSSZr: 5277263509Sdim case X86::VSQRTSSZm: 5278263509Sdim case X86::VDIVSDZrm: 5279263509Sdim case X86::VDIVSDZrr: 5280263509Sdim case X86::VDIVSSZrm: 5281263509Sdim case X86::VDIVSSZrr: 5282263509Sdim 5283263509Sdim case X86::VGATHERQPSZrm: 5284263509Sdim case X86::VGATHERQPDZrm: 5285263509Sdim case X86::VGATHERDPDZrm: 5286263509Sdim case X86::VGATHERDPSZrm: 5287263509Sdim case X86::VPGATHERQDZrm: 5288263509Sdim case X86::VPGATHERQQZrm: 5289263509Sdim case X86::VPGATHERDDZrm: 5290263509Sdim case X86::VPGATHERDQZrm: 5291263509Sdim case X86::VSCATTERQPDZmr: 5292263509Sdim case X86::VSCATTERQPSZmr: 5293263509Sdim case X86::VSCATTERDPDZmr: 5294263509Sdim case X86::VSCATTERDPSZmr: 5295263509Sdim case X86::VPSCATTERQDZmr: 5296263509Sdim case X86::VPSCATTERQQZmr: 5297263509Sdim case X86::VPSCATTERDDZmr: 5298263509Sdim case X86::VPSCATTERDQZmr: 5299218893Sdim return true; 5300218893Sdim } 5301218893Sdim} 5302218893Sdim 5303221345Sdimbool X86InstrInfo:: 5304221345SdimhasHighOperandLatency(const InstrItineraryData *ItinData, 5305221345Sdim const MachineRegisterInfo *MRI, 5306221345Sdim const MachineInstr *DefMI, unsigned DefIdx, 5307221345Sdim const MachineInstr *UseMI, unsigned UseIdx) const { 5308221345Sdim return isHighLatencyDef(DefMI->getOpcode()); 5309221345Sdim} 5310221345Sdim 5311210299Sednamespace { 5312210299Sed /// CGBR - Create Global Base Reg pass. This initializes the PIC 5313210299Sed /// global base register for x86-32. 5314210299Sed struct CGBR : public MachineFunctionPass { 5315210299Sed static char ID; 5316212904Sdim CGBR() : MachineFunctionPass(ID) {} 5317210299Sed 5318210299Sed virtual bool runOnMachineFunction(MachineFunction &MF) { 5319210299Sed const X86TargetMachine *TM = 5320210299Sed static_cast<const X86TargetMachine *>(&MF.getTarget()); 5321210299Sed 5322210299Sed assert(!TM->getSubtarget<X86Subtarget>().is64Bit() && 5323210299Sed "X86-64 PIC uses RIP relative addressing"); 5324210299Sed 5325210299Sed // Only emit a global base reg in PIC mode. 5326210299Sed if (TM->getRelocationModel() != Reloc::PIC_) 5327210299Sed return false; 5328210299Sed 5329218893Sdim X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 5330218893Sdim unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 5331218893Sdim 5332218893Sdim // If we didn't need a GlobalBaseReg, don't insert code. 5333218893Sdim if (GlobalBaseReg == 0) 5334218893Sdim return false; 5335218893Sdim 5336210299Sed // Insert the set of GlobalBaseReg into the first MBB of the function 5337210299Sed MachineBasicBlock &FirstMBB = MF.front(); 5338210299Sed MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 5339210299Sed DebugLoc DL = FirstMBB.findDebugLoc(MBBI); 5340210299Sed MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5341210299Sed const X86InstrInfo *TII = TM->getInstrInfo(); 5342210299Sed 5343210299Sed unsigned PC; 5344210299Sed if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) 5345245431Sdim PC = RegInfo.createVirtualRegister(&X86::GR32RegClass); 5346210299Sed else 5347218893Sdim PC = GlobalBaseReg; 5348218893Sdim 5349210299Sed // Operand of MovePCtoStack is completely ignored by asm printer. It's 5350210299Sed // only used in JIT code emission as displacement to pc. 5351210299Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); 5352218893Sdim 5353210299Sed // If we're using vanilla 'GOT' PIC style, we should use relative addressing 5354210299Sed // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. 5355210299Sed if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) { 5356210299Sed // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register 5357210299Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) 5358210299Sed .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 5359210299Sed X86II::MO_GOT_ABSOLUTE_ADDRESS); 5360210299Sed } 5361210299Sed 5362210299Sed return true; 5363210299Sed } 5364210299Sed 5365210299Sed virtual const char *getPassName() const { 5366210299Sed return "X86 PIC Global Base Reg Initialization"; 5367210299Sed } 5368210299Sed 5369210299Sed virtual void getAnalysisUsage(AnalysisUsage &AU) const { 5370210299Sed AU.setPreservesCFG(); 5371210299Sed MachineFunctionPass::getAnalysisUsage(AU); 5372210299Sed } 5373210299Sed }; 5374210299Sed} 5375210299Sed 5376210299Sedchar CGBR::ID = 0; 5377210299SedFunctionPass* 5378210299Sedllvm::createGlobalBaseRegPass() { return new CGBR(); } 5379245431Sdim 5380245431Sdimnamespace { 5381245431Sdim struct LDTLSCleanup : public MachineFunctionPass { 5382245431Sdim static char ID; 5383245431Sdim LDTLSCleanup() : MachineFunctionPass(ID) {} 5384245431Sdim 5385245431Sdim virtual bool runOnMachineFunction(MachineFunction &MF) { 5386245431Sdim X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>(); 5387245431Sdim if (MFI->getNumLocalDynamicTLSAccesses() < 2) { 5388245431Sdim // No point folding accesses if there isn't at least two. 5389245431Sdim return false; 5390245431Sdim } 5391245431Sdim 5392245431Sdim MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); 5393245431Sdim return VisitNode(DT->getRootNode(), 0); 5394245431Sdim } 5395245431Sdim 5396245431Sdim // Visit the dominator subtree rooted at Node in pre-order. 5397245431Sdim // If TLSBaseAddrReg is non-null, then use that to replace any 5398245431Sdim // TLS_base_addr instructions. Otherwise, create the register 5399245431Sdim // when the first such instruction is seen, and then use it 5400245431Sdim // as we encounter more instructions. 5401245431Sdim bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { 5402245431Sdim MachineBasicBlock *BB = Node->getBlock(); 5403245431Sdim bool Changed = false; 5404245431Sdim 5405245431Sdim // Traverse the current block. 5406245431Sdim for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; 5407245431Sdim ++I) { 5408245431Sdim switch (I->getOpcode()) { 5409245431Sdim case X86::TLS_base_addr32: 5410245431Sdim case X86::TLS_base_addr64: 5411245431Sdim if (TLSBaseAddrReg) 5412245431Sdim I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg); 5413245431Sdim else 5414245431Sdim I = SetRegister(I, &TLSBaseAddrReg); 5415245431Sdim Changed = true; 5416245431Sdim break; 5417245431Sdim default: 5418245431Sdim break; 5419245431Sdim } 5420245431Sdim } 5421245431Sdim 5422245431Sdim // Visit the children of this block in the dominator tree. 5423245431Sdim for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); 5424245431Sdim I != E; ++I) { 5425245431Sdim Changed |= VisitNode(*I, TLSBaseAddrReg); 5426245431Sdim } 5427245431Sdim 5428245431Sdim return Changed; 5429245431Sdim } 5430245431Sdim 5431245431Sdim // Replace the TLS_base_addr instruction I with a copy from 5432245431Sdim // TLSBaseAddrReg, returning the new instruction. 5433245431Sdim MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I, 5434245431Sdim unsigned TLSBaseAddrReg) { 5435245431Sdim MachineFunction *MF = I->getParent()->getParent(); 5436245431Sdim const X86TargetMachine *TM = 5437245431Sdim static_cast<const X86TargetMachine *>(&MF->getTarget()); 5438245431Sdim const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit(); 5439245431Sdim const X86InstrInfo *TII = TM->getInstrInfo(); 5440245431Sdim 5441245431Sdim // Insert a Copy from TLSBaseAddrReg to RAX/EAX. 5442245431Sdim MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), 5443245431Sdim TII->get(TargetOpcode::COPY), 5444245431Sdim is64Bit ? X86::RAX : X86::EAX) 5445245431Sdim .addReg(TLSBaseAddrReg); 5446245431Sdim 5447245431Sdim // Erase the TLS_base_addr instruction. 5448245431Sdim I->eraseFromParent(); 5449245431Sdim 5450245431Sdim return Copy; 5451245431Sdim } 5452245431Sdim 5453245431Sdim // Create a virtal register in *TLSBaseAddrReg, and populate it by 5454245431Sdim // inserting a copy instruction after I. Returns the new instruction. 5455245431Sdim MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { 5456245431Sdim MachineFunction *MF = I->getParent()->getParent(); 5457245431Sdim const X86TargetMachine *TM = 5458245431Sdim static_cast<const X86TargetMachine *>(&MF->getTarget()); 5459245431Sdim const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit(); 5460245431Sdim const X86InstrInfo *TII = TM->getInstrInfo(); 5461245431Sdim 5462245431Sdim // Create a virtual register for the TLS base address. 5463245431Sdim MachineRegisterInfo &RegInfo = MF->getRegInfo(); 5464245431Sdim *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit 5465245431Sdim ? &X86::GR64RegClass 5466245431Sdim : &X86::GR32RegClass); 5467245431Sdim 5468245431Sdim // Insert a copy from RAX/EAX to TLSBaseAddrReg. 5469245431Sdim MachineInstr *Next = I->getNextNode(); 5470245431Sdim MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), 5471245431Sdim TII->get(TargetOpcode::COPY), 5472245431Sdim *TLSBaseAddrReg) 5473245431Sdim .addReg(is64Bit ? X86::RAX : X86::EAX); 5474245431Sdim 5475245431Sdim return Copy; 5476245431Sdim } 5477245431Sdim 5478245431Sdim virtual const char *getPassName() const { 5479245431Sdim return "Local Dynamic TLS Access Clean-up"; 5480245431Sdim } 5481245431Sdim 5482245431Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 5483245431Sdim AU.setPreservesCFG(); 5484245431Sdim AU.addRequired<MachineDominatorTree>(); 5485245431Sdim MachineFunctionPass::getAnalysisUsage(AU); 5486245431Sdim } 5487245431Sdim }; 5488245431Sdim} 5489245431Sdim 5490245431Sdimchar LDTLSCleanup::ID = 0; 5491245431SdimFunctionPass* 5492245431Sdimllvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } 5493