X86InstrInfo.cpp revision 224145
//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86InstrInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
#include <limits>

#define GET_INSTRINFO_CTOR
#include "X86GenInstrInfo.inc"

using namespace llvm;

static cl::opt<bool>
NoFusing("disable-spill-fusing",
         cl::desc("Disable fusing of spill code into instructions"));
static cl::opt<bool>
PrintFailedFusing("print-failed-fuse-candidates",
                  cl::desc("Print instructions that the allocator wants to"
                           " fuse, but the X86 backend currently can't"),
                  cl::Hidden);
static cl::opt<bool>
ReMatPICStubLoad("remat-pic-stub-load",
                 cl::desc("Re-materialize load from stub in PIC mode"),
                 cl::init(false), cl::Hidden);

X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
                     ? X86::ADJCALLSTACKDOWN64
                     : X86::ADJCALLSTACKDOWN32),
                    (tm.getSubtarget<X86Subtarget>().is64Bit()
                     ? X86::ADJCALLSTACKUP64
                     : X86::ADJCALLSTACKUP32)),
    TM(tm), RI(tm, *this) {
  enum {
    TB_NOT_REVERSABLE = 1U << 31,
    TB_FLAGS = TB_NOT_REVERSABLE
  };

  static const unsigned OpTbl2Addr[][2] = {
    { X86::ADC32ri, X86::ADC32mi },
    { X86::ADC32ri8, X86::ADC32mi8 },
    { X86::ADC32rr, X86::ADC32mr },
    { X86::ADC64ri32, X86::ADC64mi32 },
    { X86::ADC64ri8, X86::ADC64mi8 },
    { X86::ADC64rr, X86::ADC64mr },
    { X86::ADD16ri, X86::ADD16mi },
    { X86::ADD16ri8, X86::ADD16mi8 },
    { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE },
    { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
    { X86::ADD16rr, X86::ADD16mr },
    { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
    { X86::ADD32ri, X86::ADD32mi },
    { X86::ADD32ri8, X86::ADD32mi8 },
    { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE },
    { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
    { X86::ADD32rr, X86::ADD32mr },
    { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
    { X86::ADD64ri32, X86::ADD64mi32 },
    { X86::ADD64ri8, X86::ADD64mi8 },
    { X86::ADD64ri32_DB, X86::ADD64mi32 | TB_NOT_REVERSABLE },
    { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
    { X86::ADD64rr, X86::ADD64mr },
    { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
    { X86::ADD8ri, X86::ADD8mi },
    { X86::ADD8rr, X86::ADD8mr },
    { X86::AND16ri, X86::AND16mi },
    { X86::AND16ri8, X86::AND16mi8 },
    { X86::AND16rr, X86::AND16mr },
    { X86::AND32ri, X86::AND32mi },
    { X86::AND32ri8, X86::AND32mi8 },
    { X86::AND32rr, X86::AND32mr },
    { X86::AND64ri32, X86::AND64mi32 },
    { X86::AND64ri8, X86::AND64mi8 },
    { X86::AND64rr, X86::AND64mr },
    { X86::AND8ri, X86::AND8mi },
    { X86::AND8rr, X86::AND8mr },
    { X86::DEC16r, X86::DEC16m },
    { X86::DEC32r, X86::DEC32m },
    { X86::DEC64_16r, X86::DEC64_16m },
    { X86::DEC64_32r, X86::DEC64_32m },
    { X86::DEC64r, X86::DEC64m },
    { X86::DEC8r, X86::DEC8m },
    { X86::INC16r, X86::INC16m },
    { X86::INC32r, X86::INC32m },
    { X86::INC64_16r, X86::INC64_16m },
    { X86::INC64_32r, X86::INC64_32m },
    { X86::INC64r, X86::INC64m },
    { X86::INC8r, X86::INC8m },
    { X86::NEG16r, X86::NEG16m },
    { X86::NEG32r, X86::NEG32m },
    { X86::NEG64r, X86::NEG64m },
    { X86::NEG8r, X86::NEG8m },
    { X86::NOT16r, X86::NOT16m },
    { X86::NOT32r, X86::NOT32m },
    { X86::NOT64r, X86::NOT64m },
    { X86::NOT8r, X86::NOT8m },
    { X86::OR16ri, X86::OR16mi },
    { X86::OR16ri8, X86::OR16mi8 },
    { X86::OR16rr, X86::OR16mr },
    { X86::OR32ri, X86::OR32mi },
    { X86::OR32ri8, X86::OR32mi8 },
    { X86::OR32rr, X86::OR32mr },
    { X86::OR64ri32, X86::OR64mi32 },
    { X86::OR64ri8, X86::OR64mi8 },
    { X86::OR64rr, X86::OR64mr },
    { X86::OR8ri, X86::OR8mi },
    { X86::OR8rr, X86::OR8mr },
    { X86::ROL16r1, X86::ROL16m1 },
    { X86::ROL16rCL, X86::ROL16mCL },
    { X86::ROL16ri, X86::ROL16mi },
    { X86::ROL32r1, X86::ROL32m1 },
    { X86::ROL32rCL, X86::ROL32mCL },
    { X86::ROL32ri, X86::ROL32mi },
    { X86::ROL64r1, X86::ROL64m1 },
    { X86::ROL64rCL, X86::ROL64mCL },
    { X86::ROL64ri, X86::ROL64mi },
    { X86::ROL8r1, X86::ROL8m1 },
    { X86::ROL8rCL, X86::ROL8mCL },
    { X86::ROL8ri, X86::ROL8mi },
    { X86::ROR16r1, X86::ROR16m1 },
    { X86::ROR16rCL, X86::ROR16mCL },
    { X86::ROR16ri, X86::ROR16mi },
    { X86::ROR32r1, X86::ROR32m1 },
    { X86::ROR32rCL, X86::ROR32mCL },
    { X86::ROR32ri, X86::ROR32mi },
    { X86::ROR64r1, X86::ROR64m1 },
    { X86::ROR64rCL, X86::ROR64mCL },
    { X86::ROR64ri, X86::ROR64mi },
    { X86::ROR8r1, X86::ROR8m1 },
    { X86::ROR8rCL, X86::ROR8mCL },
    { X86::ROR8ri, X86::ROR8mi },
    { X86::SAR16r1, X86::SAR16m1 },
    { X86::SAR16rCL, X86::SAR16mCL },
    { X86::SAR16ri, X86::SAR16mi },
    { X86::SAR32r1, X86::SAR32m1 },
    { X86::SAR32rCL, X86::SAR32mCL },
    { X86::SAR32ri, X86::SAR32mi },
    { X86::SAR64r1, X86::SAR64m1 },
    { X86::SAR64rCL, X86::SAR64mCL },
    { X86::SAR64ri, X86::SAR64mi },
    { X86::SAR8r1, X86::SAR8m1 },
    { X86::SAR8rCL, X86::SAR8mCL },
    { X86::SAR8ri, X86::SAR8mi },
    { X86::SBB32ri, X86::SBB32mi },
    { X86::SBB32ri8, X86::SBB32mi8 },
    { X86::SBB32rr, X86::SBB32mr },
    { X86::SBB64ri32, X86::SBB64mi32 },
    { X86::SBB64ri8, X86::SBB64mi8 },
    { X86::SBB64rr, X86::SBB64mr },
    { X86::SHL16rCL, X86::SHL16mCL },
    { X86::SHL16ri, X86::SHL16mi },
    { X86::SHL32rCL, X86::SHL32mCL },
    { X86::SHL32ri, X86::SHL32mi },
    { X86::SHL64rCL, X86::SHL64mCL },
    { X86::SHL64ri, X86::SHL64mi },
    { X86::SHL8rCL, X86::SHL8mCL },
    { X86::SHL8ri, X86::SHL8mi },
    { X86::SHLD16rrCL, X86::SHLD16mrCL },
    { X86::SHLD16rri8, X86::SHLD16mri8 },
    { X86::SHLD32rrCL, X86::SHLD32mrCL },
    { X86::SHLD32rri8, X86::SHLD32mri8 },
    { X86::SHLD64rrCL, X86::SHLD64mrCL },
    { X86::SHLD64rri8, X86::SHLD64mri8 },
    { X86::SHR16r1, X86::SHR16m1 },
    { X86::SHR16rCL, X86::SHR16mCL },
    { X86::SHR16ri, X86::SHR16mi },
    { X86::SHR32r1, X86::SHR32m1 },
    { X86::SHR32rCL, X86::SHR32mCL },
    { X86::SHR32ri, X86::SHR32mi },
    { X86::SHR64r1, X86::SHR64m1 },
    { X86::SHR64rCL, X86::SHR64mCL },
    { X86::SHR64ri, X86::SHR64mi },
    { X86::SHR8r1, X86::SHR8m1 },
    { X86::SHR8rCL, X86::SHR8mCL },
    { X86::SHR8ri, X86::SHR8mi },
    { X86::SHRD16rrCL, X86::SHRD16mrCL },
    { X86::SHRD16rri8, X86::SHRD16mri8 },
    { X86::SHRD32rrCL, X86::SHRD32mrCL },
    { X86::SHRD32rri8, X86::SHRD32mri8 },
    { X86::SHRD64rrCL, X86::SHRD64mrCL },
    { X86::SHRD64rri8, X86::SHRD64mri8 },
    { X86::SUB16ri, X86::SUB16mi },
    { X86::SUB16ri8, X86::SUB16mi8 },
    { X86::SUB16rr, X86::SUB16mr },
    { X86::SUB32ri, X86::SUB32mi },
    { X86::SUB32ri8, X86::SUB32mi8 },
    { X86::SUB32rr, X86::SUB32mr },
    { X86::SUB64ri32, X86::SUB64mi32 },
    { X86::SUB64ri8, X86::SUB64mi8 },
    { X86::SUB64rr, X86::SUB64mr },
    { X86::SUB8ri, X86::SUB8mi },
    { X86::SUB8rr, X86::SUB8mr },
    { X86::XOR16ri, X86::XOR16mi },
    { X86::XOR16ri8, X86::XOR16mi8 },
    { X86::XOR16rr, X86::XOR16mr },
    { X86::XOR32ri, X86::XOR32mi },
    { X86::XOR32ri8, X86::XOR32mi8 },
    { X86::XOR32rr, X86::XOR32mr },
    { X86::XOR64ri32, X86::XOR64mi32 },
    { X86::XOR64ri8, X86::XOR64mi8 },
    { X86::XOR64rr, X86::XOR64mr },
    { X86::XOR8ri, X86::XOR8mi },
    { X86::XOR8rr, X86::XOR8mr }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
    unsigned RegOp = OpTbl2Addr[i][0];
    unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
    assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
    RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);

    // If this is not a reversible operation (because there is a many->one
    // mapping), don't insert the reverse of the operation into
    // MemOp2RegOpTable.
    if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
      continue;

    // Index 0, folded load and store, no alignment requirement.
    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);

    assert(!MemOp2RegOpTable.count(MemOp) &&
           "Duplicated entries in unfolding maps?");
    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
  }

  // If the third value is 1, then it's folding either a load or a store.
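  // Each OpTbl0 row is { reg-form opcode, mem-form opcode (possibly OR'd
  // with TB_NOT_REVERSABLE), the load/store flag described above, and the
  // minimum alignment in bytes recorded for the memory form (0 = none).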
  static const unsigned OpTbl0[][4] = {
    { X86::BT16ri8, X86::BT16mi8, 1, 0 },
    { X86::BT32ri8, X86::BT32mi8, 1, 0 },
    { X86::BT64ri8, X86::BT64mi8, 1, 0 },
    { X86::CALL32r, X86::CALL32m, 1, 0 },
    { X86::CALL64r, X86::CALL64m, 1, 0 },
    { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
    { X86::CMP16ri, X86::CMP16mi, 1, 0 },
    { X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
    { X86::CMP16rr, X86::CMP16mr, 1, 0 },
    { X86::CMP32ri, X86::CMP32mi, 1, 0 },
    { X86::CMP32ri8, X86::CMP32mi8, 1, 0 },
    { X86::CMP32rr, X86::CMP32mr, 1, 0 },
    { X86::CMP64ri32, X86::CMP64mi32, 1, 0 },
    { X86::CMP64ri8, X86::CMP64mi8, 1, 0 },
    { X86::CMP64rr, X86::CMP64mr, 1, 0 },
    { X86::CMP8ri, X86::CMP8mi, 1, 0 },
    { X86::CMP8rr, X86::CMP8mr, 1, 0 },
    { X86::DIV16r, X86::DIV16m, 1, 0 },
    { X86::DIV32r, X86::DIV32m, 1, 0 },
    { X86::DIV64r, X86::DIV64m, 1, 0 },
    { X86::DIV8r, X86::DIV8m, 1, 0 },
    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
    { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE, 0, 0 },
    { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE, 0, 0 },
    { X86::IDIV16r, X86::IDIV16m, 1, 0 },
    { X86::IDIV32r, X86::IDIV32m, 1, 0 },
    { X86::IDIV64r, X86::IDIV64m, 1, 0 },
    { X86::IDIV8r, X86::IDIV8m, 1, 0 },
    { X86::IMUL16r, X86::IMUL16m, 1, 0 },
    { X86::IMUL32r, X86::IMUL32m, 1, 0 },
    { X86::IMUL64r, X86::IMUL64m, 1, 0 },
    { X86::IMUL8r, X86::IMUL8m, 1, 0 },
    { X86::JMP32r, X86::JMP32m, 1, 0 },
    { X86::JMP64r, X86::JMP64m, 1, 0 },
    { X86::MOV16ri, X86::MOV16mi, 0, 0 },
    { X86::MOV16rr, X86::MOV16mr, 0, 0 },
    { X86::MOV32ri, X86::MOV32mi, 0, 0 },
    { X86::MOV32rr, X86::MOV32mr, 0, 0 },
    { X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
    { X86::MOV64rr, X86::MOV64mr, 0, 0 },
    { X86::MOV8ri, X86::MOV8mi, 0, 0 },
    { X86::MOV8rr, X86::MOV8mr, 0, 0 },
    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
    { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
    { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
    { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
    { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 },
    { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 },
    { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 },
    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
    { X86::MOVPQIto64rr, X86::MOVPQI2QImr, 0, 0 },
    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
    { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
    { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
    { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
    { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 },
    { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 },
    { X86::MUL16r, X86::MUL16m, 1, 0 },
    { X86::MUL32r, X86::MUL32m, 1, 0 },
    { X86::MUL64r, X86::MUL64m, 1, 0 },
    { X86::MUL8r, X86::MUL8m, 1, 0 },
    { X86::SETAEr, X86::SETAEm, 0, 0 },
    { X86::SETAr, X86::SETAm, 0, 0 },
    { X86::SETBEr, X86::SETBEm, 0, 0 },
    { X86::SETBr, X86::SETBm, 0, 0 },
    { X86::SETEr, X86::SETEm, 0, 0 },
    { X86::SETGEr, X86::SETGEm, 0, 0 },
    { X86::SETGr, X86::SETGm, 0, 0 },
    { X86::SETLEr, X86::SETLEm, 0, 0 },
    { X86::SETLr, X86::SETLm, 0, 0 },
    { X86::SETNEr, X86::SETNEm, 0, 0 },
    { X86::SETNOr, X86::SETNOm, 0, 0 },
    { X86::SETNPr, X86::SETNPm, 0, 0 },
    { X86::SETNSr, X86::SETNSm, 0, 0 },
    { X86::SETOr, X86::SETOm, 0, 0 },
    { X86::SETPr, X86::SETPm, 0, 0 },
    { X86::SETSr, X86::SETSm, 0, 0 },
    { X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
    { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 },
    { X86::TEST16ri, X86::TEST16mi, 1, 0 },
    { X86::TEST32ri, X86::TEST32mi, 1, 0 },
    { X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
    { X86::TEST8ri, X86::TEST8mi, 1, 0 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
    unsigned RegOp = OpTbl0[i][0];
    unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
    unsigned FoldedLoad = OpTbl0[i][2];
    unsigned Align = OpTbl0[i][3];
    assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
    RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);

    // If this is not a reversible operation (because there is a many->one
    // mapping), don't insert the reverse of the operation into
    // MemOp2RegOpTable.
    if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
      continue;

    // Index 0, folded load or store.
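    // AuxInfo packs the unfold information: the low bits hold the table
    // index (0 here), bit 4 marks a folded load and bit 5 a folded store,
    // matching the encodings used for the other tables below.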
    unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
  }

  static const unsigned OpTbl1[][3] = {
    { X86::CMP16rr, X86::CMP16rm, 0 },
    { X86::CMP32rr, X86::CMP32rm, 0 },
    { X86::CMP64rr, X86::CMP64rm, 0 },
    { X86::CMP8rr, X86::CMP8rm, 0 },
    { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
    { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
    { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
    { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
    { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
    { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
    { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
    { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
    { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
    { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
    { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE, 0 },
    { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE, 0 },
    { X86::IMUL16rri, X86::IMUL16rmi, 0 },
    { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
    { X86::IMUL32rri, X86::IMUL32rmi, 0 },
    { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
    { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
    { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
    { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
    { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
    { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
    { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 },
    { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 },
    { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
    { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
    { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
    { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
    { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
    { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
    { X86::Int_CVTSI2SD64rr, X86::Int_CVTSI2SD64rm, 0 },
    { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
    { X86::Int_CVTSI2SS64rr, X86::Int_CVTSI2SS64rm, 0 },
    { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
    { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
    { X86::Int_CVTSS2SI64rr, X86::Int_CVTSS2SI64rm, 0 },
    { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
    { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
    { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
    { X86::Int_CVTTSD2SI64rr, X86::Int_CVTTSD2SI64rm, 0 },
    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
    { X86::Int_CVTTSS2SI64rr, X86::Int_CVTTSS2SI64rm, 0 },
    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
    { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
    { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
    { X86::MOV16rr, X86::MOV16rm, 0 },
    { X86::MOV32rr, X86::MOV32rm, 0 },
    { X86::MOV64rr, X86::MOV64rm, 0 },
    { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
    { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
    { X86::MOV8rr, X86::MOV8rm, 0 },
    { X86::MOVAPDrr, X86::MOVAPDrm, 16 },
    { X86::MOVAPSrr, X86::MOVAPSrm, 16 },
    { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 },
    { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 },
    { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
    { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
    { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
    { X86::MOVDQArr, X86::MOVDQArm, 16 },
    { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 },
    { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
    { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
    { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
    { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
    { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
    { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
    { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
    { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
    { X86::MOVUPDrr, X86::MOVUPDrm, 16 },
    { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
    { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
    { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
    { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
    { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
    { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
    { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
    { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
    { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
    { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
    { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
    { X86::PSHUFDri, X86::PSHUFDmi, 16 },
    { X86::PSHUFHWri, X86::PSHUFHWmi, 16 },
    { X86::PSHUFLWri, X86::PSHUFLWmi, 16 },
    { X86::RCPPSr, X86::RCPPSm, 16 },
    { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 },
    { X86::RSQRTPSr, X86::RSQRTPSm, 16 },
    { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 },
    { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
    { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
    { X86::SQRTPDr, X86::SQRTPDm, 16 },
    { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 },
    { X86::SQRTPSr, X86::SQRTPSm, 16 },
    { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 },
    { X86::SQRTSDr, X86::SQRTSDm, 0 },
    { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
    { X86::SQRTSSr, X86::SQRTSSm, 0 },
    { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
    { X86::TEST16rr, X86::TEST16rm, 0 },
    { X86::TEST32rr, X86::TEST32rm, 0 },
    { X86::TEST64rr, X86::TEST64rm, 0 },
    { X86::TEST8rr, X86::TEST8rm, 0 },
    // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
    { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
    { X86::UCOMISSrr, X86::UCOMISSrm, 0 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
    unsigned RegOp = OpTbl1[i][0];
    unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
    unsigned Align = OpTbl1[i][2];
    assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
    RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);

    // If this is not a reversible operation (because there is a many->one
    // mapping), don't insert the reverse of the operation into
    // MemOp2RegOpTable.
    if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
      continue;

    // Index 1, folded load
    unsigned AuxInfo = 1 | (1 << 4);
    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
  }

  static const unsigned OpTbl2[][3] = {
    { X86::ADC32rr, X86::ADC32rm, 0 },
    { X86::ADC64rr, X86::ADC64rm, 0 },
    { X86::ADD16rr, X86::ADD16rm, 0 },
    { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
    { X86::ADD32rr, X86::ADD32rm, 0 },
    { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
    { X86::ADD64rr, X86::ADD64rm, 0 },
    { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
    { X86::ADD8rr, X86::ADD8rm, 0 },
    { X86::ADDPDrr, X86::ADDPDrm, 16 },
    { X86::ADDPSrr, X86::ADDPSrm, 16 },
    { X86::ADDSDrr, X86::ADDSDrm, 0 },
    { X86::ADDSSrr, X86::ADDSSrm, 0 },
    { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 },
    { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 },
    { X86::AND16rr, X86::AND16rm, 0 },
    { X86::AND32rr, X86::AND32rm, 0 },
    { X86::AND64rr, X86::AND64rm, 0 },
    { X86::AND8rr, X86::AND8rm, 0 },
    { X86::ANDNPDrr, X86::ANDNPDrm, 16 },
    { X86::ANDNPSrr, X86::ANDNPSrm, 16 },
    { X86::ANDPDrr, X86::ANDPDrm, 16 },
    { X86::ANDPSrr, X86::ANDPSrm, 16 },
    { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
    { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
    { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
    { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
    { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
    { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
    { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
    { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
    { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
    { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
    { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
    { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
    { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
    { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
    { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
    { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
    { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
    { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
    { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
    { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
    { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
    { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
    { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
    { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
    { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
    { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
    { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
    { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
    { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
    { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
    { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
    { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
    { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
    { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
    { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
    { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
    { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
    { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
    { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
    { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
    { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
    { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
    { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
    { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
    { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
    { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
    { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
    { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
    { X86::CMPPDrri, X86::CMPPDrmi, 16 },
    { X86::CMPPSrri, X86::CMPPSrmi, 16 },
    { X86::CMPSDrr, X86::CMPSDrm, 0 },
    { X86::CMPSSrr, X86::CMPSSrm, 0 },
    { X86::DIVPDrr, X86::DIVPDrm, 16 },
    { X86::DIVPSrr, X86::DIVPSrm, 16 },
    { X86::DIVSDrr, X86::DIVSDrm, 0 },
    { X86::DIVSSrr, X86::DIVSSrm, 0 },
    { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 },
    { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 },
    { X86::FsANDPDrr, X86::FsANDPDrm, 16 },
    { X86::FsANDPSrr, X86::FsANDPSrm, 16 },
    { X86::FsORPDrr, X86::FsORPDrm, 16 },
    { X86::FsORPSrr, X86::FsORPSrm, 16 },
    { X86::FsXORPDrr, X86::FsXORPDrm, 16 },
    { X86::FsXORPSrr, X86::FsXORPSrm, 16 },
    { X86::HADDPDrr, X86::HADDPDrm, 16 },
    { X86::HADDPSrr, X86::HADDPSrm, 16 },
    { X86::HSUBPDrr, X86::HSUBPDrm, 16 },
    { X86::HSUBPSrr, X86::HSUBPSrm, 16 },
    { X86::IMUL16rr, X86::IMUL16rm, 0 },
    { X86::IMUL32rr, X86::IMUL32rm, 0 },
    { X86::IMUL64rr, X86::IMUL64rm, 0 },
    { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
    { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
    { X86::MAXPDrr, X86::MAXPDrm, 16 },
    { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
    { X86::MAXPSrr, X86::MAXPSrm, 16 },
    { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 },
    { X86::MAXSDrr, X86::MAXSDrm, 0 },
    { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
    { X86::MAXSSrr, X86::MAXSSrm, 0 },
    { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
    { X86::MINPDrr, X86::MINPDrm, 16 },
    { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 },
    { X86::MINPSrr, X86::MINPSrm, 16 },
    { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 },
    { X86::MINSDrr, X86::MINSDrm, 0 },
    { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
    { X86::MINSSrr, X86::MINSSrm, 0 },
    { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
    { X86::MULPDrr, X86::MULPDrm, 16 },
    { X86::MULPSrr, X86::MULPSrm, 16 },
    { X86::MULSDrr, X86::MULSDrm, 0 },
    { X86::MULSSrr, X86::MULSSrm, 0 },
    { X86::OR16rr, X86::OR16rm, 0 },
    { X86::OR32rr, X86::OR32rm, 0 },
    { X86::OR64rr, X86::OR64rm, 0 },
    { X86::OR8rr, X86::OR8rm, 0 },
    { X86::ORPDrr, X86::ORPDrm, 16 },
    { X86::ORPSrr, X86::ORPSrm, 16 },
    { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 },
    { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 },
    { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 },
    { X86::PADDBrr, X86::PADDBrm, 16 },
    { X86::PADDDrr, X86::PADDDrm, 16 },
    { X86::PADDQrr, X86::PADDQrm, 16 },
    { X86::PADDSBrr, X86::PADDSBrm, 16 },
    { X86::PADDSWrr, X86::PADDSWrm, 16 },
    { X86::PADDWrr, X86::PADDWrm, 16 },
    { X86::PANDNrr, X86::PANDNrm, 16 },
    { X86::PANDrr, X86::PANDrm, 16 },
    { X86::PAVGBrr, X86::PAVGBrm, 16 },
    { X86::PAVGWrr, X86::PAVGWrm, 16 },
    { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 },
    { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 },
    { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 },
    { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 },
    { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 },
    { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 },
    { X86::PINSRWrri, X86::PINSRWrmi, 16 },
    { X86::PMADDWDrr, X86::PMADDWDrm, 16 },
    { X86::PMAXSWrr, X86::PMAXSWrm, 16 },
    { X86::PMAXUBrr, X86::PMAXUBrm, 16 },
    { X86::PMINSWrr, X86::PMINSWrm, 16 },
    { X86::PMINUBrr, X86::PMINUBrm, 16 },
    { X86::PMULDQrr, X86::PMULDQrm, 16 },
    { X86::PMULHUWrr, X86::PMULHUWrm, 16 },
    { X86::PMULHWrr, X86::PMULHWrm, 16 },
    { X86::PMULLDrr, X86::PMULLDrm, 16 },
    { X86::PMULLWrr, X86::PMULLWrm, 16 },
    { X86::PMULUDQrr, X86::PMULUDQrm, 16 },
    { X86::PORrr, X86::PORrm, 16 },
    { X86::PSADBWrr, X86::PSADBWrm, 16 },
    { X86::PSLLDrr, X86::PSLLDrm, 16 },
    { X86::PSLLQrr, X86::PSLLQrm, 16 },
    { X86::PSLLWrr, X86::PSLLWrm, 16 },
    { X86::PSRADrr, X86::PSRADrm, 16 },
    { X86::PSRAWrr, X86::PSRAWrm, 16 },
    { X86::PSRLDrr, X86::PSRLDrm, 16 },
    { X86::PSRLQrr, X86::PSRLQrm, 16 },
    { X86::PSRLWrr, X86::PSRLWrm, 16 },
    { X86::PSUBBrr, X86::PSUBBrm, 16 },
    { X86::PSUBDrr, X86::PSUBDrm, 16 },
    { X86::PSUBSBrr, X86::PSUBSBrm, 16 },
    { X86::PSUBSWrr, X86::PSUBSWrm, 16 },
    { X86::PSUBWrr, X86::PSUBWrm, 16 },
    { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 },
    { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 },
    { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 },
    { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 },
    { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 },
    { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 },
    { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 },
    { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 },
    { X86::PXORrr, X86::PXORrm, 16 },
    { X86::SBB32rr, X86::SBB32rm, 0 },
    { X86::SBB64rr, X86::SBB64rm, 0 },
    { X86::SHUFPDrri, X86::SHUFPDrmi, 16 },
    { X86::SHUFPSrri, X86::SHUFPSrmi, 16 },
    { X86::SUB16rr, X86::SUB16rm, 0 },
    { X86::SUB32rr, X86::SUB32rm, 0 },
    { X86::SUB64rr, X86::SUB64rm, 0 },
    { X86::SUB8rr, X86::SUB8rm, 0 },
    { X86::SUBPDrr, X86::SUBPDrm, 16 },
    { X86::SUBPSrr, X86::SUBPSrm, 16 },
    { X86::SUBSDrr, X86::SUBSDrm, 0 },
    { X86::SUBSSrr, X86::SUBSSrm, 0 },
    // FIXME: TEST*rr -> swapped operand of TEST*mr.
    { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 },
    { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 },
    { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 },
    { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 },
    { X86::XOR16rr, X86::XOR16rm, 0 },
    { X86::XOR32rr, X86::XOR32rm, 0 },
    { X86::XOR64rr, X86::XOR64rm, 0 },
    { X86::XOR8rr, X86::XOR8rm, 0 },
    { X86::XORPDrr, X86::XORPDrm, 16 },
    { X86::XORPSrr, X86::XORPSrm, 16 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
    unsigned RegOp = OpTbl2[i][0];
    unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
    unsigned Align = OpTbl2[i][2];

    assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
    RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);

    // If this is not a reversible operation (because there is a many->one
    // mapping), don't insert the reverse of the operation into
    // MemOp2RegOpTable.
    if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
      continue;

    // Index 2, folded load
    unsigned AuxInfo = 2 | (1 << 4);
    assert(!MemOp2RegOpTable.count(MemOp) &&
           "Duplicated entries in unfolding maps?");
    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
  }
}

bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                    unsigned &SrcReg, unsigned &DstReg,
                                    unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: break;
  case X86::MOVSX16rr8:
  case X86::MOVZX16rr8:
  case X86::MOVSX32rr8:
  case X86::MOVZX32rr8:
  case X86::MOVSX64rr8:
  case X86::MOVZX64rr8:
    if (!TM.getSubtarget<X86Subtarget>().is64Bit())
      // It's not always legal to reference the low 8-bit of the larger
      // register in 32-bit mode.
      return false;
  case X86::MOVSX32rr16:
  case X86::MOVZX32rr16:
  case X86::MOVSX64rr16:
  case X86::MOVZX64rr16:
  case X86::MOVSX64rr32:
  case X86::MOVZX64rr32: {
    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
      // Be conservative.
      return false;
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable(0);
      break;
    case X86::MOVSX16rr8:
    case X86::MOVZX16rr8:
    case X86::MOVSX32rr8:
    case X86::MOVZX32rr8:
    case X86::MOVSX64rr8:
    case X86::MOVZX64rr8:
      SubIdx = X86::sub_8bit;
      break;
    case X86::MOVSX32rr16:
    case X86::MOVZX32rr16:
    case X86::MOVSX64rr16:
    case X86::MOVZX64rr16:
      SubIdx = X86::sub_16bit;
      break;
    case X86::MOVSX64rr32:
    case X86::MOVZX64rr32:
      SubIdx = X86::sub_32bit;
      break;
    }
    return true;
  }
  }
  return false;
}

/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and the following operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
                                  int &FrameIndex) const {
  if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
      MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
      MI->getOperand(Op+1).getImm() == 1 &&
      MI->getOperand(Op+2).getReg() == 0 &&
      MI->getOperand(Op+3).getImm() == 0) {
    FrameIndex = MI->getOperand(Op).getIndex();
    return true;
  }
  return false;
}

static bool isFrameLoadOpcode(int Opcode) {
  switch (Opcode) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::VMOVAPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVDQAYrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
    return true;
    break;
  }
  return false;
}

static bool isFrameStoreOpcode(int Opcode) {
  switch (Opcode) {
  default: break;
  case X86::MOV8mr:
  case X86::MOV16mr:
  case X86::MOV32mr:
  case X86::MOV64mr:
  case X86::ST_FpP64m:
  case X86::MOVSSmr:
  case X86::MOVSDmr:
  case X86::MOVAPSmr:
  case X86::MOVAPDmr:
  case X86::MOVDQAmr:
  case X86::VMOVAPSYmr:
  case X86::VMOVAPDYmr:
  case X86::VMOVDQAYmr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
    return true;
  }
  return false;
}

unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
  if (isFrameLoadOpcode(MI->getOpcode()))
    if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
      return MI->getOperand(0).getReg();
  return 0;
}

unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                 int &FrameIndex) const {
  if (isFrameLoadOpcode(MI->getOpcode())) {
    unsigned Reg;
    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    const MachineMemOperand *Dummy;
    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
  }
  return 0;
}

bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
                                        const MachineMemOperand *&MMO,
                                        int &FrameIndex) const {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end();
       o != oe;
       ++o) {
    if ((*o)->isLoad() && (*o)->getValue())
      if (const FixedStackPseudoSourceValue *Value =
          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
        FrameIndex = Value->getFrameIndex();
        MMO = *o;
        return true;
      }
  }
  return false;
}

unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                          int &FrameIndex) const {
  if (isFrameStoreOpcode(MI->getOpcode()))
    if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
        isFrameOperand(MI, 0, FrameIndex))
      return MI->getOperand(X86::AddrNumOperands).getReg();
  return 0;
}

unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                int &FrameIndex) const {
  if (isFrameStoreOpcode(MI->getOpcode())) {
    unsigned Reg;
    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    const MachineMemOperand *Dummy;
    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
  }
  return 0;
}

bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
                                       const MachineMemOperand *&MMO,
                                       int &FrameIndex) const {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end();
       o != oe;
       ++o) {
    if ((*o)->isStore() && (*o)->getValue())
      if (const FixedStackPseudoSourceValue *Value =
          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
        FrameIndex = Value->getFrameIndex();
        MMO = *o;
        return true;
      }
  }
  return false;
}

/// regIsPICBase - Return true if register is PIC base (i.e., defined by
/// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
  bool isPICBase = false;
  for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
         E = MRI.def_end(); I != E; ++I) {
    MachineInstr *DefMI = I.getOperand().getParent();
    if (DefMI->getOpcode() != X86::MOVPC32r)
      return false;
    assert(!isPICBase && "More than one PIC base?");
    isPICBase = true;
  }
  return isPICBase;
}

bool
X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVDQAYrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::FsMOVAPSrm:
  case X86::FsMOVAPDrm: {
    // Loads from constant pools are trivially rematerializable.
    if (MI->getOperand(1).isReg() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        MI->isInvariantLoad(AA)) {
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0 || BaseReg == X86::RIP)
        return true;
      // Allow re-materialization of PIC load.
      if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
        return false;
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isPICBase = false;
      for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
             E = MRI.def_end(); I != E; ++I) {
        MachineInstr *DefMI = I.getOperand().getParent();
        if (DefMI->getOpcode() != X86::MOVPC32r)
          return false;
        assert(!isPICBase && "More than one PIC base?");
        isPICBase = true;
      }
      return isPICBase;
    }
    return false;
  }

  case X86::LEA32r:
  case X86::LEA64r: {
    if (MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        !MI->getOperand(4).isReg()) {
      // lea fi#, lea GV, etc. are all rematerializable.
      if (!MI->getOperand(1).isReg())
        return true;
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0)
        return true;
      // Allow re-materialization of lea PICBase + x.
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }
  }

  // All other instructions marked M_REMATERIALIZABLE are always trivially
  // rematerializable.
  return true;
}

/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
/// that would clobber the EFLAGS condition register. Note the result may be
/// conservative. If it cannot definitely determine the safety after visiting
/// a few instructions in each direction it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator E = MBB.end();

  // It's always safe to clobber EFLAGS at the end of a block.
  if (I == E)
    return true;

  // For compile time consideration, if we are not able to determine the
  // safety after visiting 4 instructions in each direction, we will assume
  // it's not safe.
  MachineBasicBlock::iterator Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    bool SeenDef = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      if (!MO.isReg())
        continue;
      if (MO.getReg() == X86::EFLAGS) {
        if (MO.isUse())
          return false;
        SeenDef = true;
      }
    }

    if (SeenDef)
      // This instruction defines EFLAGS, no need to look any further.
      return true;
    ++Iter;
    // Skip over DBG_VALUE.
    while (Iter != E && Iter->isDebugValue())
      ++Iter;

    // If we make it to the end of the block, it's safe to clobber EFLAGS.
    if (Iter == E)
      return true;
  }

  MachineBasicBlock::iterator B = MBB.begin();
  Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    // If we make it to the beginning of the block, it's safe to clobber
    // EFLAGS iff EFLAGS is not live-in.
    if (Iter == B)
      return !MBB.isLiveIn(X86::EFLAGS);

    --Iter;
    // Skip over DBG_VALUE.
    while (Iter != B && Iter->isDebugValue())
      --Iter;

    bool SawKill = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
        if (MO.isDef()) return MO.isDead();
        if (MO.isKill()) SawKill = true;
      }
    }

    if (SawKill)
      // This instruction kills EFLAGS and doesn't redefine it, so
      // there's no need to look further.
      return true;
  }

  // Conservative answer.
  return false;
}

void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 unsigned DestReg, unsigned SubIdx,
                                 const MachineInstr *Orig,
                                 const TargetRegisterInfo &TRI) const {
  DebugLoc DL = Orig->getDebugLoc();

  // MOV32r0 etc. are implemented with xor which clobbers condition code.
  // Re-materialize them as movri instructions to avoid side effects.
  bool Clone = true;
  unsigned Opc = Orig->getOpcode();
  switch (Opc) {
  default: break;
  case X86::MOV8r0:
  case X86::MOV16r0:
  case X86::MOV32r0:
  case X86::MOV64r0: {
    if (!isSafeToClobberEFLAGS(MBB, I)) {
      switch (Opc) {
      default: break;
      case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
      case X86::MOV16r0: Opc = X86::MOV16ri; break;
      case X86::MOV32r0: Opc = X86::MOV32ri; break;
      case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
      }
      Clone = false;
    }
    break;
  }
  }

  if (Clone) {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MBB.insert(I, MI);
  } else {
    BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
  }

  MachineInstr *NewMI = prior(I);
  NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
}

/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
/// is not marked dead.
static bool hasLiveCondCodeDef(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
/// 16-bit LEA is disabled; it uses 32-bit LEA to form 3-address code by
/// promoting to a 32-bit superregister and then truncating back down to a
/// 16-bit subregister.
MachineInstr *
X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
                                           MachineFunction::iterator &MFI,
                                           MachineBasicBlock::iterator &MBBI,
                                           LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
    ? X86::LEA64_32r : X86::LEA32r;
  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);

  // Build and insert into an implicit UNDEF value. This is OK because
  // we'll be shifting and then extracting the lower 16-bits.
  // This has the potential to cause a partial register stall. e.g.
  //   movw    (%rbp,%rcx,2), %dx
  //   leal    -65(%rdx), %esi
  // But testing has shown this *does* help performance in 64-bit mode (at
  // least on modern x86 machines).
1161200581Srdivacky BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); 1162200581Srdivacky MachineInstr *InsMI = 1163210299Sed BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) 1164210299Sed .addReg(leaInReg, RegState::Define, X86::sub_16bit) 1165210299Sed .addReg(Src, getKillRegState(isKill)); 1166200581Srdivacky 1167200581Srdivacky MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), 1168200581Srdivacky get(Opc), leaOutReg); 1169200581Srdivacky switch (MIOpc) { 1170200581Srdivacky default: 1171200581Srdivacky llvm_unreachable(0); 1172200581Srdivacky break; 1173200581Srdivacky case X86::SHL16ri: { 1174200581Srdivacky unsigned ShAmt = MI->getOperand(2).getImm(); 1175200581Srdivacky MIB.addReg(0).addImm(1 << ShAmt) 1176210299Sed .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0); 1177200581Srdivacky break; 1178200581Srdivacky } 1179200581Srdivacky case X86::INC16r: 1180200581Srdivacky case X86::INC64_16r: 1181210299Sed addRegOffset(MIB, leaInReg, true, 1); 1182200581Srdivacky break; 1183200581Srdivacky case X86::DEC16r: 1184200581Srdivacky case X86::DEC64_16r: 1185210299Sed addRegOffset(MIB, leaInReg, true, -1); 1186200581Srdivacky break; 1187200581Srdivacky case X86::ADD16ri: 1188200581Srdivacky case X86::ADD16ri8: 1189218893Sdim case X86::ADD16ri_DB: 1190218893Sdim case X86::ADD16ri8_DB: 1191218893Sdim addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); 1192200581Srdivacky break; 1193218893Sdim case X86::ADD16rr: 1194218893Sdim case X86::ADD16rr_DB: { 1195200581Srdivacky unsigned Src2 = MI->getOperand(2).getReg(); 1196200581Srdivacky bool isKill2 = MI->getOperand(2).isKill(); 1197200581Srdivacky unsigned leaInReg2 = 0; 1198200581Srdivacky MachineInstr *InsMI2 = 0; 1199200581Srdivacky if (Src == Src2) { 1200200581Srdivacky // ADD16rr %reg1028<kill>, %reg1028 1201200581Srdivacky // just a single insert_subreg. 1202200581Srdivacky addRegReg(MIB, leaInReg, true, leaInReg, false); 1203200581Srdivacky } else { 1204218893Sdim leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); 1205200581Srdivacky // Build and insert into an implicit UNDEF value. This is OK because 1206218893Sdim // well be shifting and then extracting the lower 16-bits. 
1207200581Srdivacky BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); 1208200581Srdivacky InsMI2 = 1209210299Sed BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) 1210210299Sed .addReg(leaInReg2, RegState::Define, X86::sub_16bit) 1211210299Sed .addReg(Src2, getKillRegState(isKill2)); 1212200581Srdivacky addRegReg(MIB, leaInReg, true, leaInReg2, true); 1213200581Srdivacky } 1214200581Srdivacky if (LV && isKill2 && InsMI2) 1215200581Srdivacky LV->replaceKillInstruction(Src2, MI, InsMI2); 1216200581Srdivacky break; 1217200581Srdivacky } 1218200581Srdivacky } 1219200581Srdivacky 1220200581Srdivacky MachineInstr *NewMI = MIB; 1221200581Srdivacky MachineInstr *ExtMI = 1222210299Sed BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) 1223200581Srdivacky .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1224210299Sed .addReg(leaOutReg, RegState::Kill, X86::sub_16bit); 1225200581Srdivacky 1226200581Srdivacky if (LV) { 1227200581Srdivacky // Update live variables 1228200581Srdivacky LV->getVarInfo(leaInReg).Kills.push_back(NewMI); 1229200581Srdivacky LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); 1230200581Srdivacky if (isKill) 1231200581Srdivacky LV->replaceKillInstruction(Src, MI, InsMI); 1232200581Srdivacky if (isDead) 1233200581Srdivacky LV->replaceKillInstruction(Dest, MI, ExtMI); 1234200581Srdivacky } 1235200581Srdivacky 1236200581Srdivacky return ExtMI; 1237200581Srdivacky} 1238200581Srdivacky 1239193323Sed/// convertToThreeAddress - This method must be implemented by targets that 1240193323Sed/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target 1241193323Sed/// may be able to convert a two-address instruction into a true 1242193323Sed/// three-address instruction on demand. This allows the X86 target (for 1243193323Sed/// example) to convert ADD and SHL instructions into LEA instructions if they 1244193323Sed/// would require register copies due to two-addressness. 1245193323Sed/// 1246193323Sed/// This method returns a null pointer if the transformation cannot be 1247193323Sed/// performed, otherwise it returns the new instruction. 1248193323Sed/// 1249193323SedMachineInstr * 1250193323SedX86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 1251193323Sed MachineBasicBlock::iterator &MBBI, 1252193323Sed LiveVariables *LV) const { 1253193323Sed MachineInstr *MI = MBBI; 1254193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 1255193323Sed // All instructions input are two-addr instructions. Get the known operands. 1256193323Sed unsigned Dest = MI->getOperand(0).getReg(); 1257193323Sed unsigned Src = MI->getOperand(1).getReg(); 1258193323Sed bool isDead = MI->getOperand(0).isDead(); 1259193323Sed bool isKill = MI->getOperand(1).isKill(); 1260193323Sed 1261193323Sed MachineInstr *NewMI = NULL; 1262193323Sed // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When 1263193323Sed // we have better subtarget support, enable the 16-bit LEA generation here. 1264200581Srdivacky // 16-bit LEA is also slow on Core2. 
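  // As a made-up illustration of the payoff: a two-address add such as
  //   %vreg5<def> = ADD32rr %vreg5, %vreg7
  // whose first source is still needed afterwards would force the
  // two-address pass to insert a copy, whereas the equivalent
  //   %vreg8<def> = LEA32r %vreg5, 1, %vreg7, 0, %noreg
  // writes a fresh register and leaves EFLAGS untouched.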
1265193323Sed bool DisableLEA16 = true; 1266200581Srdivacky bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 1267193323Sed 1268193323Sed unsigned MIOpc = MI->getOpcode(); 1269193323Sed switch (MIOpc) { 1270193323Sed case X86::SHUFPSrri: { 1271193323Sed assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); 1272193323Sed if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; 1273218893Sdim 1274193323Sed unsigned B = MI->getOperand(1).getReg(); 1275193323Sed unsigned C = MI->getOperand(2).getReg(); 1276193323Sed if (B != C) return 0; 1277193323Sed unsigned A = MI->getOperand(0).getReg(); 1278193323Sed unsigned M = MI->getOperand(3).getImm(); 1279193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) 1280193323Sed .addReg(A, RegState::Define | getDeadRegState(isDead)) 1281193323Sed .addReg(B, getKillRegState(isKill)).addImm(M); 1282193323Sed break; 1283193323Sed } 1284193323Sed case X86::SHL64ri: { 1285193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1286193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1287193323Sed // the flags produced by a shift yet, so this is safe. 1288193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1289193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1290193323Sed 1291218893Sdim // LEA can't handle RSP. 1292218893Sdim if (TargetRegisterInfo::isVirtualRegister(Src) && 1293218893Sdim !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass)) 1294218893Sdim return 0; 1295218893Sdim 1296193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 1297193323Sed .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1298193323Sed .addReg(0).addImm(1 << ShAmt) 1299193323Sed .addReg(Src, getKillRegState(isKill)) 1300210299Sed .addImm(0).addReg(0); 1301193323Sed break; 1302193323Sed } 1303193323Sed case X86::SHL32ri: { 1304193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1305193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1306193323Sed // the flags produced by a shift yet, so this is safe. 1307193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1308193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1309193323Sed 1310218893Sdim // LEA can't handle ESP. 1311218893Sdim if (TargetRegisterInfo::isVirtualRegister(Src) && 1312218893Sdim !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass)) 1313218893Sdim return 0; 1314218893Sdim 1315200581Srdivacky unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 1316193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1317193323Sed .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1318193323Sed .addReg(0).addImm(1 << ShAmt) 1319210299Sed .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0); 1320193323Sed break; 1321193323Sed } 1322193323Sed case X86::SHL16ri: { 1323193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1324193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1325193323Sed // the flags produced by a shift yet, so this is safe. 1326193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1327193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1328193323Sed 1329200581Srdivacky if (DisableLEA16) 1330200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1331200581Srdivacky NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1332200581Srdivacky .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1333200581Srdivacky .addReg(0).addImm(1 << ShAmt) 1334200581Srdivacky .addReg(Src, getKillRegState(isKill)) 1335210299Sed .addImm(0).addReg(0); 1336193323Sed break; 1337193323Sed } 1338193323Sed default: { 1339193323Sed // The following opcodes also sets the condition code register(s). Only 1340193323Sed // convert them to equivalent lea if the condition code register def's 1341193323Sed // are dead! 1342193323Sed if (hasLiveCondCodeDef(MI)) 1343193323Sed return 0; 1344193323Sed 1345193323Sed switch (MIOpc) { 1346193323Sed default: return 0; 1347193323Sed case X86::INC64r: 1348193323Sed case X86::INC32r: 1349193323Sed case X86::INC64_32r: { 1350193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 1351193323Sed unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r 1352193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 1353218893Sdim 1354218893Sdim // LEA can't handle RSP. 1355218893Sdim if (TargetRegisterInfo::isVirtualRegister(Src) && 1356218893Sdim !MF.getRegInfo().constrainRegClass(Src, 1357218893Sdim MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass : 1358218893Sdim X86::GR32_NOSPRegisterClass)) 1359218893Sdim return 0; 1360218893Sdim 1361210299Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1362193323Sed .addReg(Dest, RegState::Define | 1363193323Sed getDeadRegState(isDead)), 1364193323Sed Src, isKill, 1); 1365193323Sed break; 1366193323Sed } 1367193323Sed case X86::INC16r: 1368193323Sed case X86::INC64_16r: 1369200581Srdivacky if (DisableLEA16) 1370200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1371193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 1372193323Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1373193323Sed .addReg(Dest, RegState::Define | 1374193323Sed getDeadRegState(isDead)), 1375193323Sed Src, isKill, 1); 1376193323Sed break; 1377193323Sed case X86::DEC64r: 1378193323Sed case X86::DEC32r: 1379193323Sed case X86::DEC64_32r: { 1380193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 1381193323Sed unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r 1382193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 1383218893Sdim // LEA can't handle RSP. 1384218893Sdim if (TargetRegisterInfo::isVirtualRegister(Src) && 1385218893Sdim !MF.getRegInfo().constrainRegClass(Src, 1386218893Sdim MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass : 1387218893Sdim X86::GR32_NOSPRegisterClass)) 1388218893Sdim return 0; 1389218893Sdim 1390210299Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1391193323Sed .addReg(Dest, RegState::Define | 1392193323Sed getDeadRegState(isDead)), 1393193323Sed Src, isKill, -1); 1394193323Sed break; 1395193323Sed } 1396193323Sed case X86::DEC16r: 1397193323Sed case X86::DEC64_16r: 1398200581Srdivacky if (DisableLEA16) 1399200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1400193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 1401193323Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1402193323Sed .addReg(Dest, RegState::Define | 1403193323Sed getDeadRegState(isDead)), 1404193323Sed Src, isKill, -1); 1405193323Sed break; 1406193323Sed case X86::ADD64rr: 1407218893Sdim case X86::ADD64rr_DB: 1408218893Sdim case X86::ADD32rr: 1409218893Sdim case X86::ADD32rr_DB: { 1410193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1411218893Sdim unsigned Opc; 1412218893Sdim TargetRegisterClass *RC; 1413218893Sdim if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) { 1414218893Sdim Opc = X86::LEA64r; 1415218893Sdim RC = X86::GR64_NOSPRegisterClass; 1416218893Sdim } else { 1417218893Sdim Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 1418218893Sdim RC = X86::GR32_NOSPRegisterClass; 1419218893Sdim } 1420218893Sdim 1421218893Sdim 1422193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 1423193323Sed bool isKill2 = MI->getOperand(2).isKill(); 1424218893Sdim 1425218893Sdim // LEA can't handle RSP. 1426218893Sdim if (TargetRegisterInfo::isVirtualRegister(Src2) && 1427218893Sdim !MF.getRegInfo().constrainRegClass(Src2, RC)) 1428218893Sdim return 0; 1429218893Sdim 1430193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1431193323Sed .addReg(Dest, RegState::Define | 1432193323Sed getDeadRegState(isDead)), 1433193323Sed Src, isKill, Src2, isKill2); 1434193323Sed if (LV && isKill2) 1435193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 1436193323Sed break; 1437193323Sed } 1438218893Sdim case X86::ADD16rr: 1439218893Sdim case X86::ADD16rr_DB: { 1440200581Srdivacky if (DisableLEA16) 1441200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1442193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1443193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 1444193323Sed bool isKill2 = MI->getOperand(2).isKill(); 1445193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1446193323Sed .addReg(Dest, RegState::Define | 1447193323Sed getDeadRegState(isDead)), 1448193323Sed Src, isKill, Src2, isKill2); 1449193323Sed if (LV && isKill2) 1450193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 1451193323Sed break; 1452193323Sed } 1453193323Sed case X86::ADD64ri32: 1454193323Sed case X86::ADD64ri8: 1455218893Sdim case X86::ADD64ri32_DB: 1456218893Sdim case X86::ADD64ri8_DB: 1457193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1458210299Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 1459200581Srdivacky .addReg(Dest, RegState::Define | 1460200581Srdivacky getDeadRegState(isDead)), 1461200581Srdivacky Src, isKill, MI->getOperand(2).getImm()); 1462193323Sed break; 1463193323Sed case X86::ADD32ri: 1464218893Sdim case X86::ADD32ri8: 1465218893Sdim case X86::ADD32ri_DB: 1466218893Sdim case X86::ADD32ri8_DB: { 1467193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1468200581Srdivacky unsigned Opc = is64Bit ? 
X86::LEA64_32r : X86::LEA32r; 1469210299Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1470200581Srdivacky .addReg(Dest, RegState::Define | 1471200581Srdivacky getDeadRegState(isDead)), 1472193323Sed Src, isKill, MI->getOperand(2).getImm()); 1473193323Sed break; 1474200581Srdivacky } 1475193323Sed case X86::ADD16ri: 1476193323Sed case X86::ADD16ri8: 1477218893Sdim case X86::ADD16ri_DB: 1478218893Sdim case X86::ADD16ri8_DB: 1479200581Srdivacky if (DisableLEA16) 1480200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1481193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1482210299Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1483200581Srdivacky .addReg(Dest, RegState::Define | 1484200581Srdivacky getDeadRegState(isDead)), 1485200581Srdivacky Src, isKill, MI->getOperand(2).getImm()); 1486193323Sed break; 1487193323Sed } 1488193323Sed } 1489193323Sed } 1490193323Sed 1491193323Sed if (!NewMI) return 0; 1492193323Sed 1493193323Sed if (LV) { // Update live variables 1494193323Sed if (isKill) 1495193323Sed LV->replaceKillInstruction(Src, MI, NewMI); 1496193323Sed if (isDead) 1497193323Sed LV->replaceKillInstruction(Dest, MI, NewMI); 1498193323Sed } 1499193323Sed 1500218893Sdim MFI->insert(MBBI, NewMI); // Insert the new inst 1501193323Sed return NewMI; 1502193323Sed} 1503193323Sed 1504193323Sed/// commuteInstruction - We have a few instructions that must be hacked on to 1505193323Sed/// commute them. 1506193323Sed/// 1507193323SedMachineInstr * 1508193323SedX86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { 1509193323Sed switch (MI->getOpcode()) { 1510193323Sed case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) 1511193323Sed case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) 1512193323Sed case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I) 1513193323Sed case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I) 1514193323Sed case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I) 1515193323Sed case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I) 1516193323Sed unsigned Opc; 1517193323Sed unsigned Size; 1518193323Sed switch (MI->getOpcode()) { 1519198090Srdivacky default: llvm_unreachable("Unreachable!"); 1520193323Sed case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; 1521193323Sed case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; 1522193323Sed case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; 1523193323Sed case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break; 1524193323Sed case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break; 1525193323Sed case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break; 1526193323Sed } 1527193323Sed unsigned Amt = MI->getOperand(3).getImm(); 1528193323Sed if (NewMI) { 1529193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 1530193323Sed MI = MF.CloneMachineInstr(MI); 1531193323Sed NewMI = false; 1532193323Sed } 1533193323Sed MI->setDesc(get(Opc)); 1534193323Sed MI->getOperand(3).setImm(Size-Amt); 1535193323Sed return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); 1536193323Sed } 1537193323Sed case X86::CMOVB16rr: 1538193323Sed case X86::CMOVB32rr: 1539193323Sed case X86::CMOVB64rr: 1540193323Sed case X86::CMOVAE16rr: 1541193323Sed case X86::CMOVAE32rr: 1542193323Sed case X86::CMOVAE64rr: 1543193323Sed case 
X86::CMOVE16rr: 1544193323Sed case X86::CMOVE32rr: 1545193323Sed case X86::CMOVE64rr: 1546193323Sed case X86::CMOVNE16rr: 1547193323Sed case X86::CMOVNE32rr: 1548193323Sed case X86::CMOVNE64rr: 1549193323Sed case X86::CMOVBE16rr: 1550193323Sed case X86::CMOVBE32rr: 1551193323Sed case X86::CMOVBE64rr: 1552193323Sed case X86::CMOVA16rr: 1553193323Sed case X86::CMOVA32rr: 1554193323Sed case X86::CMOVA64rr: 1555193323Sed case X86::CMOVL16rr: 1556193323Sed case X86::CMOVL32rr: 1557193323Sed case X86::CMOVL64rr: 1558193323Sed case X86::CMOVGE16rr: 1559193323Sed case X86::CMOVGE32rr: 1560193323Sed case X86::CMOVGE64rr: 1561193323Sed case X86::CMOVLE16rr: 1562193323Sed case X86::CMOVLE32rr: 1563193323Sed case X86::CMOVLE64rr: 1564193323Sed case X86::CMOVG16rr: 1565193323Sed case X86::CMOVG32rr: 1566193323Sed case X86::CMOVG64rr: 1567193323Sed case X86::CMOVS16rr: 1568193323Sed case X86::CMOVS32rr: 1569193323Sed case X86::CMOVS64rr: 1570193323Sed case X86::CMOVNS16rr: 1571193323Sed case X86::CMOVNS32rr: 1572193323Sed case X86::CMOVNS64rr: 1573193323Sed case X86::CMOVP16rr: 1574193323Sed case X86::CMOVP32rr: 1575193323Sed case X86::CMOVP64rr: 1576193323Sed case X86::CMOVNP16rr: 1577193323Sed case X86::CMOVNP32rr: 1578193323Sed case X86::CMOVNP64rr: 1579193323Sed case X86::CMOVO16rr: 1580193323Sed case X86::CMOVO32rr: 1581193323Sed case X86::CMOVO64rr: 1582193323Sed case X86::CMOVNO16rr: 1583193323Sed case X86::CMOVNO32rr: 1584193323Sed case X86::CMOVNO64rr: { 1585193323Sed unsigned Opc = 0; 1586193323Sed switch (MI->getOpcode()) { 1587193323Sed default: break; 1588193323Sed case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; 1589193323Sed case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; 1590193323Sed case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; 1591193323Sed case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break; 1592193323Sed case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break; 1593193323Sed case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break; 1594193323Sed case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break; 1595193323Sed case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break; 1596193323Sed case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break; 1597193323Sed case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; 1598193323Sed case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; 1599193323Sed case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; 1600193323Sed case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break; 1601193323Sed case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break; 1602193323Sed case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break; 1603193323Sed case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break; 1604193323Sed case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break; 1605193323Sed case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break; 1606193323Sed case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break; 1607193323Sed case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break; 1608193323Sed case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break; 1609193323Sed case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break; 1610193323Sed case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break; 1611193323Sed case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break; 1612193323Sed case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break; 1613193323Sed case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break; 1614193323Sed case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break; 1615193323Sed case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break; 1616193323Sed case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break; 1617193323Sed case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break; 1618193323Sed case 
X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break; 1619193323Sed case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break; 1620193323Sed case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break; 1621193323Sed case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break; 1622193323Sed case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break; 1623193323Sed case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break; 1624193323Sed case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break; 1625193323Sed case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break; 1626193323Sed case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break; 1627193323Sed case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break; 1628193323Sed case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; 1629193323Sed case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; 1630193323Sed case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break; 1631193323Sed case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break; 1632193323Sed case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break; 1633193323Sed case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break; 1634193323Sed case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break; 1635193323Sed case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break; 1636193323Sed } 1637193323Sed if (NewMI) { 1638193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 1639193323Sed MI = MF.CloneMachineInstr(MI); 1640193323Sed NewMI = false; 1641193323Sed } 1642193323Sed MI->setDesc(get(Opc)); 1643193323Sed // Fallthrough intended. 1644193323Sed } 1645193323Sed default: 1646193323Sed return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); 1647193323Sed } 1648193323Sed} 1649193323Sed 1650193323Sedstatic X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { 1651193323Sed switch (BrOpc) { 1652193323Sed default: return X86::COND_INVALID; 1653203954Srdivacky case X86::JE_4: return X86::COND_E; 1654203954Srdivacky case X86::JNE_4: return X86::COND_NE; 1655203954Srdivacky case X86::JL_4: return X86::COND_L; 1656203954Srdivacky case X86::JLE_4: return X86::COND_LE; 1657203954Srdivacky case X86::JG_4: return X86::COND_G; 1658203954Srdivacky case X86::JGE_4: return X86::COND_GE; 1659203954Srdivacky case X86::JB_4: return X86::COND_B; 1660203954Srdivacky case X86::JBE_4: return X86::COND_BE; 1661203954Srdivacky case X86::JA_4: return X86::COND_A; 1662203954Srdivacky case X86::JAE_4: return X86::COND_AE; 1663203954Srdivacky case X86::JS_4: return X86::COND_S; 1664203954Srdivacky case X86::JNS_4: return X86::COND_NS; 1665203954Srdivacky case X86::JP_4: return X86::COND_P; 1666203954Srdivacky case X86::JNP_4: return X86::COND_NP; 1667203954Srdivacky case X86::JO_4: return X86::COND_O; 1668203954Srdivacky case X86::JNO_4: return X86::COND_NO; 1669193323Sed } 1670193323Sed} 1671193323Sed 1672193323Sedunsigned X86::GetCondBranchFromCond(X86::CondCode CC) { 1673193323Sed switch (CC) { 1674198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 1675203954Srdivacky case X86::COND_E: return X86::JE_4; 1676203954Srdivacky case X86::COND_NE: return X86::JNE_4; 1677203954Srdivacky case X86::COND_L: return X86::JL_4; 1678203954Srdivacky case X86::COND_LE: return X86::JLE_4; 1679203954Srdivacky case X86::COND_G: return X86::JG_4; 1680203954Srdivacky case X86::COND_GE: return X86::JGE_4; 1681203954Srdivacky case X86::COND_B: return X86::JB_4; 1682203954Srdivacky case X86::COND_BE: return X86::JBE_4; 1683203954Srdivacky case X86::COND_A: return X86::JA_4; 1684203954Srdivacky case X86::COND_AE: return X86::JAE_4; 1685203954Srdivacky case X86::COND_S: return X86::JS_4; 1686203954Srdivacky case X86::COND_NS: return X86::JNS_4; 
1687203954Srdivacky case X86::COND_P: return X86::JP_4; 1688203954Srdivacky case X86::COND_NP: return X86::JNP_4; 1689203954Srdivacky case X86::COND_O: return X86::JO_4; 1690203954Srdivacky case X86::COND_NO: return X86::JNO_4; 1691193323Sed } 1692193323Sed} 1693193323Sed 1694193323Sed/// GetOppositeBranchCondition - Return the inverse of the specified condition, 1695193323Sed/// e.g. turning COND_E to COND_NE. 1696193323SedX86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { 1697193323Sed switch (CC) { 1698198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 1699193323Sed case X86::COND_E: return X86::COND_NE; 1700193323Sed case X86::COND_NE: return X86::COND_E; 1701193323Sed case X86::COND_L: return X86::COND_GE; 1702193323Sed case X86::COND_LE: return X86::COND_G; 1703193323Sed case X86::COND_G: return X86::COND_LE; 1704193323Sed case X86::COND_GE: return X86::COND_L; 1705193323Sed case X86::COND_B: return X86::COND_AE; 1706193323Sed case X86::COND_BE: return X86::COND_A; 1707193323Sed case X86::COND_A: return X86::COND_BE; 1708193323Sed case X86::COND_AE: return X86::COND_B; 1709193323Sed case X86::COND_S: return X86::COND_NS; 1710193323Sed case X86::COND_NS: return X86::COND_S; 1711193323Sed case X86::COND_P: return X86::COND_NP; 1712193323Sed case X86::COND_NP: return X86::COND_P; 1713193323Sed case X86::COND_O: return X86::COND_NO; 1714193323Sed case X86::COND_NO: return X86::COND_O; 1715193323Sed } 1716193323Sed} 1717193323Sed 1718193323Sedbool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { 1719224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 1720224145Sdim if (!MCID.isTerminator()) return false; 1721218893Sdim 1722193323Sed // Conditional branch is a special case. 1723224145Sdim if (MCID.isBranch() && !MCID.isBarrier()) 1724193323Sed return true; 1725224145Sdim if (!MCID.isPredicable()) 1726193323Sed return true; 1727193323Sed return !isPredicated(MI); 1728193323Sed} 1729193323Sed 1730218893Sdimbool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 1731193323Sed MachineBasicBlock *&TBB, 1732193323Sed MachineBasicBlock *&FBB, 1733193323Sed SmallVectorImpl<MachineOperand> &Cond, 1734193323Sed bool AllowModify) const { 1735193323Sed // Start from the bottom of the block and work up, examining the 1736193323Sed // terminator instructions. 1737193323Sed MachineBasicBlock::iterator I = MBB.end(); 1738207618Srdivacky MachineBasicBlock::iterator UnCondBrIter = MBB.end(); 1739193323Sed while (I != MBB.begin()) { 1740193323Sed --I; 1741206083Srdivacky if (I->isDebugValue()) 1742206083Srdivacky continue; 1743200581Srdivacky 1744200581Srdivacky // Working from the bottom, when we see a non-terminator instruction, we're 1745200581Srdivacky // done. 1746212904Sdim if (!isUnpredicatedTerminator(I)) 1747193323Sed break; 1748200581Srdivacky 1749200581Srdivacky // A terminator that isn't a branch can't easily be handled by this 1750200581Srdivacky // analysis. 1751193323Sed if (!I->getDesc().isBranch()) 1752193323Sed return true; 1753200581Srdivacky 1754193323Sed // Handle unconditional branches. 1755203954Srdivacky if (I->getOpcode() == X86::JMP_4) { 1756207618Srdivacky UnCondBrIter = I; 1757207618Srdivacky 1758193323Sed if (!AllowModify) { 1759193323Sed TBB = I->getOperand(0).getMBB(); 1760193323Sed continue; 1761193323Sed } 1762193323Sed 1763193323Sed // If the block has any instructions after a JMP, delete them. 
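      // For example (block numbers are illustrative), in a block ending with
      //   JMP_4 <BB#7>
      //   JE_4  <BB#3>
      // the unreachable JE_4 (and anything else after the JMP) is erased here.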
1764200581Srdivacky while (llvm::next(I) != MBB.end()) 1765200581Srdivacky llvm::next(I)->eraseFromParent(); 1766200581Srdivacky 1767193323Sed Cond.clear(); 1768193323Sed FBB = 0; 1769200581Srdivacky 1770193323Sed // Delete the JMP if it's equivalent to a fall-through. 1771193323Sed if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { 1772193323Sed TBB = 0; 1773193323Sed I->eraseFromParent(); 1774193323Sed I = MBB.end(); 1775207618Srdivacky UnCondBrIter = MBB.end(); 1776193323Sed continue; 1777193323Sed } 1778200581Srdivacky 1779207618Srdivacky // TBB is used to indicate the unconditional destination. 1780193323Sed TBB = I->getOperand(0).getMBB(); 1781193323Sed continue; 1782193323Sed } 1783200581Srdivacky 1784193323Sed // Handle conditional branches. 1785193323Sed X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode()); 1786193323Sed if (BranchCode == X86::COND_INVALID) 1787193323Sed return true; // Can't handle indirect branch. 1788200581Srdivacky 1789193323Sed // Working from the bottom, handle the first conditional branch. 1790193323Sed if (Cond.empty()) { 1791207618Srdivacky MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); 1792207618Srdivacky if (AllowModify && UnCondBrIter != MBB.end() && 1793207618Srdivacky MBB.isLayoutSuccessor(TargetBB)) { 1794207618Srdivacky // If we can modify the code and it ends in something like: 1795207618Srdivacky // 1796207618Srdivacky // jCC L1 1797207618Srdivacky // jmp L2 1798207618Srdivacky // L1: 1799207618Srdivacky // ... 1800207618Srdivacky // L2: 1801207618Srdivacky // 1802207618Srdivacky // Then we can change this to: 1803207618Srdivacky // 1804207618Srdivacky // jnCC L2 1805207618Srdivacky // L1: 1806207618Srdivacky // ... 1807207618Srdivacky // L2: 1808207618Srdivacky // 1809207618Srdivacky // Which is a bit more efficient. 1810207618Srdivacky // We conditionally jump to the fall-through block. 1811207618Srdivacky BranchCode = GetOppositeBranchCondition(BranchCode); 1812207618Srdivacky unsigned JNCC = GetCondBranchFromCond(BranchCode); 1813207618Srdivacky MachineBasicBlock::iterator OldInst = I; 1814207618Srdivacky 1815207618Srdivacky BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC)) 1816207618Srdivacky .addMBB(UnCondBrIter->getOperand(0).getMBB()); 1817207618Srdivacky BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4)) 1818207618Srdivacky .addMBB(TargetBB); 1819207618Srdivacky 1820207618Srdivacky OldInst->eraseFromParent(); 1821207618Srdivacky UnCondBrIter->eraseFromParent(); 1822207618Srdivacky 1823207618Srdivacky // Restart the analysis. 1824207618Srdivacky UnCondBrIter = MBB.end(); 1825207618Srdivacky I = MBB.end(); 1826207618Srdivacky continue; 1827207618Srdivacky } 1828207618Srdivacky 1829193323Sed FBB = TBB; 1830193323Sed TBB = I->getOperand(0).getMBB(); 1831193323Sed Cond.push_back(MachineOperand::CreateImm(BranchCode)); 1832193323Sed continue; 1833193323Sed } 1834200581Srdivacky 1835200581Srdivacky // Handle subsequent conditional branches. Only handle the case where all 1836200581Srdivacky // conditional branches branch to the same destination and their condition 1837200581Srdivacky // opcodes fit one of the special multi-branch idioms. 1838193323Sed assert(Cond.size() == 1); 1839193323Sed assert(TBB); 1840200581Srdivacky 1841200581Srdivacky // Only handle the case where all conditional branches branch to the same 1842200581Srdivacky // destination. 
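    // For instance (illustrative), floating-point compares are often lowered
    // to two conditional branches to the same block, e.g.
    //   JNP_4 <BB#2>
    //   JE_4  <BB#2>
    // Such a pair is merged below into the single pseudo condition
    // COND_NP_OR_E, which InsertBranch later re-synthesizes as two branches.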
1843193323Sed if (TBB != I->getOperand(0).getMBB()) 1844193323Sed return true; 1845200581Srdivacky 1846200581Srdivacky // If the conditions are the same, we can leave them alone. 1847193323Sed X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); 1848193323Sed if (OldBranchCode == BranchCode) 1849193323Sed continue; 1850200581Srdivacky 1851200581Srdivacky // If they differ, see if they fit one of the known patterns. Theoretically, 1852200581Srdivacky // we could handle more patterns here, but we shouldn't expect to see them 1853200581Srdivacky // if instruction selection has done a reasonable job. 1854193323Sed if ((OldBranchCode == X86::COND_NP && 1855193323Sed BranchCode == X86::COND_E) || 1856193323Sed (OldBranchCode == X86::COND_E && 1857193323Sed BranchCode == X86::COND_NP)) 1858193323Sed BranchCode = X86::COND_NP_OR_E; 1859193323Sed else if ((OldBranchCode == X86::COND_P && 1860193323Sed BranchCode == X86::COND_NE) || 1861193323Sed (OldBranchCode == X86::COND_NE && 1862193323Sed BranchCode == X86::COND_P)) 1863193323Sed BranchCode = X86::COND_NE_OR_P; 1864193323Sed else 1865193323Sed return true; 1866200581Srdivacky 1867193323Sed // Update the MachineOperand. 1868193323Sed Cond[0].setImm(BranchCode); 1869193323Sed } 1870193323Sed 1871193323Sed return false; 1872193323Sed} 1873193323Sed 1874193323Sedunsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 1875193323Sed MachineBasicBlock::iterator I = MBB.end(); 1876193323Sed unsigned Count = 0; 1877193323Sed 1878193323Sed while (I != MBB.begin()) { 1879193323Sed --I; 1880206083Srdivacky if (I->isDebugValue()) 1881206083Srdivacky continue; 1882203954Srdivacky if (I->getOpcode() != X86::JMP_4 && 1883193323Sed GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) 1884193323Sed break; 1885193323Sed // Remove the branch. 1886193323Sed I->eraseFromParent(); 1887193323Sed I = MBB.end(); 1888193323Sed ++Count; 1889193323Sed } 1890218893Sdim 1891193323Sed return Count; 1892193323Sed} 1893193323Sed 1894193323Sedunsigned 1895193323SedX86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 1896193323Sed MachineBasicBlock *FBB, 1897210299Sed const SmallVectorImpl<MachineOperand> &Cond, 1898210299Sed DebugLoc DL) const { 1899193323Sed // Shouldn't be a fall through. 1900193323Sed assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 1901193323Sed assert((Cond.size() == 1 || Cond.size() == 0) && 1902193323Sed "X86 branch conditions have one component!"); 1903193323Sed 1904193323Sed if (Cond.empty()) { 1905193323Sed // Unconditional branch? 1906193323Sed assert(!FBB && "Unconditional branch with multiple successors!"); 1907210299Sed BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB); 1908193323Sed return 1; 1909193323Sed } 1910193323Sed 1911193323Sed // Conditional branch. 1912193323Sed unsigned Count = 0; 1913193323Sed X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); 1914193323Sed switch (CC) { 1915193323Sed case X86::COND_NP_OR_E: 1916193323Sed // Synthesize NP_OR_E with two branches. 1917210299Sed BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB); 1918193323Sed ++Count; 1919210299Sed BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB); 1920193323Sed ++Count; 1921193323Sed break; 1922193323Sed case X86::COND_NE_OR_P: 1923193323Sed // Synthesize NE_OR_P with two branches. 
1924210299Sed BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB); 1925193323Sed ++Count; 1926210299Sed BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB); 1927193323Sed ++Count; 1928193323Sed break; 1929193323Sed default: { 1930193323Sed unsigned Opc = GetCondBranchFromCond(CC); 1931210299Sed BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); 1932193323Sed ++Count; 1933193323Sed } 1934193323Sed } 1935193323Sed if (FBB) { 1936193323Sed // Two-way Conditional branch. Insert the second branch. 1937210299Sed BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB); 1938193323Sed ++Count; 1939193323Sed } 1940193323Sed return Count; 1941193323Sed} 1942193323Sed 1943193323Sed/// isHReg - Test if the given register is a physical h register. 1944193323Sedstatic bool isHReg(unsigned Reg) { 1945193323Sed return X86::GR8_ABCD_HRegClass.contains(Reg); 1946193323Sed} 1947193323Sed 1948212904Sdim// Try and copy between VR128/VR64 and GR64 registers. 1949212904Sdimstatic unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) { 1950212904Sdim // SrcReg(VR128) -> DestReg(GR64) 1951212904Sdim // SrcReg(VR64) -> DestReg(GR64) 1952212904Sdim // SrcReg(GR64) -> DestReg(VR128) 1953212904Sdim // SrcReg(GR64) -> DestReg(VR64) 1954212904Sdim 1955212904Sdim if (X86::GR64RegClass.contains(DestReg)) { 1956212904Sdim if (X86::VR128RegClass.contains(SrcReg)) { 1957212904Sdim // Copy from a VR128 register to a GR64 register. 1958212904Sdim return X86::MOVPQIto64rr; 1959212904Sdim } else if (X86::VR64RegClass.contains(SrcReg)) { 1960212904Sdim // Copy from a VR64 register to a GR64 register. 1961212904Sdim return X86::MOVSDto64rr; 1962212904Sdim } 1963212904Sdim } else if (X86::GR64RegClass.contains(SrcReg)) { 1964212904Sdim // Copy from a GR64 register to a VR128 register. 1965212904Sdim if (X86::VR128RegClass.contains(DestReg)) 1966212904Sdim return X86::MOV64toPQIrr; 1967212904Sdim // Copy from a GR64 register to a VR64 register. 1968212904Sdim else if (X86::VR64RegClass.contains(DestReg)) 1969212904Sdim return X86::MOV64toSDrr; 1970212904Sdim } 1971212904Sdim 1972212904Sdim return 0; 1973212904Sdim} 1974212904Sdim 1975210299Sedvoid X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 1976210299Sed MachineBasicBlock::iterator MI, DebugLoc DL, 1977210299Sed unsigned DestReg, unsigned SrcReg, 1978210299Sed bool KillSrc) const { 1979210299Sed // First deal with the normal symmetric copies. 1980210299Sed unsigned Opc = 0; 1981210299Sed if (X86::GR64RegClass.contains(DestReg, SrcReg)) 1982210299Sed Opc = X86::MOV64rr; 1983210299Sed else if (X86::GR32RegClass.contains(DestReg, SrcReg)) 1984210299Sed Opc = X86::MOV32rr; 1985210299Sed else if (X86::GR16RegClass.contains(DestReg, SrcReg)) 1986210299Sed Opc = X86::MOV16rr; 1987210299Sed else if (X86::GR8RegClass.contains(DestReg, SrcReg)) { 1988210299Sed // Copying to or from a physical H register on x86-64 requires a NOREX 1989210299Sed // move. Otherwise use a normal move. 
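    // e.g. a copy such as %ah -> %cl on x86-64 must not be given a REX
    // prefix, since REX-prefixed instructions cannot encode %ah/%bh/%ch/%dh;
    // MOV8rr_NOREX constrains the operands so no REX prefix is ever needed.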
1990210299Sed if ((isHReg(DestReg) || isHReg(SrcReg)) && 1991210299Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 1992210299Sed Opc = X86::MOV8rr_NOREX; 1993198090Srdivacky else 1994210299Sed Opc = X86::MOV8rr; 1995210299Sed } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) 1996210299Sed Opc = X86::MOVAPSrr; 1997224145Sdim else if (X86::VR256RegClass.contains(DestReg, SrcReg)) 1998224145Sdim Opc = X86::VMOVAPSYrr; 1999210299Sed else if (X86::VR64RegClass.contains(DestReg, SrcReg)) 2000210299Sed Opc = X86::MMX_MOVQ64rr; 2001212904Sdim else 2002212904Sdim Opc = CopyToFromAsymmetricReg(DestReg, SrcReg); 2003193323Sed 2004210299Sed if (Opc) { 2005210299Sed BuildMI(MBB, MI, DL, get(Opc), DestReg) 2006210299Sed .addReg(SrcReg, getKillRegState(KillSrc)); 2007210299Sed return; 2008193323Sed } 2009198090Srdivacky 2010193323Sed // Moving EFLAGS to / from another register requires a push and a pop. 2011210299Sed if (SrcReg == X86::EFLAGS) { 2012210299Sed if (X86::GR64RegClass.contains(DestReg)) { 2013208599Srdivacky BuildMI(MBB, MI, DL, get(X86::PUSHF64)); 2014193323Sed BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); 2015210299Sed return; 2016210299Sed } else if (X86::GR32RegClass.contains(DestReg)) { 2017208599Srdivacky BuildMI(MBB, MI, DL, get(X86::PUSHF32)); 2018193323Sed BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); 2019210299Sed return; 2020193323Sed } 2021210299Sed } 2022210299Sed if (DestReg == X86::EFLAGS) { 2023210299Sed if (X86::GR64RegClass.contains(SrcReg)) { 2024210299Sed BuildMI(MBB, MI, DL, get(X86::PUSH64r)) 2025210299Sed .addReg(SrcReg, getKillRegState(KillSrc)); 2026208599Srdivacky BuildMI(MBB, MI, DL, get(X86::POPF64)); 2027210299Sed return; 2028210299Sed } else if (X86::GR32RegClass.contains(SrcReg)) { 2029210299Sed BuildMI(MBB, MI, DL, get(X86::PUSH32r)) 2030210299Sed .addReg(SrcReg, getKillRegState(KillSrc)); 2031208599Srdivacky BuildMI(MBB, MI, DL, get(X86::POPF32)); 2032210299Sed return; 2033193323Sed } 2034193323Sed } 2035193323Sed 2036210299Sed DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) 2037210299Sed << " to " << RI.getName(DestReg) << '\n'); 2038210299Sed llvm_unreachable("Cannot emit physreg copy instruction"); 2039193323Sed} 2040193323Sed 2041210299Sedstatic unsigned getLoadStoreRegOpcode(unsigned Reg, 2042210299Sed const TargetRegisterClass *RC, 2043210299Sed bool isStackAligned, 2044210299Sed const TargetMachine &TM, 2045210299Sed bool load) { 2046223017Sdim switch (RC->getSize()) { 2047210299Sed default: 2048223017Sdim llvm_unreachable("Unknown spill size"); 2049223017Sdim case 1: 2050223017Sdim assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); 2051223017Sdim if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2052223017Sdim // Copying to or from a physical H register on x86-64 requires a NOREX 2053223017Sdim // move. Otherwise use a normal move. 2054223017Sdim if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC)) 2055223017Sdim return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; 2056223017Sdim return load ? X86::MOV8rm : X86::MOV8mr; 2057223017Sdim case 2: 2058223017Sdim assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass"); 2059210299Sed return load ? X86::MOV16rm : X86::MOV16mr; 2060223017Sdim case 4: 2061223017Sdim if (X86::GR32RegClass.hasSubClassEq(RC)) 2062223017Sdim return load ? X86::MOV32rm : X86::MOV32mr; 2063223017Sdim if (X86::FR32RegClass.hasSubClassEq(RC)) 2064223017Sdim return load ? 
X86::MOVSSrm : X86::MOVSSmr; 2065223017Sdim if (X86::RFP32RegClass.hasSubClassEq(RC)) 2066223017Sdim return load ? X86::LD_Fp32m : X86::ST_Fp32m; 2067223017Sdim llvm_unreachable("Unknown 4-byte regclass"); 2068223017Sdim case 8: 2069223017Sdim if (X86::GR64RegClass.hasSubClassEq(RC)) 2070223017Sdim return load ? X86::MOV64rm : X86::MOV64mr; 2071223017Sdim if (X86::FR64RegClass.hasSubClassEq(RC)) 2072223017Sdim return load ? X86::MOVSDrm : X86::MOVSDmr; 2073223017Sdim if (X86::VR64RegClass.hasSubClassEq(RC)) 2074223017Sdim return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; 2075223017Sdim if (X86::RFP64RegClass.hasSubClassEq(RC)) 2076223017Sdim return load ? X86::LD_Fp64m : X86::ST_Fp64m; 2077223017Sdim llvm_unreachable("Unknown 8-byte regclass"); 2078223017Sdim case 10: 2079223017Sdim assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); 2080210299Sed return load ? X86::LD_Fp80m : X86::ST_FpP80m; 2081223017Sdim case 16: 2082223017Sdim assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); 2083193323Sed // If stack is realigned we can use aligned stores. 2084210299Sed if (isStackAligned) 2085210299Sed return load ? X86::MOVAPSrm : X86::MOVAPSmr; 2086210299Sed else 2087210299Sed return load ? X86::MOVUPSrm : X86::MOVUPSmr; 2088224145Sdim case 32: 2089224145Sdim assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); 2090224145Sdim // If stack is realigned we can use aligned stores. 2091224145Sdim if (isStackAligned) 2092224145Sdim return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr; 2093224145Sdim else 2094224145Sdim return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr; 2095193323Sed } 2096210299Sed} 2097193323Sed 2098210299Sedstatic unsigned getStoreRegOpcode(unsigned SrcReg, 2099210299Sed const TargetRegisterClass *RC, 2100210299Sed bool isStackAligned, 2101210299Sed TargetMachine &TM) { 2102210299Sed return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false); 2103193323Sed} 2104193323Sed 2105210299Sed 2106210299Sedstatic unsigned getLoadRegOpcode(unsigned DestReg, 2107210299Sed const TargetRegisterClass *RC, 2108210299Sed bool isStackAligned, 2109210299Sed const TargetMachine &TM) { 2110210299Sed return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true); 2111210299Sed} 2112210299Sed 2113193323Sedvoid X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 2114193323Sed MachineBasicBlock::iterator MI, 2115193323Sed unsigned SrcReg, bool isKill, int FrameIdx, 2116208599Srdivacky const TargetRegisterClass *RC, 2117208599Srdivacky const TargetRegisterInfo *TRI) const { 2118193323Sed const MachineFunction &MF = *MBB.getParent(); 2119212904Sdim assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && 2120212904Sdim "Stack slot too small for store"); 2121224145Sdim bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || 2122224145Sdim RI.canRealignStack(MF); 2123193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2124203954Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 2125193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) 2126193323Sed .addReg(SrcReg, getKillRegState(isKill)); 2127193323Sed} 2128193323Sed 2129193323Sedvoid X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, 2130193323Sed bool isKill, 2131193323Sed SmallVectorImpl<MachineOperand> &Addr, 2132193323Sed const TargetRegisterClass *RC, 2133198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 2134198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 2135193323Sed 
SmallVectorImpl<MachineInstr*> &NewMIs) const { 2136210299Sed bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; 2137193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2138206124Srdivacky DebugLoc DL; 2139193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 2140193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2141193323Sed MIB.addOperand(Addr[i]); 2142193323Sed MIB.addReg(SrcReg, getKillRegState(isKill)); 2143198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 2144193323Sed NewMIs.push_back(MIB); 2145193323Sed} 2146193323Sed 2147193323Sed 2148193323Sedvoid X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 2149193323Sed MachineBasicBlock::iterator MI, 2150193323Sed unsigned DestReg, int FrameIdx, 2151208599Srdivacky const TargetRegisterClass *RC, 2152208599Srdivacky const TargetRegisterInfo *TRI) const { 2153193323Sed const MachineFunction &MF = *MBB.getParent(); 2154224145Sdim bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || 2155224145Sdim RI.canRealignStack(MF); 2156193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2157203954Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 2158193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); 2159193323Sed} 2160193323Sed 2161193323Sedvoid X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, 2162193323Sed SmallVectorImpl<MachineOperand> &Addr, 2163193323Sed const TargetRegisterClass *RC, 2164198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 2165198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 2166193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2167210299Sed bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; 2168193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2169206124Srdivacky DebugLoc DL; 2170193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); 2171193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2172193323Sed MIB.addOperand(Addr[i]); 2173198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 2174193323Sed NewMIs.push_back(MIB); 2175193323Sed} 2176193323Sed 2177207618SrdivackyMachineInstr* 2178207618SrdivackyX86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, 2179207618Srdivacky int FrameIx, uint64_t Offset, 2180207618Srdivacky const MDNode *MDPtr, 2181207618Srdivacky DebugLoc DL) const { 2182207618Srdivacky X86AddressMode AM; 2183207618Srdivacky AM.BaseType = X86AddressMode::FrameIndexBase; 2184207618Srdivacky AM.Base.FrameIndex = FrameIx; 2185207618Srdivacky MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE)); 2186207618Srdivacky addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr); 2187207618Srdivacky return &*MIB; 2188207618Srdivacky} 2189207618Srdivacky 2190193323Sedstatic MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, 2191193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2192193323Sed MachineInstr *MI, 2193193323Sed const TargetInstrInfo &TII) { 2194193323Sed // Create the base instruction with the memory operand as the first part. 
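  // For instance (frame index and registers are illustrative), fusing a
  // stack slot into the two-address instruction
  //   %eax<def> = ADD32rr %eax, %ebx
  // produces
  //   ADD32mr <fi#2>, 1, %noreg, 0, %noreg, %ebx
  // i.e. the tied def/use pair is replaced by the address operands in MOs.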
2195193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2196193323Sed MI->getDebugLoc(), true); 2197193323Sed MachineInstrBuilder MIB(NewMI); 2198193323Sed unsigned NumAddrOps = MOs.size(); 2199193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2200193323Sed MIB.addOperand(MOs[i]); 2201193323Sed if (NumAddrOps < 4) // FrameIndex only 2202193323Sed addOffset(MIB, 0); 2203218893Sdim 2204193323Sed // Loop over the rest of the ri operands, converting them over. 2205193323Sed unsigned NumOps = MI->getDesc().getNumOperands()-2; 2206193323Sed for (unsigned i = 0; i != NumOps; ++i) { 2207193323Sed MachineOperand &MO = MI->getOperand(i+2); 2208193323Sed MIB.addOperand(MO); 2209193323Sed } 2210193323Sed for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { 2211193323Sed MachineOperand &MO = MI->getOperand(i); 2212193323Sed MIB.addOperand(MO); 2213193323Sed } 2214193323Sed return MIB; 2215193323Sed} 2216193323Sed 2217193323Sedstatic MachineInstr *FuseInst(MachineFunction &MF, 2218193323Sed unsigned Opcode, unsigned OpNo, 2219193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2220193323Sed MachineInstr *MI, const TargetInstrInfo &TII) { 2221193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2222193323Sed MI->getDebugLoc(), true); 2223193323Sed MachineInstrBuilder MIB(NewMI); 2224218893Sdim 2225193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2226193323Sed MachineOperand &MO = MI->getOperand(i); 2227193323Sed if (i == OpNo) { 2228193323Sed assert(MO.isReg() && "Expected to fold into reg operand!"); 2229193323Sed unsigned NumAddrOps = MOs.size(); 2230193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2231193323Sed MIB.addOperand(MOs[i]); 2232193323Sed if (NumAddrOps < 4) // FrameIndex only 2233193323Sed addOffset(MIB, 0); 2234193323Sed } else { 2235193323Sed MIB.addOperand(MO); 2236193323Sed } 2237193323Sed } 2238193323Sed return MIB; 2239193323Sed} 2240193323Sed 2241193323Sedstatic MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 2242193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2243193323Sed MachineInstr *MI) { 2244193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 2245193323Sed MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); 2246193323Sed 2247193323Sed unsigned NumAddrOps = MOs.size(); 2248193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2249193323Sed MIB.addOperand(MOs[i]); 2250193323Sed if (NumAddrOps < 4) // FrameIndex only 2251193323Sed addOffset(MIB, 0); 2252193323Sed return MIB.addImm(0); 2253193323Sed} 2254193323Sed 2255193323SedMachineInstr* 2256193323SedX86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2257193323Sed MachineInstr *MI, unsigned i, 2258198090Srdivacky const SmallVectorImpl<MachineOperand> &MOs, 2259198090Srdivacky unsigned Size, unsigned Align) const { 2260218893Sdim const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; 2261193323Sed bool isTwoAddrFold = false; 2262193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 2263193323Sed bool isTwoAddr = NumOps > 1 && 2264224145Sdim MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; 2265193323Sed 2266221345Sdim // FIXME: AsmPrinter doesn't know how to handle 2267221345Sdim // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. 
2268221345Sdim if (MI->getOpcode() == X86::ADD32ri && 2269221345Sdim MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) 2270221345Sdim return NULL; 2271221345Sdim 2272193323Sed MachineInstr *NewMI = NULL; 2273193323Sed // Folding a memory location into the two-address part of a two-address 2274193323Sed // instruction is different than folding it other places. It requires 2275193323Sed // replacing the *two* registers with the memory location. 2276193323Sed if (isTwoAddr && NumOps >= 2 && i < 2 && 2277193323Sed MI->getOperand(0).isReg() && 2278193323Sed MI->getOperand(1).isReg() && 2279218893Sdim MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 2280193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2281193323Sed isTwoAddrFold = true; 2282193323Sed } else if (i == 0) { // If operand 0 2283202375Srdivacky if (MI->getOpcode() == X86::MOV64r0) 2284202375Srdivacky NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); 2285202375Srdivacky else if (MI->getOpcode() == X86::MOV32r0) 2286193323Sed NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); 2287202375Srdivacky else if (MI->getOpcode() == X86::MOV16r0) 2288202375Srdivacky NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); 2289193323Sed else if (MI->getOpcode() == X86::MOV8r0) 2290193323Sed NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); 2291193323Sed if (NewMI) 2292193323Sed return NewMI; 2293218893Sdim 2294193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 2295193323Sed } else if (i == 1) { 2296193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 2297193323Sed } else if (i == 2) { 2298193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 2299193323Sed } 2300218893Sdim 2301193323Sed // If table selected... 2302193323Sed if (OpcodeTablePtr) { 2303193323Sed // Find the Opcode to fuse 2304218893Sdim DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = 2305218893Sdim OpcodeTablePtr->find(MI->getOpcode()); 2306193323Sed if (I != OpcodeTablePtr->end()) { 2307198090Srdivacky unsigned Opcode = I->second.first; 2308198090Srdivacky unsigned MinAlign = I->second.second; 2309198090Srdivacky if (Align < MinAlign) 2310198090Srdivacky return NULL; 2311198090Srdivacky bool NarrowToMOV32rm = false; 2312198090Srdivacky if (Size) { 2313224145Sdim unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize(); 2314198090Srdivacky if (Size < RCSize) { 2315198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 2316198090Srdivacky // narrower than the load width, then it's not. 2317198090Srdivacky if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) 2318198090Srdivacky return NULL; 2319198090Srdivacky // If this is a 64-bit load, but the spill slot is 32, then we can do 2320198090Srdivacky // a 32-bit load which is implicitly zero-extended. This likely is due 2321198090Srdivacky // to liveintervalanalysis remat'ing a load from stack slot. 2322198090Srdivacky if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) 2323198090Srdivacky return NULL; 2324198090Srdivacky Opcode = X86::MOV32rm; 2325198090Srdivacky NarrowToMOV32rm = true; 2326198090Srdivacky } 2327198090Srdivacky } 2328198090Srdivacky 2329193323Sed if (isTwoAddrFold) 2330198090Srdivacky NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); 2331193323Sed else 2332198090Srdivacky NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this); 2333198090Srdivacky 2334198090Srdivacky if (NarrowToMOV32rm) { 2335198090Srdivacky // If this is the special case where we use a MOV32rm to load a 32-bit 2336198090Srdivacky // value and zero-extend the top bits. 
Change the destination register 2337198090Srdivacky // to a 32-bit one. 2338198090Srdivacky unsigned DstReg = NewMI->getOperand(0).getReg(); 2339198090Srdivacky if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 2340198090Srdivacky NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, 2341208599Srdivacky X86::sub_32bit)); 2342198090Srdivacky else 2343208599Srdivacky NewMI->getOperand(0).setSubReg(X86::sub_32bit); 2344198090Srdivacky } 2345193323Sed return NewMI; 2346193323Sed } 2347193323Sed } 2348218893Sdim 2349218893Sdim // No fusion 2350210299Sed if (PrintFailedFusing && !MI->isCopy()) 2351202375Srdivacky dbgs() << "We failed to fuse operand " << i << " in " << *MI; 2352193323Sed return NULL; 2353193323Sed} 2354193323Sed 2355193323Sed 2356193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2357193323Sed MachineInstr *MI, 2358198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 2359193323Sed int FrameIndex) const { 2360218893Sdim // Check switch flag 2361193323Sed if (NoFusing) return NULL; 2362193323Sed 2363201360Srdivacky if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2364201360Srdivacky switch (MI->getOpcode()) { 2365201360Srdivacky case X86::CVTSD2SSrr: 2366201360Srdivacky case X86::Int_CVTSD2SSrr: 2367201360Srdivacky case X86::CVTSS2SDrr: 2368201360Srdivacky case X86::Int_CVTSS2SDrr: 2369201360Srdivacky case X86::RCPSSr: 2370201360Srdivacky case X86::RCPSSr_Int: 2371218893Sdim case X86::ROUNDSDr: 2372218893Sdim case X86::ROUNDSSr: 2373201360Srdivacky case X86::RSQRTSSr: 2374201360Srdivacky case X86::RSQRTSSr_Int: 2375201360Srdivacky case X86::SQRTSSr: 2376201360Srdivacky case X86::SQRTSSr_Int: 2377201360Srdivacky return 0; 2378201360Srdivacky } 2379201360Srdivacky 2380193323Sed const MachineFrameInfo *MFI = MF.getFrameInfo(); 2381198090Srdivacky unsigned Size = MFI->getObjectSize(FrameIndex); 2382193323Sed unsigned Alignment = MFI->getObjectAlignment(FrameIndex); 2383193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2384193323Sed unsigned NewOpc = 0; 2385198090Srdivacky unsigned RCSize = 0; 2386193323Sed switch (MI->getOpcode()) { 2387193323Sed default: return NULL; 2388198090Srdivacky case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; 2389208599Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; 2390208599Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; 2391208599Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break; 2392193323Sed } 2393198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 2394198090Srdivacky // narrower than the load width, then it's not. 2395198090Srdivacky if (Size < RCSize) 2396198090Srdivacky return NULL; 2397193323Sed // Change to CMPXXri r, 0 first. 
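    // e.g. (register is illustrative) folding a spill slot into
    //   TEST32rr %eax, %eax
    // first rewrites it as CMP32ri8 %eax, 0; the remaining register use is
    // then replaced with the frame-index address by foldMemoryOperandImpl.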
2398193323Sed MI->setDesc(get(NewOpc)); 2399193323Sed MI->getOperand(1).ChangeToImmediate(0); 2400193323Sed } else if (Ops.size() != 1) 2401193323Sed return NULL; 2402193323Sed 2403193323Sed SmallVector<MachineOperand,4> MOs; 2404193323Sed MOs.push_back(MachineOperand::CreateFI(FrameIndex)); 2405198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); 2406193323Sed} 2407193323Sed 2408193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2409193323Sed MachineInstr *MI, 2410198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 2411193323Sed MachineInstr *LoadMI) const { 2412218893Sdim // Check switch flag 2413193323Sed if (NoFusing) return NULL; 2414193323Sed 2415201360Srdivacky if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2416201360Srdivacky switch (MI->getOpcode()) { 2417201360Srdivacky case X86::CVTSD2SSrr: 2418201360Srdivacky case X86::Int_CVTSD2SSrr: 2419201360Srdivacky case X86::CVTSS2SDrr: 2420201360Srdivacky case X86::Int_CVTSS2SDrr: 2421201360Srdivacky case X86::RCPSSr: 2422201360Srdivacky case X86::RCPSSr_Int: 2423218893Sdim case X86::ROUNDSDr: 2424218893Sdim case X86::ROUNDSSr: 2425201360Srdivacky case X86::RSQRTSSr: 2426201360Srdivacky case X86::RSQRTSSr_Int: 2427201360Srdivacky case X86::SQRTSSr: 2428201360Srdivacky case X86::SQRTSSr_Int: 2429201360Srdivacky return 0; 2430201360Srdivacky } 2431201360Srdivacky 2432193323Sed // Determine the alignment of the load. 2433193323Sed unsigned Alignment = 0; 2434193323Sed if (LoadMI->hasOneMemOperand()) 2435198090Srdivacky Alignment = (*LoadMI->memoperands_begin())->getAlignment(); 2436198090Srdivacky else 2437198090Srdivacky switch (LoadMI->getOpcode()) { 2438212904Sdim case X86::AVX_SET0PSY: 2439212904Sdim case X86::AVX_SET0PDY: 2440212904Sdim Alignment = 32; 2441212904Sdim break; 2442206083Srdivacky case X86::V_SET0PS: 2443206083Srdivacky case X86::V_SET0PD: 2444206083Srdivacky case X86::V_SET0PI: 2445198090Srdivacky case X86::V_SETALLONES: 2446212904Sdim case X86::AVX_SET0PS: 2447212904Sdim case X86::AVX_SET0PD: 2448212904Sdim case X86::AVX_SET0PI: 2449198090Srdivacky Alignment = 16; 2450198090Srdivacky break; 2451198090Srdivacky case X86::FsFLD0SD: 2452218893Sdim case X86::VFsFLD0SD: 2453198090Srdivacky Alignment = 8; 2454198090Srdivacky break; 2455198090Srdivacky case X86::FsFLD0SS: 2456218893Sdim case X86::VFsFLD0SS: 2457198090Srdivacky Alignment = 4; 2458198090Srdivacky break; 2459198090Srdivacky default: 2460223017Sdim return 0; 2461193323Sed } 2462193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2463193323Sed unsigned NewOpc = 0; 2464193323Sed switch (MI->getOpcode()) { 2465193323Sed default: return NULL; 2466193323Sed case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 2467208599Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; 2468208599Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; 2469208599Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri8; break; 2470193323Sed } 2471193323Sed // Change to CMPXXri r, 0 first. 2472193323Sed MI->setDesc(get(NewOpc)); 2473193323Sed MI->getOperand(1).ChangeToImmediate(0); 2474193323Sed } else if (Ops.size() != 1) 2475193323Sed return NULL; 2476193323Sed 2477212904Sdim // Make sure the subregisters match. 2478212904Sdim // Otherwise we risk changing the size of the load. 
2479212904Sdim if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg()) 2480212904Sdim return NULL; 2481212904Sdim 2482210299Sed SmallVector<MachineOperand,X86::AddrNumOperands> MOs; 2483198090Srdivacky switch (LoadMI->getOpcode()) { 2484206083Srdivacky case X86::V_SET0PS: 2485206083Srdivacky case X86::V_SET0PD: 2486206083Srdivacky case X86::V_SET0PI: 2487198090Srdivacky case X86::V_SETALLONES: 2488212904Sdim case X86::AVX_SET0PS: 2489212904Sdim case X86::AVX_SET0PD: 2490212904Sdim case X86::AVX_SET0PI: 2491212904Sdim case X86::AVX_SET0PSY: 2492212904Sdim case X86::AVX_SET0PDY: 2493198090Srdivacky case X86::FsFLD0SD: 2494198090Srdivacky case X86::FsFLD0SS: { 2495206083Srdivacky // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure. 2496193323Sed // Create a constant-pool entry and operands to load from it. 2497193323Sed 2498204961Srdivacky // Medium and large mode can't fold loads this way. 2499204961Srdivacky if (TM.getCodeModel() != CodeModel::Small && 2500204961Srdivacky TM.getCodeModel() != CodeModel::Kernel) 2501204961Srdivacky return NULL; 2502204961Srdivacky 2503193323Sed // x86-32 PIC requires a PIC base register for constant pools. 2504193323Sed unsigned PICBase = 0; 2505198090Srdivacky if (TM.getRelocationModel() == Reloc::PIC_) { 2506198090Srdivacky if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2507198090Srdivacky PICBase = X86::RIP; 2508198090Srdivacky else 2509210299Sed // FIXME: PICBase = getGlobalBaseReg(&MF); 2510198090Srdivacky // This doesn't work for several reasons. 2511198090Srdivacky // 1. GlobalBaseReg may have been spilled. 2512198090Srdivacky // 2. It may not be live at MI. 2513198090Srdivacky return NULL; 2514198090Srdivacky } 2515193323Sed 2516198090Srdivacky // Create a constant-pool entry. 2517193323Sed MachineConstantPool &MCP = *MF.getConstantPool(); 2518198090Srdivacky const Type *Ty; 2519212904Sdim unsigned Opc = LoadMI->getOpcode(); 2520218893Sdim if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS) 2521198090Srdivacky Ty = Type::getFloatTy(MF.getFunction()->getContext()); 2522218893Sdim else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD) 2523198090Srdivacky Ty = Type::getDoubleTy(MF.getFunction()->getContext()); 2524212904Sdim else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) 2525212904Sdim Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); 2526198090Srdivacky else 2527198090Srdivacky Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); 2528207618Srdivacky const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? 2529198090Srdivacky Constant::getAllOnesValue(Ty) : 2530198090Srdivacky Constant::getNullValue(Ty); 2531198090Srdivacky unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); 2532193323Sed 2533193323Sed // Create operands to load from the constant pool entry. 2534193323Sed MOs.push_back(MachineOperand::CreateReg(PICBase, false)); 2535193323Sed MOs.push_back(MachineOperand::CreateImm(1)); 2536193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 2537193323Sed MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); 2538193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 2539198090Srdivacky break; 2540198090Srdivacky } 2541198090Srdivacky default: { 2542193323Sed // Folding a normal load. Just copy the load's address operands. 
2543193323Sed unsigned NumOps = LoadMI->getDesc().getNumOperands(); 2544210299Sed for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) 2545193323Sed MOs.push_back(LoadMI->getOperand(i)); 2546198090Srdivacky break; 2547193323Sed } 2548198090Srdivacky } 2549198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); 2550193323Sed} 2551193323Sed 2552193323Sed 2553193323Sedbool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, 2554193323Sed const SmallVectorImpl<unsigned> &Ops) const { 2555218893Sdim // Check switch flag 2556193323Sed if (NoFusing) return 0; 2557193323Sed 2558193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2559193323Sed switch (MI->getOpcode()) { 2560193323Sed default: return false; 2561218893Sdim case X86::TEST8rr: 2562193323Sed case X86::TEST16rr: 2563193323Sed case X86::TEST32rr: 2564193323Sed case X86::TEST64rr: 2565193323Sed return true; 2566221345Sdim case X86::ADD32ri: 2567221345Sdim // FIXME: AsmPrinter doesn't know how to handle 2568221345Sdim // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. 2569221345Sdim if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) 2570221345Sdim return false; 2571221345Sdim break; 2572193323Sed } 2573193323Sed } 2574193323Sed 2575193323Sed if (Ops.size() != 1) 2576193323Sed return false; 2577193323Sed 2578193323Sed unsigned OpNum = Ops[0]; 2579193323Sed unsigned Opc = MI->getOpcode(); 2580193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 2581193323Sed bool isTwoAddr = NumOps > 1 && 2582224145Sdim MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; 2583193323Sed 2584193323Sed // Folding a memory location into the two-address part of a two-address 2585193323Sed // instruction is different than folding it other places. It requires 2586193323Sed // replacing the *two* registers with the memory location. 
2587218893Sdim const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; 2588218893Sdim if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 2589193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2590193323Sed } else if (OpNum == 0) { // If operand 0 2591193323Sed switch (Opc) { 2592198090Srdivacky case X86::MOV8r0: 2593202375Srdivacky case X86::MOV16r0: 2594193323Sed case X86::MOV32r0: 2595218893Sdim case X86::MOV64r0: return true; 2596193323Sed default: break; 2597193323Sed } 2598193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 2599193323Sed } else if (OpNum == 1) { 2600193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 2601193323Sed } else if (OpNum == 2) { 2602193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 2603193323Sed } 2604218893Sdim 2605218893Sdim if (OpcodeTablePtr && OpcodeTablePtr->count(Opc)) 2606218893Sdim return true; 2607210299Sed return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops); 2608193323Sed} 2609193323Sed 2610193323Sedbool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, 2611193323Sed unsigned Reg, bool UnfoldLoad, bool UnfoldStore, 2612193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2613218893Sdim DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = 2614218893Sdim MemOp2RegOpTable.find(MI->getOpcode()); 2615193323Sed if (I == MemOp2RegOpTable.end()) 2616193323Sed return false; 2617193323Sed unsigned Opc = I->second.first; 2618193323Sed unsigned Index = I->second.second & 0xf; 2619193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2620193323Sed bool FoldedStore = I->second.second & (1 << 5); 2621193323Sed if (UnfoldLoad && !FoldedLoad) 2622193323Sed return false; 2623193323Sed UnfoldLoad &= FoldedLoad; 2624193323Sed if (UnfoldStore && !FoldedStore) 2625193323Sed return false; 2626193323Sed UnfoldStore &= FoldedStore; 2627193323Sed 2628224145Sdim const MCInstrDesc &MCID = get(Opc); 2629224145Sdim const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); 2630210299Sed if (!MI->hasOneMemOperand() && 2631210299Sed RC == &X86::VR128RegClass && 2632210299Sed !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) 2633210299Sed // Without memoperands, loadRegFromAddr and storeRegToStackSlot will 2634210299Sed // conservatively assume the address is unaligned. That's bad for 2635210299Sed // performance. 2636210299Sed return false; 2637210299Sed SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps; 2638193323Sed SmallVector<MachineOperand,2> BeforeOps; 2639193323Sed SmallVector<MachineOperand,2> AfterOps; 2640193323Sed SmallVector<MachineOperand,4> ImpOps; 2641193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2642193323Sed MachineOperand &Op = MI->getOperand(i); 2643210299Sed if (i >= Index && i < Index + X86::AddrNumOperands) 2644193323Sed AddrOps.push_back(Op); 2645193323Sed else if (Op.isReg() && Op.isImplicit()) 2646193323Sed ImpOps.push_back(Op); 2647193323Sed else if (i < Index) 2648193323Sed BeforeOps.push_back(Op); 2649193323Sed else if (i > Index) 2650193323Sed AfterOps.push_back(Op); 2651193323Sed } 2652193323Sed 2653193323Sed // Emit the load instruction. 
2654193323Sed if (UnfoldLoad) { 2655198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2656198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2657198090Srdivacky MF.extractLoadMemRefs(MI->memoperands_begin(), 2658198090Srdivacky MI->memoperands_end()); 2659198090Srdivacky loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); 2660193323Sed if (UnfoldStore) { 2661193323Sed // Address operands cannot be marked isKill. 2662210299Sed for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) { 2663193323Sed MachineOperand &MO = NewMIs[0]->getOperand(i); 2664193323Sed if (MO.isReg()) 2665193323Sed MO.setIsKill(false); 2666193323Sed } 2667193323Sed } 2668193323Sed } 2669193323Sed 2670193323Sed // Emit the data processing instruction. 2671224145Sdim MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true); 2672193323Sed MachineInstrBuilder MIB(DataMI); 2673218893Sdim 2674193323Sed if (FoldedStore) 2675193323Sed MIB.addReg(Reg, RegState::Define); 2676193323Sed for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) 2677193323Sed MIB.addOperand(BeforeOps[i]); 2678193323Sed if (FoldedLoad) 2679193323Sed MIB.addReg(Reg); 2680193323Sed for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) 2681193323Sed MIB.addOperand(AfterOps[i]); 2682193323Sed for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { 2683193323Sed MachineOperand &MO = ImpOps[i]; 2684193323Sed MIB.addReg(MO.getReg(), 2685193323Sed getDefRegState(MO.isDef()) | 2686193323Sed RegState::Implicit | 2687193323Sed getKillRegState(MO.isKill()) | 2688195340Sed getDeadRegState(MO.isDead()) | 2689195340Sed getUndefRegState(MO.isUndef())); 2690193323Sed } 2691193323Sed // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 2692193323Sed unsigned NewOpc = 0; 2693193323Sed switch (DataMI->getOpcode()) { 2694193323Sed default: break; 2695193323Sed case X86::CMP64ri32: 2696208599Srdivacky case X86::CMP64ri8: 2697193323Sed case X86::CMP32ri: 2698208599Srdivacky case X86::CMP32ri8: 2699193323Sed case X86::CMP16ri: 2700208599Srdivacky case X86::CMP16ri8: 2701193323Sed case X86::CMP8ri: { 2702193323Sed MachineOperand &MO0 = DataMI->getOperand(0); 2703193323Sed MachineOperand &MO1 = DataMI->getOperand(1); 2704193323Sed if (MO1.getImm() == 0) { 2705193323Sed switch (DataMI->getOpcode()) { 2706193323Sed default: break; 2707208599Srdivacky case X86::CMP64ri8: 2708193323Sed case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; 2709208599Srdivacky case X86::CMP32ri8: 2710193323Sed case X86::CMP32ri: NewOpc = X86::TEST32rr; break; 2711208599Srdivacky case X86::CMP16ri8: 2712193323Sed case X86::CMP16ri: NewOpc = X86::TEST16rr; break; 2713193323Sed case X86::CMP8ri: NewOpc = X86::TEST8rr; break; 2714193323Sed } 2715193323Sed DataMI->setDesc(get(NewOpc)); 2716193323Sed MO1.ChangeToRegister(MO0.getReg(), false); 2717193323Sed } 2718193323Sed } 2719193323Sed } 2720193323Sed NewMIs.push_back(DataMI); 2721193323Sed 2722193323Sed // Emit the store instruction. 
2723193323Sed if (UnfoldStore) { 2724224145Sdim const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI); 2725198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2726198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2727198090Srdivacky MF.extractStoreMemRefs(MI->memoperands_begin(), 2728198090Srdivacky MI->memoperands_end()); 2729198090Srdivacky storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); 2730193323Sed } 2731193323Sed 2732193323Sed return true; 2733193323Sed} 2734193323Sed 2735193323Sedbool 2736193323SedX86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 2737193323Sed SmallVectorImpl<SDNode*> &NewNodes) const { 2738193323Sed if (!N->isMachineOpcode()) 2739193323Sed return false; 2740193323Sed 2741218893Sdim DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = 2742218893Sdim MemOp2RegOpTable.find(N->getMachineOpcode()); 2743193323Sed if (I == MemOp2RegOpTable.end()) 2744193323Sed return false; 2745193323Sed unsigned Opc = I->second.first; 2746193323Sed unsigned Index = I->second.second & 0xf; 2747193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2748193323Sed bool FoldedStore = I->second.second & (1 << 5); 2749224145Sdim const MCInstrDesc &MCID = get(Opc); 2750224145Sdim const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); 2751224145Sdim unsigned NumDefs = MCID.NumDefs; 2752193323Sed std::vector<SDValue> AddrOps; 2753193323Sed std::vector<SDValue> BeforeOps; 2754193323Sed std::vector<SDValue> AfterOps; 2755193323Sed DebugLoc dl = N->getDebugLoc(); 2756193323Sed unsigned NumOps = N->getNumOperands(); 2757193323Sed for (unsigned i = 0; i != NumOps-1; ++i) { 2758193323Sed SDValue Op = N->getOperand(i); 2759210299Sed if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands) 2760193323Sed AddrOps.push_back(Op); 2761193323Sed else if (i < Index-NumDefs) 2762193323Sed BeforeOps.push_back(Op); 2763193323Sed else if (i > Index-NumDefs) 2764193323Sed AfterOps.push_back(Op); 2765193323Sed } 2766193323Sed SDValue Chain = N->getOperand(NumOps-1); 2767193323Sed AddrOps.push_back(Chain); 2768193323Sed 2769193323Sed // Emit the load instruction. 2770193323Sed SDNode *Load = 0; 2771198090Srdivacky MachineFunction &MF = DAG.getMachineFunction(); 2772193323Sed if (FoldedLoad) { 2773198090Srdivacky EVT VT = *RC->vt_begin(); 2774199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2775199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2776199481Srdivacky MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2777199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2778210299Sed if (!(*MMOs.first) && 2779210299Sed RC == &X86::VR128RegClass && 2780210299Sed !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) 2781210299Sed // Do not introduce a slow unaligned load. 2782210299Sed return false; 2783210299Sed bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; 2784198090Srdivacky Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, 2785198090Srdivacky VT, MVT::Other, &AddrOps[0], AddrOps.size()); 2786193323Sed NewNodes.push_back(Load); 2787198090Srdivacky 2788198090Srdivacky // Preserve memory reference information. 2789198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2790193323Sed } 2791193323Sed 2792193323Sed // Emit the data processing instruction. 
2793198090Srdivacky std::vector<EVT> VTs; 2794193323Sed const TargetRegisterClass *DstRC = 0; 2795224145Sdim if (MCID.getNumDefs() > 0) { 2796224145Sdim DstRC = getRegClass(MCID, 0, &RI); 2797193323Sed VTs.push_back(*DstRC->vt_begin()); 2798193323Sed } 2799193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 2800198090Srdivacky EVT VT = N->getValueType(i); 2801224145Sdim if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs()) 2802193323Sed VTs.push_back(VT); 2803193323Sed } 2804193323Sed if (Load) 2805193323Sed BeforeOps.push_back(SDValue(Load, 0)); 2806193323Sed std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); 2807198090Srdivacky SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], 2808198090Srdivacky BeforeOps.size()); 2809193323Sed NewNodes.push_back(NewNode); 2810193323Sed 2811193323Sed // Emit the store instruction. 2812193323Sed if (FoldedStore) { 2813193323Sed AddrOps.pop_back(); 2814193323Sed AddrOps.push_back(SDValue(NewNode, 0)); 2815193323Sed AddrOps.push_back(Chain); 2816199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2817199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2818199481Srdivacky MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2819199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2820210299Sed if (!(*MMOs.first) && 2821210299Sed RC == &X86::VR128RegClass && 2822210299Sed !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) 2823210299Sed // Do not introduce a slow unaligned store. 2824210299Sed return false; 2825210299Sed bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; 2826198090Srdivacky SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, 2827198090Srdivacky isAligned, TM), 2828198090Srdivacky dl, MVT::Other, 2829198090Srdivacky &AddrOps[0], AddrOps.size()); 2830193323Sed NewNodes.push_back(Store); 2831198090Srdivacky 2832198090Srdivacky // Preserve memory reference information. 
2833198090Srdivacky    cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
2834193323Sed  }
2835193323Sed
2836193323Sed  return true;
2837193323Sed}
2838193323Sed
2839193323Sedunsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
2840198892Srdivacky                                      bool UnfoldLoad, bool UnfoldStore,
2841198892Srdivacky                                      unsigned *LoadRegIndex) const {
2842218893Sdim  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
2843218893Sdim    MemOp2RegOpTable.find(Opc);
2844193323Sed  if (I == MemOp2RegOpTable.end())
2845193323Sed    return 0;
2846193323Sed  bool FoldedLoad = I->second.second & (1 << 4);
2847193323Sed  bool FoldedStore = I->second.second & (1 << 5);
2848193323Sed  if (UnfoldLoad && !FoldedLoad)
2849193323Sed    return 0;
2850193323Sed  if (UnfoldStore && !FoldedStore)
2851193323Sed    return 0;
2852198892Srdivacky  if (LoadRegIndex)
2853198892Srdivacky    *LoadRegIndex = I->second.second & 0xf;
2854193323Sed  return I->second.first;
2855193323Sed}
2856193323Sed
2857202878Srdivackybool
2858202878SrdivackyX86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
2859202878Srdivacky                                      int64_t &Offset1, int64_t &Offset2) const {
2860202878Srdivacky  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
2861202878Srdivacky    return false;
2862202878Srdivacky  unsigned Opc1 = Load1->getMachineOpcode();
2863202878Srdivacky  unsigned Opc2 = Load2->getMachineOpcode();
2864202878Srdivacky  switch (Opc1) {
2865202878Srdivacky  default: return false;
2866202878Srdivacky  case X86::MOV8rm:
2867202878Srdivacky  case X86::MOV16rm:
2868202878Srdivacky  case X86::MOV32rm:
2869202878Srdivacky  case X86::MOV64rm:
2870202878Srdivacky  case X86::LD_Fp32m:
2871202878Srdivacky  case X86::LD_Fp64m:
2872202878Srdivacky  case X86::LD_Fp80m:
2873202878Srdivacky  case X86::MOVSSrm:
2874202878Srdivacky  case X86::MOVSDrm:
2875202878Srdivacky  case X86::MMX_MOVD64rm:
2876202878Srdivacky  case X86::MMX_MOVQ64rm:
2877202878Srdivacky  case X86::FsMOVAPSrm:
2878202878Srdivacky  case X86::FsMOVAPDrm:
2879202878Srdivacky  case X86::MOVAPSrm:
2880202878Srdivacky  case X86::MOVUPSrm:
2881202878Srdivacky  case X86::MOVAPDrm:
2882202878Srdivacky  case X86::MOVDQArm:
2883202878Srdivacky  case X86::MOVDQUrm:
2884224145Sdim  case X86::VMOVAPSYrm:
2885224145Sdim  case X86::VMOVUPSYrm:
2886224145Sdim  case X86::VMOVAPDYrm:
2887224145Sdim  case X86::VMOVDQAYrm:
2888224145Sdim  case X86::VMOVDQUYrm:
2889202878Srdivacky    break;
2890202878Srdivacky  }
2891202878Srdivacky  switch (Opc2) {
2892202878Srdivacky  default: return false;
2893202878Srdivacky  case X86::MOV8rm:
2894202878Srdivacky  case X86::MOV16rm:
2895202878Srdivacky  case X86::MOV32rm:
2896202878Srdivacky  case X86::MOV64rm:
2897202878Srdivacky  case X86::LD_Fp32m:
2898202878Srdivacky  case X86::LD_Fp64m:
2899202878Srdivacky  case X86::LD_Fp80m:
2900202878Srdivacky  case X86::MOVSSrm:
2901202878Srdivacky  case X86::MOVSDrm:
2902202878Srdivacky  case X86::MMX_MOVD64rm:
2903202878Srdivacky  case X86::MMX_MOVQ64rm:
2904202878Srdivacky  case X86::FsMOVAPSrm:
2905202878Srdivacky  case X86::FsMOVAPDrm:
2906202878Srdivacky  case X86::MOVAPSrm:
2907202878Srdivacky  case X86::MOVUPSrm:
2908202878Srdivacky  case X86::MOVAPDrm:
2909202878Srdivacky  case X86::MOVDQArm:
2910202878Srdivacky  case X86::MOVDQUrm:
2911224145Sdim  case X86::VMOVAPSYrm:
2912224145Sdim  case X86::VMOVUPSYrm:
2913224145Sdim  case X86::VMOVAPDYrm:
2914224145Sdim  case X86::VMOVDQAYrm:
2915224145Sdim  case X86::VMOVDQUYrm:
2916202878Srdivacky    break;
2917202878Srdivacky  }
2918202878Srdivacky
2919202878Srdivacky  // Check if chain operands and base
addresses match. 2920202878Srdivacky if (Load1->getOperand(0) != Load2->getOperand(0) || 2921202878Srdivacky Load1->getOperand(5) != Load2->getOperand(5)) 2922202878Srdivacky return false; 2923202878Srdivacky // Segment operands should match as well. 2924202878Srdivacky if (Load1->getOperand(4) != Load2->getOperand(4)) 2925202878Srdivacky return false; 2926202878Srdivacky // Scale should be 1, Index should be Reg0. 2927202878Srdivacky if (Load1->getOperand(1) == Load2->getOperand(1) && 2928202878Srdivacky Load1->getOperand(2) == Load2->getOperand(2)) { 2929202878Srdivacky if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1) 2930202878Srdivacky return false; 2931202878Srdivacky 2932202878Srdivacky // Now let's examine the displacements. 2933202878Srdivacky if (isa<ConstantSDNode>(Load1->getOperand(3)) && 2934202878Srdivacky isa<ConstantSDNode>(Load2->getOperand(3))) { 2935202878Srdivacky Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue(); 2936202878Srdivacky Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue(); 2937202878Srdivacky return true; 2938202878Srdivacky } 2939202878Srdivacky } 2940202878Srdivacky return false; 2941202878Srdivacky} 2942202878Srdivacky 2943202878Srdivackybool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, 2944202878Srdivacky int64_t Offset1, int64_t Offset2, 2945202878Srdivacky unsigned NumLoads) const { 2946202878Srdivacky assert(Offset2 > Offset1); 2947202878Srdivacky if ((Offset2 - Offset1) / 8 > 64) 2948202878Srdivacky return false; 2949202878Srdivacky 2950202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 2951202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 2952202878Srdivacky if (Opc1 != Opc2) 2953202878Srdivacky return false; // FIXME: overly conservative? 2954202878Srdivacky 2955202878Srdivacky switch (Opc1) { 2956202878Srdivacky default: break; 2957202878Srdivacky case X86::LD_Fp32m: 2958202878Srdivacky case X86::LD_Fp64m: 2959202878Srdivacky case X86::LD_Fp80m: 2960202878Srdivacky case X86::MMX_MOVD64rm: 2961202878Srdivacky case X86::MMX_MOVQ64rm: 2962202878Srdivacky return false; 2963202878Srdivacky } 2964202878Srdivacky 2965202878Srdivacky EVT VT = Load1->getValueType(0); 2966202878Srdivacky switch (VT.getSimpleVT().SimpleTy) { 2967210299Sed default: 2968202878Srdivacky // XMM registers. In 64-bit mode we can be a bit more aggressive since we 2969202878Srdivacky // have 16 of them to play with. 
2970202878Srdivacky if (TM.getSubtargetImpl()->is64Bit()) { 2971202878Srdivacky if (NumLoads >= 3) 2972202878Srdivacky return false; 2973210299Sed } else if (NumLoads) { 2974202878Srdivacky return false; 2975210299Sed } 2976202878Srdivacky break; 2977202878Srdivacky case MVT::i8: 2978202878Srdivacky case MVT::i16: 2979202878Srdivacky case MVT::i32: 2980202878Srdivacky case MVT::i64: 2981202878Srdivacky case MVT::f32: 2982202878Srdivacky case MVT::f64: 2983202878Srdivacky if (NumLoads) 2984202878Srdivacky return false; 2985210299Sed break; 2986202878Srdivacky } 2987202878Srdivacky 2988202878Srdivacky return true; 2989202878Srdivacky} 2990202878Srdivacky 2991202878Srdivacky 2992193323Sedbool X86InstrInfo:: 2993193323SedReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 2994193323Sed assert(Cond.size() == 1 && "Invalid X86 branch condition!"); 2995193323Sed X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); 2996193323Sed if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) 2997193323Sed return true; 2998193323Sed Cond[0].setImm(GetOppositeBranchCondition(CC)); 2999193323Sed return false; 3000193323Sed} 3001193323Sed 3002193323Sedbool X86InstrInfo:: 3003193323SedisSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 3004193323Sed // FIXME: Return false for x87 stack register classes for now. We can't 3005193323Sed // allow any loads of these registers before FpGet_ST0_80. 3006193323Sed return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || 3007193323Sed RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); 3008193323Sed} 3009193323Sed 3010193323Sed 3011203954Srdivacky/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher) 3012203954Srdivacky/// register? e.g. r8, xmm8, xmm13, etc. 3013203954Srdivackybool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { 3014203954Srdivacky switch (RegNo) { 3015193323Sed default: break; 3016193323Sed case X86::R8: case X86::R9: case X86::R10: case X86::R11: 3017193323Sed case X86::R12: case X86::R13: case X86::R14: case X86::R15: 3018193323Sed case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: 3019193323Sed case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: 3020193323Sed case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: 3021193323Sed case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: 3022193323Sed case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: 3023193323Sed case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: 3024193323Sed case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: 3025193323Sed case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: 3026210299Sed case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: 3027210299Sed case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: 3028218893Sdim case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: 3029218893Sdim case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: 3030193323Sed return true; 3031193323Sed } 3032193323Sed return false; 3033193323Sed} 3034193323Sed 3035193323Sed/// getGlobalBaseReg - Return a virtual register initialized with the 3036193323Sed/// the global base register value. Output instructions required to 3037193323Sed/// initialize the register in the function entry block, if necessary. 3038193323Sed/// 3039210299Sed/// TODO: Eliminate this and move the code to X86MachineFunctionInfo. 
3040210299Sed/// 3041193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 3042193323Sed assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && 3043193323Sed "X86-64 PIC uses RIP relative addressing"); 3044193323Sed 3045193323Sed X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 3046193323Sed unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 3047193323Sed if (GlobalBaseReg != 0) 3048193323Sed return GlobalBaseReg; 3049193323Sed 3050210299Sed // Create the register. The code to initialize it is inserted 3051210299Sed // later, by the CGBR pass (below). 3052193323Sed MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3053210299Sed GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3054193323Sed X86FI->setGlobalBaseReg(GlobalBaseReg); 3055193323Sed return GlobalBaseReg; 3056193323Sed} 3057206083Srdivacky 3058206083Srdivacky// These are the replaceable SSE instructions. Some of these have Int variants 3059206083Srdivacky// that we don't include here. We don't want to replace instructions selected 3060206083Srdivacky// by intrinsics. 3061206083Srdivackystatic const unsigned ReplaceableInstrs[][3] = { 3062212904Sdim //PackedSingle PackedDouble PackedInt 3063206083Srdivacky { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, 3064206083Srdivacky { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, 3065206083Srdivacky { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, 3066206083Srdivacky { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, 3067206083Srdivacky { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, 3068206083Srdivacky { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, 3069206083Srdivacky { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, 3070206083Srdivacky { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, 3071206083Srdivacky { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm }, 3072206083Srdivacky { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, 3073206083Srdivacky { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, 3074206083Srdivacky { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, 3075206083Srdivacky { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, 3076206083Srdivacky { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, 3077206083Srdivacky { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, 3078212904Sdim // AVX 128-bit support 3079212904Sdim { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, 3080212904Sdim { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, 3081212904Sdim { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, 3082212904Sdim { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, 3083212904Sdim { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, 3084212904Sdim { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, 3085212904Sdim { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, 3086212904Sdim { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, 3087212904Sdim { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, 3088212904Sdim { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, 3089212904Sdim { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, 3090212904Sdim { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, 3091212904Sdim { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, 3092212904Sdim { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, 3093212904Sdim { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, 3094224145Sdim // AVX 256-bit support 3095224145Sdim { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, 3096224145Sdim { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, 3097224145Sdim { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr }, 3098224145Sdim { X86::VMOVUPSYmr, X86::VMOVUPDYmr, 
X86::VMOVDQUYmr }, 3099224145Sdim { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, 3100224145Sdim { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }, 3101206083Srdivacky}; 3102206083Srdivacky 3103206083Srdivacky// FIXME: Some shuffle and unpack instructions have equivalents in different 3104206083Srdivacky// domains, but they require a bit more work than just switching opcodes. 3105206083Srdivacky 3106206083Srdivackystatic const unsigned *lookup(unsigned opcode, unsigned domain) { 3107206083Srdivacky for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) 3108206083Srdivacky if (ReplaceableInstrs[i][domain-1] == opcode) 3109206083Srdivacky return ReplaceableInstrs[i]; 3110206083Srdivacky return 0; 3111206083Srdivacky} 3112206083Srdivacky 3113206083Srdivackystd::pair<uint16_t, uint16_t> 3114206083SrdivackyX86InstrInfo::GetSSEDomain(const MachineInstr *MI) const { 3115206083Srdivacky uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 3116206083Srdivacky return std::make_pair(domain, 3117206083Srdivacky domain && lookup(MI->getOpcode(), domain) ? 0xe : 0); 3118206083Srdivacky} 3119206083Srdivacky 3120206083Srdivackyvoid X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { 3121206083Srdivacky assert(Domain>0 && Domain<4 && "Invalid execution domain"); 3122206083Srdivacky uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 3123206083Srdivacky assert(dom && "Not an SSE instruction"); 3124206083Srdivacky const unsigned *table = lookup(MI->getOpcode(), dom); 3125206083Srdivacky assert(table && "Cannot change domain"); 3126206083Srdivacky MI->setDesc(get(table[Domain-1])); 3127206083Srdivacky} 3128207618Srdivacky 3129207618Srdivacky/// getNoopForMachoTarget - Return the noop instruction to use for a noop. 3130207618Srdivackyvoid X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { 3131207618Srdivacky NopInst.setOpcode(X86::NOOP); 3132207618Srdivacky} 3133207618Srdivacky 3134221345Sdimbool X86InstrInfo::isHighLatencyDef(int opc) const { 3135221345Sdim switch (opc) { 3136218893Sdim default: return false; 3137218893Sdim case X86::DIVSDrm: 3138218893Sdim case X86::DIVSDrm_Int: 3139218893Sdim case X86::DIVSDrr: 3140218893Sdim case X86::DIVSDrr_Int: 3141218893Sdim case X86::DIVSSrm: 3142218893Sdim case X86::DIVSSrm_Int: 3143218893Sdim case X86::DIVSSrr: 3144218893Sdim case X86::DIVSSrr_Int: 3145218893Sdim case X86::SQRTPDm: 3146218893Sdim case X86::SQRTPDm_Int: 3147218893Sdim case X86::SQRTPDr: 3148218893Sdim case X86::SQRTPDr_Int: 3149218893Sdim case X86::SQRTPSm: 3150218893Sdim case X86::SQRTPSm_Int: 3151218893Sdim case X86::SQRTPSr: 3152218893Sdim case X86::SQRTPSr_Int: 3153218893Sdim case X86::SQRTSDm: 3154218893Sdim case X86::SQRTSDm_Int: 3155218893Sdim case X86::SQRTSDr: 3156218893Sdim case X86::SQRTSDr_Int: 3157218893Sdim case X86::SQRTSSm: 3158218893Sdim case X86::SQRTSSm_Int: 3159218893Sdim case X86::SQRTSSr: 3160218893Sdim case X86::SQRTSSr_Int: 3161218893Sdim return true; 3162218893Sdim } 3163218893Sdim} 3164218893Sdim 3165221345Sdimbool X86InstrInfo:: 3166221345SdimhasHighOperandLatency(const InstrItineraryData *ItinData, 3167221345Sdim const MachineRegisterInfo *MRI, 3168221345Sdim const MachineInstr *DefMI, unsigned DefIdx, 3169221345Sdim const MachineInstr *UseMI, unsigned UseIdx) const { 3170221345Sdim return isHighLatencyDef(DefMI->getOpcode()); 3171221345Sdim} 3172221345Sdim 3173210299Sednamespace { 3174210299Sed /// CGBR - Create Global Base Reg pass. 
This initializes the PIC 3175210299Sed /// global base register for x86-32. 3176210299Sed struct CGBR : public MachineFunctionPass { 3177210299Sed static char ID; 3178212904Sdim CGBR() : MachineFunctionPass(ID) {} 3179210299Sed 3180210299Sed virtual bool runOnMachineFunction(MachineFunction &MF) { 3181210299Sed const X86TargetMachine *TM = 3182210299Sed static_cast<const X86TargetMachine *>(&MF.getTarget()); 3183210299Sed 3184210299Sed assert(!TM->getSubtarget<X86Subtarget>().is64Bit() && 3185210299Sed "X86-64 PIC uses RIP relative addressing"); 3186210299Sed 3187210299Sed // Only emit a global base reg in PIC mode. 3188210299Sed if (TM->getRelocationModel() != Reloc::PIC_) 3189210299Sed return false; 3190210299Sed 3191218893Sdim X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 3192218893Sdim unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 3193218893Sdim 3194218893Sdim // If we didn't need a GlobalBaseReg, don't insert code. 3195218893Sdim if (GlobalBaseReg == 0) 3196218893Sdim return false; 3197218893Sdim 3198210299Sed // Insert the set of GlobalBaseReg into the first MBB of the function 3199210299Sed MachineBasicBlock &FirstMBB = MF.front(); 3200210299Sed MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 3201210299Sed DebugLoc DL = FirstMBB.findDebugLoc(MBBI); 3202210299Sed MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3203210299Sed const X86InstrInfo *TII = TM->getInstrInfo(); 3204210299Sed 3205210299Sed unsigned PC; 3206210299Sed if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) 3207210299Sed PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3208210299Sed else 3209218893Sdim PC = GlobalBaseReg; 3210218893Sdim 3211210299Sed // Operand of MovePCtoStack is completely ignored by asm printer. It's 3212210299Sed // only used in JIT code emission as displacement to pc. 3213210299Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); 3214218893Sdim 3215210299Sed // If we're using vanilla 'GOT' PIC style, we should use relative addressing 3216210299Sed // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. 3217210299Sed if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) { 3218210299Sed // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register 3219210299Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) 3220210299Sed .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 3221210299Sed X86II::MO_GOT_ABSOLUTE_ADDRESS); 3222210299Sed } 3223210299Sed 3224210299Sed return true; 3225210299Sed } 3226210299Sed 3227210299Sed virtual const char *getPassName() const { 3228210299Sed return "X86 PIC Global Base Reg Initialization"; 3229210299Sed } 3230210299Sed 3231210299Sed virtual void getAnalysisUsage(AnalysisUsage &AU) const { 3232210299Sed AU.setPreservesCFG(); 3233210299Sed MachineFunctionPass::getAnalysisUsage(AU); 3234210299Sed } 3235210299Sed }; 3236210299Sed} 3237210299Sed 3238210299Sedchar CGBR::ID = 0; 3239210299SedFunctionPass* 3240210299Sedllvm::createGlobalBaseRegPass() { return new CGBR(); } 3241