// X86InstrInfo.cpp — FreeBSD contrib snapshot of LLVM, svn revision 206083
1193323Sed//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file contains the X86 implementation of the TargetInstrInfo class. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "X86InstrInfo.h" 15193323Sed#include "X86.h" 16193323Sed#include "X86GenInstrInfo.inc" 17193323Sed#include "X86InstrBuilder.h" 18193323Sed#include "X86MachineFunctionInfo.h" 19193323Sed#include "X86Subtarget.h" 20193323Sed#include "X86TargetMachine.h" 21193323Sed#include "llvm/DerivedTypes.h" 22198090Srdivacky#include "llvm/LLVMContext.h" 23193323Sed#include "llvm/ADT/STLExtras.h" 24193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 25193323Sed#include "llvm/CodeGen/MachineFrameInfo.h" 26193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 27193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 28193323Sed#include "llvm/CodeGen/LiveVariables.h" 29199481Srdivacky#include "llvm/CodeGen/PseudoSourceValue.h" 30193323Sed#include "llvm/Support/CommandLine.h" 31202375Srdivacky#include "llvm/Support/Debug.h" 32198090Srdivacky#include "llvm/Support/ErrorHandling.h" 33198090Srdivacky#include "llvm/Support/raw_ostream.h" 34193323Sed#include "llvm/Target/TargetOptions.h" 35198090Srdivacky#include "llvm/MC/MCAsmInfo.h" 36199481Srdivacky 37199481Srdivacky#include <limits> 38199481Srdivacky 39193323Sedusing namespace llvm; 40193323Sed 41198090Srdivackystatic cl::opt<bool> 42198090SrdivackyNoFusing("disable-spill-fusing", 43198090Srdivacky cl::desc("Disable fusing of spill code into instructions")); 44198090Srdivackystatic cl::opt<bool> 
45198090SrdivackyPrintFailedFusing("print-failed-fuse-candidates", 46198090Srdivacky cl::desc("Print instructions that the allocator wants to" 47198090Srdivacky " fuse, but the X86 backend currently can't"), 48198090Srdivacky cl::Hidden); 49198090Srdivackystatic cl::opt<bool> 50198090SrdivackyReMatPICStubLoad("remat-pic-stub-load", 51198090Srdivacky cl::desc("Re-materialize load from stub in PIC mode"), 52198090Srdivacky cl::init(false), cl::Hidden); 53193323Sed 54193323SedX86InstrInfo::X86InstrInfo(X86TargetMachine &tm) 55193323Sed : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), 56193323Sed TM(tm), RI(tm, *this) { 57193323Sed SmallVector<unsigned,16> AmbEntries; 58193323Sed static const unsigned OpTbl2Addr[][2] = { 59193323Sed { X86::ADC32ri, X86::ADC32mi }, 60193323Sed { X86::ADC32ri8, X86::ADC32mi8 }, 61193323Sed { X86::ADC32rr, X86::ADC32mr }, 62193323Sed { X86::ADC64ri32, X86::ADC64mi32 }, 63193323Sed { X86::ADC64ri8, X86::ADC64mi8 }, 64193323Sed { X86::ADC64rr, X86::ADC64mr }, 65193323Sed { X86::ADD16ri, X86::ADD16mi }, 66193323Sed { X86::ADD16ri8, X86::ADD16mi8 }, 67193323Sed { X86::ADD16rr, X86::ADD16mr }, 68193323Sed { X86::ADD32ri, X86::ADD32mi }, 69193323Sed { X86::ADD32ri8, X86::ADD32mi8 }, 70193323Sed { X86::ADD32rr, X86::ADD32mr }, 71193323Sed { X86::ADD64ri32, X86::ADD64mi32 }, 72193323Sed { X86::ADD64ri8, X86::ADD64mi8 }, 73193323Sed { X86::ADD64rr, X86::ADD64mr }, 74193323Sed { X86::ADD8ri, X86::ADD8mi }, 75193323Sed { X86::ADD8rr, X86::ADD8mr }, 76193323Sed { X86::AND16ri, X86::AND16mi }, 77193323Sed { X86::AND16ri8, X86::AND16mi8 }, 78193323Sed { X86::AND16rr, X86::AND16mr }, 79193323Sed { X86::AND32ri, X86::AND32mi }, 80193323Sed { X86::AND32ri8, X86::AND32mi8 }, 81193323Sed { X86::AND32rr, X86::AND32mr }, 82193323Sed { X86::AND64ri32, X86::AND64mi32 }, 83193323Sed { X86::AND64ri8, X86::AND64mi8 }, 84193323Sed { X86::AND64rr, X86::AND64mr }, 85193323Sed { X86::AND8ri, X86::AND8mi }, 86193323Sed { X86::AND8rr, X86::AND8mr }, 
87193323Sed { X86::DEC16r, X86::DEC16m }, 88193323Sed { X86::DEC32r, X86::DEC32m }, 89193323Sed { X86::DEC64_16r, X86::DEC64_16m }, 90193323Sed { X86::DEC64_32r, X86::DEC64_32m }, 91193323Sed { X86::DEC64r, X86::DEC64m }, 92193323Sed { X86::DEC8r, X86::DEC8m }, 93193323Sed { X86::INC16r, X86::INC16m }, 94193323Sed { X86::INC32r, X86::INC32m }, 95193323Sed { X86::INC64_16r, X86::INC64_16m }, 96193323Sed { X86::INC64_32r, X86::INC64_32m }, 97193323Sed { X86::INC64r, X86::INC64m }, 98193323Sed { X86::INC8r, X86::INC8m }, 99193323Sed { X86::NEG16r, X86::NEG16m }, 100193323Sed { X86::NEG32r, X86::NEG32m }, 101193323Sed { X86::NEG64r, X86::NEG64m }, 102193323Sed { X86::NEG8r, X86::NEG8m }, 103193323Sed { X86::NOT16r, X86::NOT16m }, 104193323Sed { X86::NOT32r, X86::NOT32m }, 105193323Sed { X86::NOT64r, X86::NOT64m }, 106193323Sed { X86::NOT8r, X86::NOT8m }, 107193323Sed { X86::OR16ri, X86::OR16mi }, 108193323Sed { X86::OR16ri8, X86::OR16mi8 }, 109193323Sed { X86::OR16rr, X86::OR16mr }, 110193323Sed { X86::OR32ri, X86::OR32mi }, 111193323Sed { X86::OR32ri8, X86::OR32mi8 }, 112193323Sed { X86::OR32rr, X86::OR32mr }, 113193323Sed { X86::OR64ri32, X86::OR64mi32 }, 114193323Sed { X86::OR64ri8, X86::OR64mi8 }, 115193323Sed { X86::OR64rr, X86::OR64mr }, 116193323Sed { X86::OR8ri, X86::OR8mi }, 117193323Sed { X86::OR8rr, X86::OR8mr }, 118193323Sed { X86::ROL16r1, X86::ROL16m1 }, 119193323Sed { X86::ROL16rCL, X86::ROL16mCL }, 120193323Sed { X86::ROL16ri, X86::ROL16mi }, 121193323Sed { X86::ROL32r1, X86::ROL32m1 }, 122193323Sed { X86::ROL32rCL, X86::ROL32mCL }, 123193323Sed { X86::ROL32ri, X86::ROL32mi }, 124193323Sed { X86::ROL64r1, X86::ROL64m1 }, 125193323Sed { X86::ROL64rCL, X86::ROL64mCL }, 126193323Sed { X86::ROL64ri, X86::ROL64mi }, 127193323Sed { X86::ROL8r1, X86::ROL8m1 }, 128193323Sed { X86::ROL8rCL, X86::ROL8mCL }, 129193323Sed { X86::ROL8ri, X86::ROL8mi }, 130193323Sed { X86::ROR16r1, X86::ROR16m1 }, 131193323Sed { X86::ROR16rCL, X86::ROR16mCL }, 132193323Sed { 
X86::ROR16ri, X86::ROR16mi }, 133193323Sed { X86::ROR32r1, X86::ROR32m1 }, 134193323Sed { X86::ROR32rCL, X86::ROR32mCL }, 135193323Sed { X86::ROR32ri, X86::ROR32mi }, 136193323Sed { X86::ROR64r1, X86::ROR64m1 }, 137193323Sed { X86::ROR64rCL, X86::ROR64mCL }, 138193323Sed { X86::ROR64ri, X86::ROR64mi }, 139193323Sed { X86::ROR8r1, X86::ROR8m1 }, 140193323Sed { X86::ROR8rCL, X86::ROR8mCL }, 141193323Sed { X86::ROR8ri, X86::ROR8mi }, 142193323Sed { X86::SAR16r1, X86::SAR16m1 }, 143193323Sed { X86::SAR16rCL, X86::SAR16mCL }, 144193323Sed { X86::SAR16ri, X86::SAR16mi }, 145193323Sed { X86::SAR32r1, X86::SAR32m1 }, 146193323Sed { X86::SAR32rCL, X86::SAR32mCL }, 147193323Sed { X86::SAR32ri, X86::SAR32mi }, 148193323Sed { X86::SAR64r1, X86::SAR64m1 }, 149193323Sed { X86::SAR64rCL, X86::SAR64mCL }, 150193323Sed { X86::SAR64ri, X86::SAR64mi }, 151193323Sed { X86::SAR8r1, X86::SAR8m1 }, 152193323Sed { X86::SAR8rCL, X86::SAR8mCL }, 153193323Sed { X86::SAR8ri, X86::SAR8mi }, 154193323Sed { X86::SBB32ri, X86::SBB32mi }, 155193323Sed { X86::SBB32ri8, X86::SBB32mi8 }, 156193323Sed { X86::SBB32rr, X86::SBB32mr }, 157193323Sed { X86::SBB64ri32, X86::SBB64mi32 }, 158193323Sed { X86::SBB64ri8, X86::SBB64mi8 }, 159193323Sed { X86::SBB64rr, X86::SBB64mr }, 160193323Sed { X86::SHL16rCL, X86::SHL16mCL }, 161193323Sed { X86::SHL16ri, X86::SHL16mi }, 162193323Sed { X86::SHL32rCL, X86::SHL32mCL }, 163193323Sed { X86::SHL32ri, X86::SHL32mi }, 164193323Sed { X86::SHL64rCL, X86::SHL64mCL }, 165193323Sed { X86::SHL64ri, X86::SHL64mi }, 166193323Sed { X86::SHL8rCL, X86::SHL8mCL }, 167193323Sed { X86::SHL8ri, X86::SHL8mi }, 168193323Sed { X86::SHLD16rrCL, X86::SHLD16mrCL }, 169193323Sed { X86::SHLD16rri8, X86::SHLD16mri8 }, 170193323Sed { X86::SHLD32rrCL, X86::SHLD32mrCL }, 171193323Sed { X86::SHLD32rri8, X86::SHLD32mri8 }, 172193323Sed { X86::SHLD64rrCL, X86::SHLD64mrCL }, 173193323Sed { X86::SHLD64rri8, X86::SHLD64mri8 }, 174193323Sed { X86::SHR16r1, X86::SHR16m1 }, 175193323Sed { X86::SHR16rCL, 
X86::SHR16mCL }, 176193323Sed { X86::SHR16ri, X86::SHR16mi }, 177193323Sed { X86::SHR32r1, X86::SHR32m1 }, 178193323Sed { X86::SHR32rCL, X86::SHR32mCL }, 179193323Sed { X86::SHR32ri, X86::SHR32mi }, 180193323Sed { X86::SHR64r1, X86::SHR64m1 }, 181193323Sed { X86::SHR64rCL, X86::SHR64mCL }, 182193323Sed { X86::SHR64ri, X86::SHR64mi }, 183193323Sed { X86::SHR8r1, X86::SHR8m1 }, 184193323Sed { X86::SHR8rCL, X86::SHR8mCL }, 185193323Sed { X86::SHR8ri, X86::SHR8mi }, 186193323Sed { X86::SHRD16rrCL, X86::SHRD16mrCL }, 187193323Sed { X86::SHRD16rri8, X86::SHRD16mri8 }, 188193323Sed { X86::SHRD32rrCL, X86::SHRD32mrCL }, 189193323Sed { X86::SHRD32rri8, X86::SHRD32mri8 }, 190193323Sed { X86::SHRD64rrCL, X86::SHRD64mrCL }, 191193323Sed { X86::SHRD64rri8, X86::SHRD64mri8 }, 192193323Sed { X86::SUB16ri, X86::SUB16mi }, 193193323Sed { X86::SUB16ri8, X86::SUB16mi8 }, 194193323Sed { X86::SUB16rr, X86::SUB16mr }, 195193323Sed { X86::SUB32ri, X86::SUB32mi }, 196193323Sed { X86::SUB32ri8, X86::SUB32mi8 }, 197193323Sed { X86::SUB32rr, X86::SUB32mr }, 198193323Sed { X86::SUB64ri32, X86::SUB64mi32 }, 199193323Sed { X86::SUB64ri8, X86::SUB64mi8 }, 200193323Sed { X86::SUB64rr, X86::SUB64mr }, 201193323Sed { X86::SUB8ri, X86::SUB8mi }, 202193323Sed { X86::SUB8rr, X86::SUB8mr }, 203193323Sed { X86::XOR16ri, X86::XOR16mi }, 204193323Sed { X86::XOR16ri8, X86::XOR16mi8 }, 205193323Sed { X86::XOR16rr, X86::XOR16mr }, 206193323Sed { X86::XOR32ri, X86::XOR32mi }, 207193323Sed { X86::XOR32ri8, X86::XOR32mi8 }, 208193323Sed { X86::XOR32rr, X86::XOR32mr }, 209193323Sed { X86::XOR64ri32, X86::XOR64mi32 }, 210193323Sed { X86::XOR64ri8, X86::XOR64mi8 }, 211193323Sed { X86::XOR64rr, X86::XOR64mr }, 212193323Sed { X86::XOR8ri, X86::XOR8mi }, 213193323Sed { X86::XOR8rr, X86::XOR8mr } 214193323Sed }; 215193323Sed 216193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { 217193323Sed unsigned RegOp = OpTbl2Addr[i][0]; 218193323Sed unsigned MemOp = OpTbl2Addr[i][1]; 219193323Sed if 
(!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, 220198090Srdivacky std::make_pair(MemOp,0))).second) 221193323Sed assert(false && "Duplicated entries?"); 222198090Srdivacky // Index 0, folded load and store, no alignment requirement. 223198090Srdivacky unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); 224193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 225193323Sed std::make_pair(RegOp, 226193323Sed AuxInfo))).second) 227193323Sed AmbEntries.push_back(MemOp); 228193323Sed } 229193323Sed 230193323Sed // If the third value is 1, then it's folding either a load or a store. 231198090Srdivacky static const unsigned OpTbl0[][4] = { 232198090Srdivacky { X86::BT16ri8, X86::BT16mi8, 1, 0 }, 233198090Srdivacky { X86::BT32ri8, X86::BT32mi8, 1, 0 }, 234198090Srdivacky { X86::BT64ri8, X86::BT64mi8, 1, 0 }, 235198090Srdivacky { X86::CALL32r, X86::CALL32m, 1, 0 }, 236198090Srdivacky { X86::CALL64r, X86::CALL64m, 1, 0 }, 237198090Srdivacky { X86::CMP16ri, X86::CMP16mi, 1, 0 }, 238198090Srdivacky { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, 239198090Srdivacky { X86::CMP16rr, X86::CMP16mr, 1, 0 }, 240198090Srdivacky { X86::CMP32ri, X86::CMP32mi, 1, 0 }, 241198090Srdivacky { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, 242198090Srdivacky { X86::CMP32rr, X86::CMP32mr, 1, 0 }, 243198090Srdivacky { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, 244198090Srdivacky { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, 245198090Srdivacky { X86::CMP64rr, X86::CMP64mr, 1, 0 }, 246198090Srdivacky { X86::CMP8ri, X86::CMP8mi, 1, 0 }, 247198090Srdivacky { X86::CMP8rr, X86::CMP8mr, 1, 0 }, 248198090Srdivacky { X86::DIV16r, X86::DIV16m, 1, 0 }, 249198090Srdivacky { X86::DIV32r, X86::DIV32m, 1, 0 }, 250198090Srdivacky { X86::DIV64r, X86::DIV64m, 1, 0 }, 251198090Srdivacky { X86::DIV8r, X86::DIV8m, 1, 0 }, 252198090Srdivacky { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, 253198090Srdivacky { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 }, 254198090Srdivacky { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, 
255198090Srdivacky { X86::IDIV16r, X86::IDIV16m, 1, 0 }, 256198090Srdivacky { X86::IDIV32r, X86::IDIV32m, 1, 0 }, 257198090Srdivacky { X86::IDIV64r, X86::IDIV64m, 1, 0 }, 258198090Srdivacky { X86::IDIV8r, X86::IDIV8m, 1, 0 }, 259198090Srdivacky { X86::IMUL16r, X86::IMUL16m, 1, 0 }, 260198090Srdivacky { X86::IMUL32r, X86::IMUL32m, 1, 0 }, 261198090Srdivacky { X86::IMUL64r, X86::IMUL64m, 1, 0 }, 262198090Srdivacky { X86::IMUL8r, X86::IMUL8m, 1, 0 }, 263198090Srdivacky { X86::JMP32r, X86::JMP32m, 1, 0 }, 264198090Srdivacky { X86::JMP64r, X86::JMP64m, 1, 0 }, 265198090Srdivacky { X86::MOV16ri, X86::MOV16mi, 0, 0 }, 266198090Srdivacky { X86::MOV16rr, X86::MOV16mr, 0, 0 }, 267198090Srdivacky { X86::MOV32ri, X86::MOV32mi, 0, 0 }, 268198090Srdivacky { X86::MOV32rr, X86::MOV32mr, 0, 0 }, 269205218Srdivacky { X86::MOV32rr_TC, X86::MOV32mr_TC, 0, 0 }, 270198090Srdivacky { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, 271198090Srdivacky { X86::MOV64rr, X86::MOV64mr, 0, 0 }, 272198090Srdivacky { X86::MOV8ri, X86::MOV8mi, 0, 0 }, 273198090Srdivacky { X86::MOV8rr, X86::MOV8mr, 0, 0 }, 274198090Srdivacky { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, 275198090Srdivacky { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, 276198090Srdivacky { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, 277198090Srdivacky { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, 278198090Srdivacky { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, 279198090Srdivacky { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, 280198090Srdivacky { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, 281198090Srdivacky { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, 282198090Srdivacky { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, 283198090Srdivacky { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, 284198090Srdivacky { X86::MUL16r, X86::MUL16m, 1, 0 }, 285198090Srdivacky { X86::MUL32r, X86::MUL32m, 1, 0 }, 286198090Srdivacky { X86::MUL64r, X86::MUL64m, 1, 0 }, 287198090Srdivacky { X86::MUL8r, X86::MUL8m, 1, 0 }, 288198090Srdivacky { X86::SETAEr, X86::SETAEm, 0, 0 }, 289198090Srdivacky { 
X86::SETAr, X86::SETAm, 0, 0 }, 290198090Srdivacky { X86::SETBEr, X86::SETBEm, 0, 0 }, 291198090Srdivacky { X86::SETBr, X86::SETBm, 0, 0 }, 292198090Srdivacky { X86::SETEr, X86::SETEm, 0, 0 }, 293198090Srdivacky { X86::SETGEr, X86::SETGEm, 0, 0 }, 294198090Srdivacky { X86::SETGr, X86::SETGm, 0, 0 }, 295198090Srdivacky { X86::SETLEr, X86::SETLEm, 0, 0 }, 296198090Srdivacky { X86::SETLr, X86::SETLm, 0, 0 }, 297198090Srdivacky { X86::SETNEr, X86::SETNEm, 0, 0 }, 298198090Srdivacky { X86::SETNOr, X86::SETNOm, 0, 0 }, 299198090Srdivacky { X86::SETNPr, X86::SETNPm, 0, 0 }, 300198090Srdivacky { X86::SETNSr, X86::SETNSm, 0, 0 }, 301198090Srdivacky { X86::SETOr, X86::SETOm, 0, 0 }, 302198090Srdivacky { X86::SETPr, X86::SETPm, 0, 0 }, 303198090Srdivacky { X86::SETSr, X86::SETSm, 0, 0 }, 304198090Srdivacky { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, 305205218Srdivacky { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 }, 306198090Srdivacky { X86::TEST16ri, X86::TEST16mi, 1, 0 }, 307198090Srdivacky { X86::TEST32ri, X86::TEST32mi, 1, 0 }, 308198090Srdivacky { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, 309198090Srdivacky { X86::TEST8ri, X86::TEST8mi, 1, 0 } 310193323Sed }; 311193323Sed 312193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { 313193323Sed unsigned RegOp = OpTbl0[i][0]; 314193323Sed unsigned MemOp = OpTbl0[i][1]; 315198090Srdivacky unsigned Align = OpTbl0[i][3]; 316193323Sed if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, 317198090Srdivacky std::make_pair(MemOp,Align))).second) 318193323Sed assert(false && "Duplicated entries?"); 319193323Sed unsigned FoldedLoad = OpTbl0[i][2]; 320193323Sed // Index 0, folded load or store. 
321193323Sed unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); 322193323Sed if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) 323193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 324193323Sed std::make_pair(RegOp, AuxInfo))).second) 325193323Sed AmbEntries.push_back(MemOp); 326193323Sed } 327193323Sed 328198090Srdivacky static const unsigned OpTbl1[][3] = { 329198090Srdivacky { X86::CMP16rr, X86::CMP16rm, 0 }, 330198090Srdivacky { X86::CMP32rr, X86::CMP32rm, 0 }, 331198090Srdivacky { X86::CMP64rr, X86::CMP64rm, 0 }, 332198090Srdivacky { X86::CMP8rr, X86::CMP8rm, 0 }, 333198090Srdivacky { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, 334198090Srdivacky { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, 335198090Srdivacky { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, 336198090Srdivacky { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, 337198090Srdivacky { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, 338198090Srdivacky { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, 339198090Srdivacky { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, 340198090Srdivacky { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, 341198090Srdivacky { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, 342198090Srdivacky { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, 343198090Srdivacky { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, 344198090Srdivacky { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, 345198090Srdivacky { X86::IMUL16rri, X86::IMUL16rmi, 0 }, 346198090Srdivacky { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, 347198090Srdivacky { X86::IMUL32rri, X86::IMUL32rmi, 0 }, 348198090Srdivacky { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, 349198090Srdivacky { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, 350198090Srdivacky { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, 351198090Srdivacky { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, 352198090Srdivacky { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, 353198090Srdivacky { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, 354198090Srdivacky { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, 355198090Srdivacky { 
X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, 356198090Srdivacky { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, 357198090Srdivacky { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, 358198090Srdivacky { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, 359198090Srdivacky { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, 360198090Srdivacky { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, 361198090Srdivacky { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, 362198090Srdivacky { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, 363198090Srdivacky { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, 364198090Srdivacky { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, 365198090Srdivacky { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, 366198090Srdivacky { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, 367198090Srdivacky { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, 368198090Srdivacky { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, 369198090Srdivacky { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, 370198090Srdivacky { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, 371198090Srdivacky { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, 372198090Srdivacky { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, 373198090Srdivacky { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, 374198090Srdivacky { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, 375198090Srdivacky { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, 376198090Srdivacky { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, 377198090Srdivacky { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, 378198090Srdivacky { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, 379198090Srdivacky { X86::MOV16rr, X86::MOV16rm, 0 }, 380198090Srdivacky { X86::MOV32rr, X86::MOV32rm, 0 }, 381205218Srdivacky { X86::MOV32rr_TC, X86::MOV32rm_TC, 0 }, 382198090Srdivacky { X86::MOV64rr, X86::MOV64rm, 0 }, 383198090Srdivacky { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, 384198090Srdivacky { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, 
385198090Srdivacky { X86::MOV8rr, X86::MOV8rm, 0 }, 386198090Srdivacky { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, 387198090Srdivacky { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, 388198090Srdivacky { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, 389198090Srdivacky { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, 390198090Srdivacky { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, 391198090Srdivacky { X86::MOVDQArr, X86::MOVDQArm, 16 }, 392198090Srdivacky { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, 393198090Srdivacky { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, 394198090Srdivacky { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, 395198090Srdivacky { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, 396198090Srdivacky { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, 397198090Srdivacky { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, 398198090Srdivacky { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, 399198090Srdivacky { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, 400198090Srdivacky { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, 401202878Srdivacky { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, 402198090Srdivacky { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, 403198090Srdivacky { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, 404198090Srdivacky { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, 405198090Srdivacky { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, 406198090Srdivacky { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, 407198090Srdivacky { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, 408198090Srdivacky { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, 409198090Srdivacky { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, 410198090Srdivacky { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, 411198090Srdivacky { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, 412198090Srdivacky { X86::PSHUFDri, X86::PSHUFDmi, 16 }, 413198090Srdivacky { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, 414198090Srdivacky { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, 415198090Srdivacky { X86::RCPPSr, X86::RCPPSm, 16 }, 416198090Srdivacky { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, 417198090Srdivacky { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, 
418198090Srdivacky { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, 419198090Srdivacky { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, 420198090Srdivacky { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, 421198090Srdivacky { X86::SQRTPDr, X86::SQRTPDm, 16 }, 422198090Srdivacky { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, 423198090Srdivacky { X86::SQRTPSr, X86::SQRTPSm, 16 }, 424198090Srdivacky { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, 425198090Srdivacky { X86::SQRTSDr, X86::SQRTSDm, 0 }, 426198090Srdivacky { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, 427198090Srdivacky { X86::SQRTSSr, X86::SQRTSSm, 0 }, 428198090Srdivacky { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, 429198090Srdivacky { X86::TEST16rr, X86::TEST16rm, 0 }, 430198090Srdivacky { X86::TEST32rr, X86::TEST32rm, 0 }, 431198090Srdivacky { X86::TEST64rr, X86::TEST64rm, 0 }, 432198090Srdivacky { X86::TEST8rr, X86::TEST8rm, 0 }, 433193323Sed // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 434198090Srdivacky { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, 435198090Srdivacky { X86::UCOMISSrr, X86::UCOMISSrm, 0 } 436193323Sed }; 437193323Sed 438193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { 439193323Sed unsigned RegOp = OpTbl1[i][0]; 440193323Sed unsigned MemOp = OpTbl1[i][1]; 441198090Srdivacky unsigned Align = OpTbl1[i][2]; 442193323Sed if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, 443198090Srdivacky std::make_pair(MemOp,Align))).second) 444193323Sed assert(false && "Duplicated entries?"); 445198090Srdivacky // Index 1, folded load 446198090Srdivacky unsigned AuxInfo = 1 | (1 << 4); 447193323Sed if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) 448193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 449193323Sed std::make_pair(RegOp, AuxInfo))).second) 450193323Sed AmbEntries.push_back(MemOp); 451193323Sed } 452193323Sed 453198090Srdivacky static const unsigned OpTbl2[][3] = { 454198090Srdivacky { X86::ADC32rr, X86::ADC32rm, 0 }, 455198090Srdivacky { X86::ADC64rr, 
X86::ADC64rm, 0 }, 456198090Srdivacky { X86::ADD16rr, X86::ADD16rm, 0 }, 457198090Srdivacky { X86::ADD32rr, X86::ADD32rm, 0 }, 458198090Srdivacky { X86::ADD64rr, X86::ADD64rm, 0 }, 459198090Srdivacky { X86::ADD8rr, X86::ADD8rm, 0 }, 460198090Srdivacky { X86::ADDPDrr, X86::ADDPDrm, 16 }, 461198090Srdivacky { X86::ADDPSrr, X86::ADDPSrm, 16 }, 462198090Srdivacky { X86::ADDSDrr, X86::ADDSDrm, 0 }, 463198090Srdivacky { X86::ADDSSrr, X86::ADDSSrm, 0 }, 464198090Srdivacky { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, 465198090Srdivacky { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, 466198090Srdivacky { X86::AND16rr, X86::AND16rm, 0 }, 467198090Srdivacky { X86::AND32rr, X86::AND32rm, 0 }, 468198090Srdivacky { X86::AND64rr, X86::AND64rm, 0 }, 469198090Srdivacky { X86::AND8rr, X86::AND8rm, 0 }, 470198090Srdivacky { X86::ANDNPDrr, X86::ANDNPDrm, 16 }, 471198090Srdivacky { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, 472198090Srdivacky { X86::ANDPDrr, X86::ANDPDrm, 16 }, 473198090Srdivacky { X86::ANDPSrr, X86::ANDPSrm, 16 }, 474198090Srdivacky { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, 475198090Srdivacky { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, 476198090Srdivacky { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, 477198090Srdivacky { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, 478198090Srdivacky { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, 479198090Srdivacky { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, 480198090Srdivacky { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, 481198090Srdivacky { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, 482198090Srdivacky { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, 483198090Srdivacky { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, 484198090Srdivacky { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, 485198090Srdivacky { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, 486198090Srdivacky { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, 487198090Srdivacky { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, 488198090Srdivacky { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, 489198090Srdivacky { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, 490198090Srdivacky { X86::CMOVG32rr, 
X86::CMOVG32rm, 0 }, 491198090Srdivacky { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, 492198090Srdivacky { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, 493198090Srdivacky { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, 494198090Srdivacky { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, 495198090Srdivacky { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, 496198090Srdivacky { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, 497198090Srdivacky { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, 498198090Srdivacky { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, 499198090Srdivacky { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, 500198090Srdivacky { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, 501198090Srdivacky { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, 502198090Srdivacky { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, 503198090Srdivacky { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, 504198090Srdivacky { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, 505198090Srdivacky { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, 506198090Srdivacky { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, 507198090Srdivacky { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, 508198090Srdivacky { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, 509198090Srdivacky { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, 510198090Srdivacky { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, 511198090Srdivacky { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, 512198090Srdivacky { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, 513198090Srdivacky { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, 514198090Srdivacky { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, 515198090Srdivacky { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, 516198090Srdivacky { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, 517198090Srdivacky { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, 518198090Srdivacky { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, 519198090Srdivacky { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, 520198090Srdivacky { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, 521198090Srdivacky { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, 522198090Srdivacky { X86::CMPPDrri, X86::CMPPDrmi, 16 }, 523198090Srdivacky { X86::CMPPSrri, X86::CMPPSrmi, 16 }, 524198090Srdivacky { 
X86::CMPSDrr, X86::CMPSDrm, 0 }, 525198090Srdivacky { X86::CMPSSrr, X86::CMPSSrm, 0 }, 526198090Srdivacky { X86::DIVPDrr, X86::DIVPDrm, 16 }, 527198090Srdivacky { X86::DIVPSrr, X86::DIVPSrm, 16 }, 528198090Srdivacky { X86::DIVSDrr, X86::DIVSDrm, 0 }, 529198090Srdivacky { X86::DIVSSrr, X86::DIVSSrm, 0 }, 530198090Srdivacky { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, 531198090Srdivacky { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, 532198090Srdivacky { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, 533198090Srdivacky { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, 534198090Srdivacky { X86::FsORPDrr, X86::FsORPDrm, 16 }, 535198090Srdivacky { X86::FsORPSrr, X86::FsORPSrm, 16 }, 536198090Srdivacky { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, 537198090Srdivacky { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, 538198090Srdivacky { X86::HADDPDrr, X86::HADDPDrm, 16 }, 539198090Srdivacky { X86::HADDPSrr, X86::HADDPSrm, 16 }, 540198090Srdivacky { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, 541198090Srdivacky { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, 542198090Srdivacky { X86::IMUL16rr, X86::IMUL16rm, 0 }, 543198090Srdivacky { X86::IMUL32rr, X86::IMUL32rm, 0 }, 544198090Srdivacky { X86::IMUL64rr, X86::IMUL64rm, 0 }, 545198090Srdivacky { X86::MAXPDrr, X86::MAXPDrm, 16 }, 546198090Srdivacky { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, 547198090Srdivacky { X86::MAXPSrr, X86::MAXPSrm, 16 }, 548198090Srdivacky { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, 549198090Srdivacky { X86::MAXSDrr, X86::MAXSDrm, 0 }, 550198090Srdivacky { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, 551198090Srdivacky { X86::MAXSSrr, X86::MAXSSrm, 0 }, 552198090Srdivacky { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, 553198090Srdivacky { X86::MINPDrr, X86::MINPDrm, 16 }, 554198090Srdivacky { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, 555198090Srdivacky { X86::MINPSrr, X86::MINPSrm, 16 }, 556198090Srdivacky { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, 557198090Srdivacky { X86::MINSDrr, X86::MINSDrm, 0 }, 558198090Srdivacky { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, 
559198090Srdivacky { X86::MINSSrr, X86::MINSSrm, 0 }, 560198090Srdivacky { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, 561198090Srdivacky { X86::MULPDrr, X86::MULPDrm, 16 }, 562198090Srdivacky { X86::MULPSrr, X86::MULPSrm, 16 }, 563198090Srdivacky { X86::MULSDrr, X86::MULSDrm, 0 }, 564198090Srdivacky { X86::MULSSrr, X86::MULSSrm, 0 }, 565198090Srdivacky { X86::OR16rr, X86::OR16rm, 0 }, 566198090Srdivacky { X86::OR32rr, X86::OR32rm, 0 }, 567198090Srdivacky { X86::OR64rr, X86::OR64rm, 0 }, 568198090Srdivacky { X86::OR8rr, X86::OR8rm, 0 }, 569198090Srdivacky { X86::ORPDrr, X86::ORPDrm, 16 }, 570198090Srdivacky { X86::ORPSrr, X86::ORPSrm, 16 }, 571198090Srdivacky { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, 572198090Srdivacky { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, 573198090Srdivacky { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, 574198090Srdivacky { X86::PADDBrr, X86::PADDBrm, 16 }, 575198090Srdivacky { X86::PADDDrr, X86::PADDDrm, 16 }, 576198090Srdivacky { X86::PADDQrr, X86::PADDQrm, 16 }, 577198090Srdivacky { X86::PADDSBrr, X86::PADDSBrm, 16 }, 578198090Srdivacky { X86::PADDSWrr, X86::PADDSWrm, 16 }, 579198090Srdivacky { X86::PADDWrr, X86::PADDWrm, 16 }, 580198090Srdivacky { X86::PANDNrr, X86::PANDNrm, 16 }, 581198090Srdivacky { X86::PANDrr, X86::PANDrm, 16 }, 582198090Srdivacky { X86::PAVGBrr, X86::PAVGBrm, 16 }, 583198090Srdivacky { X86::PAVGWrr, X86::PAVGWrm, 16 }, 584198090Srdivacky { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, 585198090Srdivacky { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, 586198090Srdivacky { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, 587198090Srdivacky { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, 588198090Srdivacky { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, 589198090Srdivacky { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, 590198090Srdivacky { X86::PINSRWrri, X86::PINSRWrmi, 16 }, 591198090Srdivacky { X86::PMADDWDrr, X86::PMADDWDrm, 16 }, 592198090Srdivacky { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, 593198090Srdivacky { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, 594198090Srdivacky { 
X86::PMINSWrr, X86::PMINSWrm, 16 }, 595198090Srdivacky { X86::PMINUBrr, X86::PMINUBrm, 16 }, 596198090Srdivacky { X86::PMULDQrr, X86::PMULDQrm, 16 }, 597198090Srdivacky { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, 598198090Srdivacky { X86::PMULHWrr, X86::PMULHWrm, 16 }, 599198090Srdivacky { X86::PMULLDrr, X86::PMULLDrm, 16 }, 600198090Srdivacky { X86::PMULLWrr, X86::PMULLWrm, 16 }, 601198090Srdivacky { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, 602198090Srdivacky { X86::PORrr, X86::PORrm, 16 }, 603198090Srdivacky { X86::PSADBWrr, X86::PSADBWrm, 16 }, 604198090Srdivacky { X86::PSLLDrr, X86::PSLLDrm, 16 }, 605198090Srdivacky { X86::PSLLQrr, X86::PSLLQrm, 16 }, 606198090Srdivacky { X86::PSLLWrr, X86::PSLLWrm, 16 }, 607198090Srdivacky { X86::PSRADrr, X86::PSRADrm, 16 }, 608198090Srdivacky { X86::PSRAWrr, X86::PSRAWrm, 16 }, 609198090Srdivacky { X86::PSRLDrr, X86::PSRLDrm, 16 }, 610198090Srdivacky { X86::PSRLQrr, X86::PSRLQrm, 16 }, 611198090Srdivacky { X86::PSRLWrr, X86::PSRLWrm, 16 }, 612198090Srdivacky { X86::PSUBBrr, X86::PSUBBrm, 16 }, 613198090Srdivacky { X86::PSUBDrr, X86::PSUBDrm, 16 }, 614198090Srdivacky { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, 615198090Srdivacky { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, 616198090Srdivacky { X86::PSUBWrr, X86::PSUBWrm, 16 }, 617198090Srdivacky { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, 618198090Srdivacky { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, 619198090Srdivacky { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, 620198090Srdivacky { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, 621198090Srdivacky { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, 622198090Srdivacky { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, 623198090Srdivacky { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, 624198090Srdivacky { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, 625198090Srdivacky { X86::PXORrr, X86::PXORrm, 16 }, 626198090Srdivacky { X86::SBB32rr, X86::SBB32rm, 0 }, 627198090Srdivacky { X86::SBB64rr, X86::SBB64rm, 0 }, 628198090Srdivacky { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, 
629198090Srdivacky { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, 630198090Srdivacky { X86::SUB16rr, X86::SUB16rm, 0 }, 631198090Srdivacky { X86::SUB32rr, X86::SUB32rm, 0 }, 632198090Srdivacky { X86::SUB64rr, X86::SUB64rm, 0 }, 633198090Srdivacky { X86::SUB8rr, X86::SUB8rm, 0 }, 634198090Srdivacky { X86::SUBPDrr, X86::SUBPDrm, 16 }, 635198090Srdivacky { X86::SUBPSrr, X86::SUBPSrm, 16 }, 636198090Srdivacky { X86::SUBSDrr, X86::SUBSDrm, 0 }, 637198090Srdivacky { X86::SUBSSrr, X86::SUBSSrm, 0 }, 638193323Sed // FIXME: TEST*rr -> swapped operand of TEST*mr. 639198090Srdivacky { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, 640198090Srdivacky { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, 641198090Srdivacky { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, 642198090Srdivacky { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, 643198090Srdivacky { X86::XOR16rr, X86::XOR16rm, 0 }, 644198090Srdivacky { X86::XOR32rr, X86::XOR32rm, 0 }, 645198090Srdivacky { X86::XOR64rr, X86::XOR64rm, 0 }, 646198090Srdivacky { X86::XOR8rr, X86::XOR8rm, 0 }, 647198090Srdivacky { X86::XORPDrr, X86::XORPDrm, 16 }, 648198090Srdivacky { X86::XORPSrr, X86::XORPSrm, 16 } 649193323Sed }; 650193323Sed 651193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { 652193323Sed unsigned RegOp = OpTbl2[i][0]; 653193323Sed unsigned MemOp = OpTbl2[i][1]; 654198090Srdivacky unsigned Align = OpTbl2[i][2]; 655193323Sed if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, 656198090Srdivacky std::make_pair(MemOp,Align))).second) 657193323Sed assert(false && "Duplicated entries?"); 658198090Srdivacky // Index 2, folded load 659198090Srdivacky unsigned AuxInfo = 2 | (1 << 4); 660193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 661193323Sed std::make_pair(RegOp, AuxInfo))).second) 662193323Sed AmbEntries.push_back(MemOp); 663193323Sed } 664193323Sed 665193323Sed // Remove ambiguous entries. 
  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}

/// isMoveInstr - Return true if MI is a plain register-to-register move and,
/// if so, report its source/destination registers and sub-register indices
/// through the out-parameters. Only the opcodes listed below are treated as
/// copies; everything else returns false.
bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Integer GPR copies of all widths, including the tail-call (_TC) variants.
  case X86::MOV8rr:
  case X86::MOV8rr_NOREX:
  case X86::MOV16rr:
  case X86::MOV32rr:
  case X86::MOV64rr:
  case X86::MOV32rr_TC:
  case X86::MOV64rr_TC:

  // FP Stack register class copies
  case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
  case X86::MOV_Fp3264: case X86::MOV_Fp3280:
  case X86::MOV_Fp6432: case X86::MOV_Fp8032:

  // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64
  // copies are done with FsMOVAPSrr and FsMOVAPDrr.

  case X86::FsMOVAPSrr:
  case X86::FsMOVAPDrr:
  case X86::MOVAPSrr:
  case X86::MOVAPDrr:
  case X86::MOVDQArr:
  case X86::MMX_MOVQ64rr:
    // All the cases above fall through to this shared handler: operand 0 is
    // the destination register, operand 1 the source.
    assert(MI.getNumOperands() >= 2 &&
           MI.getOperand(0).isReg() &&
           MI.getOperand(1).isReg() &&
           "invalid register-register move instruction");
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SrcSubIdx = MI.getOperand(1).getSubReg();
    DstSubIdx = MI.getOperand(0).getSubReg();
    return true;
  }
}

/// isCoalescableExtInstr - Return true if MI is a sign- or zero-extension
/// whose result could instead be obtained by reading the source register
/// through sub-register index SubIdx of the destination, allowing the
/// register coalescer to eliminate the extension.
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                    unsigned &SrcReg, unsigned &DstReg,
                                    unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: break;
  case X86::MOVSX16rr8:
  case X86::MOVZX16rr8:
  case X86::MOVSX32rr8:
  case X86::MOVZX32rr8:
  case X86::MOVSX64rr8:
  case X86::MOVZX64rr8:
    if (!TM.getSubtarget<X86Subtarget>().is64Bit())
      // It's not always legal to reference the low 8-bit of the larger
      // register in 32-bit mode.
      return false;
    // Intentional fall-through (no break): in 64-bit mode the 8-bit-source
    // extensions are handled together with the 16/32-bit cases below.
  case X86::MOVSX32rr16:
  case X86::MOVZX32rr16:
  case X86::MOVSX64rr16:
  case X86::MOVZX64rr16:
  case X86::MOVSX64rr32:
  case X86::MOVZX64rr32: {
    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
      // Be conservative.
      return false;
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    // NOTE(review): the numeric SubIdx values 1/3/4 look like this
    // revision's x86 sub-register indices for the low 8/16/32 bits —
    // confirm against X86RegisterInfo.td before relying on them.
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable(0);
      break;
    case X86::MOVSX16rr8:
    case X86::MOVZX16rr8:
    case X86::MOVSX32rr8:
    case X86::MOVZX32rr8:
    case X86::MOVSX64rr8:
    case X86::MOVZX64rr8:
      SubIdx = 1;
      break;
    case X86::MOVSX32rr16:
    case X86::MOVZX32rr16:
    case X86::MOVSX64rr16:
    case X86::MOVZX64rr16:
      SubIdx = 3;
      break;
    case X86::MOVSX64rr32:
    case X86::MOVZX64rr32:
      SubIdx = 4;
      break;
    }
    return true;
  }
  }
  return false;
}

/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and following operands form a reference to the stack frame.
767199481Srdivackybool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, 768199481Srdivacky int &FrameIndex) const { 769199481Srdivacky if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && 770199481Srdivacky MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && 771199481Srdivacky MI->getOperand(Op+1).getImm() == 1 && 772199481Srdivacky MI->getOperand(Op+2).getReg() == 0 && 773199481Srdivacky MI->getOperand(Op+3).getImm() == 0) { 774199481Srdivacky FrameIndex = MI->getOperand(Op).getIndex(); 775199481Srdivacky return true; 776199481Srdivacky } 777199481Srdivacky return false; 778199481Srdivacky} 779199481Srdivacky 780199481Srdivackystatic bool isFrameLoadOpcode(int Opcode) { 781199481Srdivacky switch (Opcode) { 782193323Sed default: break; 783193323Sed case X86::MOV8rm: 784193323Sed case X86::MOV16rm: 785193323Sed case X86::MOV32rm: 786193323Sed case X86::MOV64rm: 787193323Sed case X86::LD_Fp64m: 788193323Sed case X86::MOVSSrm: 789193323Sed case X86::MOVSDrm: 790193323Sed case X86::MOVAPSrm: 791193323Sed case X86::MOVAPDrm: 792193323Sed case X86::MOVDQArm: 793193323Sed case X86::MMX_MOVD64rm: 794193323Sed case X86::MMX_MOVQ64rm: 795199481Srdivacky return true; 796193323Sed break; 797193323Sed } 798199481Srdivacky return false; 799193323Sed} 800193323Sed 801199481Srdivackystatic bool isFrameStoreOpcode(int Opcode) { 802199481Srdivacky switch (Opcode) { 803193323Sed default: break; 804193323Sed case X86::MOV8mr: 805193323Sed case X86::MOV16mr: 806193323Sed case X86::MOV32mr: 807193323Sed case X86::MOV64mr: 808193323Sed case X86::ST_FpP64m: 809193323Sed case X86::MOVSSmr: 810193323Sed case X86::MOVSDmr: 811193323Sed case X86::MOVAPSmr: 812193323Sed case X86::MOVAPDmr: 813193323Sed case X86::MOVDQAmr: 814193323Sed case X86::MMX_MOVD64mr: 815193323Sed case X86::MMX_MOVQ64mr: 816193323Sed case X86::MMX_MOVNTQmr: 817199481Srdivacky return true; 818199481Srdivacky } 819199481Srdivacky return false; 820199481Srdivacky} 
821199481Srdivacky 822199481Srdivackyunsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 823199481Srdivacky int &FrameIndex) const { 824199481Srdivacky if (isFrameLoadOpcode(MI->getOpcode())) 825199481Srdivacky if (isFrameOperand(MI, 1, FrameIndex)) 826199481Srdivacky return MI->getOperand(0).getReg(); 827199481Srdivacky return 0; 828199481Srdivacky} 829199481Srdivacky 830199481Srdivackyunsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 831199481Srdivacky int &FrameIndex) const { 832199481Srdivacky if (isFrameLoadOpcode(MI->getOpcode())) { 833199481Srdivacky unsigned Reg; 834199481Srdivacky if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) 835199481Srdivacky return Reg; 836199481Srdivacky // Check for post-frame index elimination operations 837200581Srdivacky const MachineMemOperand *Dummy; 838200581Srdivacky return hasLoadFromStackSlot(MI, Dummy, FrameIndex); 839199481Srdivacky } 840199481Srdivacky return 0; 841199481Srdivacky} 842199481Srdivacky 843199481Srdivackybool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, 844200581Srdivacky const MachineMemOperand *&MMO, 845199481Srdivacky int &FrameIndex) const { 846199481Srdivacky for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 847199481Srdivacky oe = MI->memoperands_end(); 848199481Srdivacky o != oe; 849199481Srdivacky ++o) { 850199481Srdivacky if ((*o)->isLoad() && (*o)->getValue()) 851199481Srdivacky if (const FixedStackPseudoSourceValue *Value = 852199481Srdivacky dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 853199481Srdivacky FrameIndex = Value->getFrameIndex(); 854200581Srdivacky MMO = *o; 855199481Srdivacky return true; 856199481Srdivacky } 857199481Srdivacky } 858199481Srdivacky return false; 859199481Srdivacky} 860199481Srdivacky 861199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, 862199481Srdivacky int &FrameIndex) const { 863199481Srdivacky if (isFrameStoreOpcode(MI->getOpcode())) 
864199481Srdivacky if (isFrameOperand(MI, 0, FrameIndex)) 865193323Sed return MI->getOperand(X86AddrNumOperands).getReg(); 866199481Srdivacky return 0; 867199481Srdivacky} 868199481Srdivacky 869199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, 870199481Srdivacky int &FrameIndex) const { 871199481Srdivacky if (isFrameStoreOpcode(MI->getOpcode())) { 872199481Srdivacky unsigned Reg; 873199481Srdivacky if ((Reg = isStoreToStackSlot(MI, FrameIndex))) 874199481Srdivacky return Reg; 875199481Srdivacky // Check for post-frame index elimination operations 876200581Srdivacky const MachineMemOperand *Dummy; 877200581Srdivacky return hasStoreToStackSlot(MI, Dummy, FrameIndex); 878193323Sed } 879193323Sed return 0; 880193323Sed} 881193323Sed 882199481Srdivackybool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, 883200581Srdivacky const MachineMemOperand *&MMO, 884199481Srdivacky int &FrameIndex) const { 885199481Srdivacky for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 886199481Srdivacky oe = MI->memoperands_end(); 887199481Srdivacky o != oe; 888199481Srdivacky ++o) { 889199481Srdivacky if ((*o)->isStore() && (*o)->getValue()) 890199481Srdivacky if (const FixedStackPseudoSourceValue *Value = 891199481Srdivacky dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 892199481Srdivacky FrameIndex = Value->getFrameIndex(); 893200581Srdivacky MMO = *o; 894199481Srdivacky return true; 895199481Srdivacky } 896199481Srdivacky } 897199481Srdivacky return false; 898199481Srdivacky} 899199481Srdivacky 900193323Sed/// regIsPICBase - Return true if register is PIC base (i.e.g defined by 901193323Sed/// X86::MOVPC32r. 
902193323Sedstatic bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { 903193323Sed bool isPICBase = false; 904193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 905193323Sed E = MRI.def_end(); I != E; ++I) { 906193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 907193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 908193323Sed return false; 909193323Sed assert(!isPICBase && "More than one PIC base?"); 910193323Sed isPICBase = true; 911193323Sed } 912193323Sed return isPICBase; 913193323Sed} 914193323Sed 915193323Sedbool 916198090SrdivackyX86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, 917198090Srdivacky AliasAnalysis *AA) const { 918193323Sed switch (MI->getOpcode()) { 919193323Sed default: break; 920193323Sed case X86::MOV8rm: 921193323Sed case X86::MOV16rm: 922193323Sed case X86::MOV32rm: 923193323Sed case X86::MOV64rm: 924193323Sed case X86::LD_Fp64m: 925193323Sed case X86::MOVSSrm: 926193323Sed case X86::MOVSDrm: 927193323Sed case X86::MOVAPSrm: 928199481Srdivacky case X86::MOVUPSrm: 929199481Srdivacky case X86::MOVUPSrm_Int: 930193323Sed case X86::MOVAPDrm: 931193323Sed case X86::MOVDQArm: 932193323Sed case X86::MMX_MOVD64rm: 933199481Srdivacky case X86::MMX_MOVQ64rm: 934199481Srdivacky case X86::FsMOVAPSrm: 935199481Srdivacky case X86::FsMOVAPDrm: { 936193323Sed // Loads from constant pools are trivially rematerializable. 937193323Sed if (MI->getOperand(1).isReg() && 938193323Sed MI->getOperand(2).isImm() && 939193323Sed MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 940198090Srdivacky MI->isInvariantLoad(AA)) { 941193323Sed unsigned BaseReg = MI->getOperand(1).getReg(); 942195098Sed if (BaseReg == 0 || BaseReg == X86::RIP) 943193323Sed return true; 944193323Sed // Allow re-materialization of PIC load. 
945193323Sed if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) 946193323Sed return false; 947193323Sed const MachineFunction &MF = *MI->getParent()->getParent(); 948193323Sed const MachineRegisterInfo &MRI = MF.getRegInfo(); 949193323Sed bool isPICBase = false; 950193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 951193323Sed E = MRI.def_end(); I != E; ++I) { 952193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 953193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 954193323Sed return false; 955193323Sed assert(!isPICBase && "More than one PIC base?"); 956193323Sed isPICBase = true; 957193323Sed } 958193323Sed return isPICBase; 959193323Sed } 960193323Sed return false; 961193323Sed } 962193323Sed 963193323Sed case X86::LEA32r: 964193323Sed case X86::LEA64r: { 965193323Sed if (MI->getOperand(2).isImm() && 966193323Sed MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 967193323Sed !MI->getOperand(4).isReg()) { 968193323Sed // lea fi#, lea GV, etc. are all rematerializable. 969193323Sed if (!MI->getOperand(1).isReg()) 970193323Sed return true; 971193323Sed unsigned BaseReg = MI->getOperand(1).getReg(); 972193323Sed if (BaseReg == 0) 973193323Sed return true; 974193323Sed // Allow re-materialization of lea PICBase + x. 975193323Sed const MachineFunction &MF = *MI->getParent()->getParent(); 976193323Sed const MachineRegisterInfo &MRI = MF.getRegInfo(); 977193323Sed return regIsPICBase(BaseReg, MRI); 978193323Sed } 979193323Sed return false; 980193323Sed } 981193323Sed } 982193323Sed 983193323Sed // All other instructions marked M_REMATERIALIZABLE are always trivially 984193323Sed // rematerializable. 985193323Sed return true; 986193323Sed} 987193323Sed 988193323Sed/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that 989193323Sed/// would clobber the EFLAGS condition register. Note the result may be 990193323Sed/// conservative. 
/// If it cannot definitely determine the safety after visiting
/// a few instructions in each direction it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator E = MBB.end();

  // It's always safe to clobber EFLAGS at the end of a block.
  if (I == E)
    return true;

  // For compile time consideration, if we are not able to determine the
  // safety after visiting 4 instructions in each direction, we will assume
  // it's not safe.
  //
  // Forward scan: look for the next use or def of EFLAGS. A use means a
  // clobber at I would corrupt a live value; a def means the old value is
  // dead here.
  MachineBasicBlock::iterator Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    bool SeenDef = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      if (!MO.isReg())
        continue;
      if (MO.getReg() == X86::EFLAGS) {
        if (MO.isUse())
          return false;
        SeenDef = true;
      }
    }

    if (SeenDef)
      // This instruction defines EFLAGS, no need to look any further.
      return true;
    ++Iter;
    // Skip over DBG_VALUE.
    while (Iter != E && Iter->isDebugValue())
      ++Iter;

    // If we make it to the end of the block, it's safe to clobber EFLAGS.
    if (Iter == E)
      return true;
  }

  // Backward scan: look for evidence that EFLAGS is already dead at I (a
  // dead def or a kill with no intervening redefinition).
  MachineBasicBlock::iterator B = MBB.begin();
  Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    // If we make it to the beginning of the block, it's safe to clobber
    // EFLAGS iff EFLAGS is not live-in.
    if (Iter == B)
      return !MBB.isLiveIn(X86::EFLAGS);

    --Iter;
    // Skip over DBG_VALUE.
    while (Iter != B && Iter->isDebugValue())
      --Iter;

    bool SawKill = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
        if (MO.isDef()) return MO.isDead();
        if (MO.isKill()) SawKill = true;
      }
    }

    if (SawKill)
      // This instruction kills EFLAGS and doesn't redefine it, so
      // there's no need to look further.
      return true;
  }

  // Conservative answer.
  return false;
}

/// reMaterialize - Emit a copy of Orig before I in MBB, defining DestReg
/// (possibly narrowed to SubIdx). Pseudo zero-idioms (MOV*r0) are rewritten
/// to plain moves of immediate 0 when EFLAGS cannot safely be clobbered,
/// since their xor expansion would trash the flags.
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 unsigned DestReg, unsigned SubIdx,
                                 const MachineInstr *Orig,
                                 const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(I);

  // For a physical destination, fold the sub-register index into the
  // register itself up front.
  if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
    DestReg = TRI->getSubReg(DestReg, SubIdx);
    SubIdx = 0;
  }

  // MOV32r0 etc. are implemented with xor which clobbers condition code.
  // Re-materialize them as movri instructions to avoid side effects.
  bool Clone = true;
  unsigned Opc = Orig->getOpcode();
  switch (Opc) {
  default: break;
  case X86::MOV8r0:
  case X86::MOV16r0:
  case X86::MOV32r0:
  case X86::MOV64r0: {
    if (!isSafeToClobberEFLAGS(MBB, I)) {
      switch (Opc) {
      default: break;
      case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
      case X86::MOV16r0: Opc = X86::MOV16ri; break;
      case X86::MOV32r0: Opc = X86::MOV32ri; break;
      case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
      }
      Clone = false;
    }
    break;
  }
  }

  if (Clone) {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->getOperand(0).setReg(DestReg);
    MBB.insert(I, MI);
  } else {
    BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
  }

  // Either path inserted exactly one instruction immediately before I.
  MachineInstr *NewMI = prior(I);
  NewMI->getOperand(0).setSubReg(SubIdx);
}

/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
/// is not marked dead.
static bool hasLiveCondCodeDef(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
/// to a 32-bit superregister and then truncating back down to a 16-bit
/// subregister.
MachineInstr *
X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
                                           MachineFunction::iterator &MFI,
                                           MachineBasicBlock::iterator &MBBI,
                                           LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
    ? X86::LEA64_32r : X86::LEA32r;
  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);

  // Build and insert into an implicit UNDEF value. This is OK because
  // we'll be shifting and then extracting the lower 16-bits.
  // This has the potential to cause partial register stall. e.g.
  //   movw    (%rbp,%rcx,2), %dx
  //   leal    -65(%rdx), %esi
  // But testing has shown this *does* help performance in 64-bit mode (at
  // least on modern x86 machines).
  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
  MachineInstr *InsMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
    .addReg(leaInReg)
    .addReg(Src, getKillRegState(isKill))
    .addImm(X86::SUBREG_16BIT);

  // The LEA itself computes into the 32-bit leaOutReg; each supported
  // 16-bit opcode maps onto a different LEA addressing form.
  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                    get(Opc), leaOutReg);
  switch (MIOpc) {
  default:
    llvm_unreachable(0);
    break;
  case X86::SHL16ri: {
    unsigned ShAmt = MI->getOperand(2).getImm();
    MIB.addReg(0).addImm(1 << ShAmt)
       .addReg(leaInReg, RegState::Kill).addImm(0);
    break;
  }
  case X86::INC16r:
  case X86::INC64_16r:
    addLeaRegOffset(MIB, leaInReg, true, 1);
    break;
  case X86::DEC16r:
  case X86::DEC64_16r:
    addLeaRegOffset(MIB, leaInReg, true, -1);
    break;
  case X86::ADD16ri:
  case X86::ADD16ri8:
    addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
    break;
  case X86::ADD16rr: {
    unsigned Src2 = MI->getOperand(2).getReg();
    bool isKill2 = MI->getOperand(2).isKill();
    unsigned leaInReg2 = 0;
    MachineInstr *InsMI2 = 0;
    if (Src == Src2) {
      // ADD16rr %reg1028<kill>, %reg1028
      // just a single insert_subreg.
      addRegReg(MIB, leaInReg, true, leaInReg, false);
    } else {
      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
      // Build and insert into an implicit UNDEF value. This is OK because
      // we'll be shifting and then extracting the lower 16-bits.
      BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
      InsMI2 =
        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
        .addReg(leaInReg2)
        .addReg(Src2, getKillRegState(isKill2))
        .addImm(X86::SUBREG_16BIT);
      addRegReg(MIB, leaInReg, true, leaInReg2, true);
    }
    if (LV && isKill2 && InsMI2)
      LV->replaceKillInstruction(Src2, MI, InsMI2);
    break;
  }
  }

  // Truncate the 32-bit result back down into the original 16-bit Dest.
  MachineInstr *NewMI = MIB;
  MachineInstr *ExtMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
    .addReg(Dest, RegState::Define | getDeadRegState(isDead))
    .addReg(leaOutReg, RegState::Kill)
    .addImm(X86::SUBREG_16BIT);

  if (LV) {
    // Update live variables
    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
    if (isKill)
      LV->replaceKillInstruction(Src, MI, InsMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, ExtMI);
  }

  return ExtMI;
}

/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand.  This allows the X86 target (for
/// example) to convert ADD and SHL instructions into LEA instructions if they
/// would require register copies due to two-addressness.
///
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                    MachineBasicBlock::iterator &MBBI,
                                    LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  // All instructions input are two-addr instructions.  Get the known operands.
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  MachineInstr *NewMI = NULL;
  // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's.  When
  // we have better subtarget support, enable the 16-bit LEA generation here.
  // 16-bit LEA is also slow on Core2.
1255193323Sed bool DisableLEA16 = true; 1256200581Srdivacky bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 1257193323Sed 1258193323Sed unsigned MIOpc = MI->getOpcode(); 1259193323Sed switch (MIOpc) { 1260193323Sed case X86::SHUFPSrri: { 1261193323Sed assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); 1262193323Sed if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; 1263193323Sed 1264193323Sed unsigned B = MI->getOperand(1).getReg(); 1265193323Sed unsigned C = MI->getOperand(2).getReg(); 1266193323Sed if (B != C) return 0; 1267193323Sed unsigned A = MI->getOperand(0).getReg(); 1268193323Sed unsigned M = MI->getOperand(3).getImm(); 1269193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) 1270193323Sed .addReg(A, RegState::Define | getDeadRegState(isDead)) 1271193323Sed .addReg(B, getKillRegState(isKill)).addImm(M); 1272193323Sed break; 1273193323Sed } 1274193323Sed case X86::SHL64ri: { 1275193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1276193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1277193323Sed // the flags produced by a shift yet, so this is safe. 1278193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1279193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1280193323Sed 1281193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 1282193323Sed .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1283193323Sed .addReg(0).addImm(1 << ShAmt) 1284193323Sed .addReg(Src, getKillRegState(isKill)) 1285193323Sed .addImm(0); 1286193323Sed break; 1287193323Sed } 1288193323Sed case X86::SHL32ri: { 1289193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1290193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1291193323Sed // the flags produced by a shift yet, so this is safe. 
1292193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1293193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1294193323Sed 1295200581Srdivacky unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 1296193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1297193323Sed .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1298193323Sed .addReg(0).addImm(1 << ShAmt) 1299193323Sed .addReg(Src, getKillRegState(isKill)).addImm(0); 1300193323Sed break; 1301193323Sed } 1302193323Sed case X86::SHL16ri: { 1303193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1304193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1305193323Sed // the flags produced by a shift yet, so this is safe. 1306193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1307193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1308193323Sed 1309200581Srdivacky if (DisableLEA16) 1310200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1311200581Srdivacky NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1312200581Srdivacky .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1313200581Srdivacky .addReg(0).addImm(1 << ShAmt) 1314200581Srdivacky .addReg(Src, getKillRegState(isKill)) 1315200581Srdivacky .addImm(0); 1316193323Sed break; 1317193323Sed } 1318193323Sed default: { 1319193323Sed // The following opcodes also sets the condition code register(s). Only 1320193323Sed // convert them to equivalent lea if the condition code register def's 1321193323Sed // are dead! 1322193323Sed if (hasLiveCondCodeDef(MI)) 1323193323Sed return 0; 1324193323Sed 1325193323Sed switch (MIOpc) { 1326193323Sed default: return 0; 1327193323Sed case X86::INC64r: 1328193323Sed case X86::INC32r: 1329193323Sed case X86::INC64_32r: { 1330193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 1331193323Sed unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r 1332193323Sed : (is64Bit ? 
X86::LEA64_32r : X86::LEA32r); 1333193323Sed NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1334193323Sed .addReg(Dest, RegState::Define | 1335193323Sed getDeadRegState(isDead)), 1336193323Sed Src, isKill, 1); 1337193323Sed break; 1338193323Sed } 1339193323Sed case X86::INC16r: 1340193323Sed case X86::INC64_16r: 1341200581Srdivacky if (DisableLEA16) 1342200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1343193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 1344193323Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1345193323Sed .addReg(Dest, RegState::Define | 1346193323Sed getDeadRegState(isDead)), 1347193323Sed Src, isKill, 1); 1348193323Sed break; 1349193323Sed case X86::DEC64r: 1350193323Sed case X86::DEC32r: 1351193323Sed case X86::DEC64_32r: { 1352193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 1353193323Sed unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r 1354193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 1355193323Sed NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1356193323Sed .addReg(Dest, RegState::Define | 1357193323Sed getDeadRegState(isDead)), 1358193323Sed Src, isKill, -1); 1359193323Sed break; 1360193323Sed } 1361193323Sed case X86::DEC16r: 1362193323Sed case X86::DEC64_16r: 1363200581Srdivacky if (DisableLEA16) 1364200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1365193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 1366193323Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1367193323Sed .addReg(Dest, RegState::Define | 1368193323Sed getDeadRegState(isDead)), 1369193323Sed Src, isKill, -1); 1370193323Sed break; 1371193323Sed case X86::ADD64rr: 1372193323Sed case X86::ADD32rr: { 1373193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1374193323Sed unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r 1375193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 1376193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 1377193323Sed bool isKill2 = MI->getOperand(2).isKill(); 1378193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1379193323Sed .addReg(Dest, RegState::Define | 1380193323Sed getDeadRegState(isDead)), 1381193323Sed Src, isKill, Src2, isKill2); 1382193323Sed if (LV && isKill2) 1383193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 1384193323Sed break; 1385193323Sed } 1386193323Sed case X86::ADD16rr: { 1387200581Srdivacky if (DisableLEA16) 1388200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1389193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1390193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 1391193323Sed bool isKill2 = MI->getOperand(2).isKill(); 1392193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1393193323Sed .addReg(Dest, RegState::Define | 1394193323Sed getDeadRegState(isDead)), 1395193323Sed Src, isKill, Src2, isKill2); 1396193323Sed if (LV && isKill2) 1397193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 1398193323Sed break; 1399193323Sed } 1400193323Sed case X86::ADD64ri32: 1401193323Sed case X86::ADD64ri8: 1402193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1403200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 1404200581Srdivacky .addReg(Dest, RegState::Define | 1405200581Srdivacky getDeadRegState(isDead)), 1406200581Srdivacky Src, isKill, MI->getOperand(2).getImm()); 1407193323Sed break; 1408193323Sed case X86::ADD32ri: 1409200581Srdivacky case X86::ADD32ri8: { 1410193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1411200581Srdivacky unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 1412200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1413200581Srdivacky .addReg(Dest, RegState::Define | 1414200581Srdivacky getDeadRegState(isDead)), 1415193323Sed Src, isKill, MI->getOperand(2).getImm()); 1416193323Sed break; 1417200581Srdivacky } 1418193323Sed case X86::ADD16ri: 1419193323Sed case X86::ADD16ri8: 1420200581Srdivacky if (DisableLEA16) 1421200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1422193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1423200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1424200581Srdivacky .addReg(Dest, RegState::Define | 1425200581Srdivacky getDeadRegState(isDead)), 1426200581Srdivacky Src, isKill, MI->getOperand(2).getImm()); 1427193323Sed break; 1428193323Sed } 1429193323Sed } 1430193323Sed } 1431193323Sed 1432193323Sed if (!NewMI) return 0; 1433193323Sed 1434193323Sed if (LV) { // Update live variables 1435193323Sed if (isKill) 1436193323Sed LV->replaceKillInstruction(Src, MI, NewMI); 1437193323Sed if (isDead) 1438193323Sed LV->replaceKillInstruction(Dest, MI, NewMI); 1439193323Sed } 1440193323Sed 1441193323Sed MFI->insert(MBBI, NewMI); // Insert the new inst 1442193323Sed return NewMI; 1443193323Sed} 1444193323Sed 1445193323Sed/// commuteInstruction - We have a few instructions that must be hacked on to 1446193323Sed/// commute them. 
1447193323Sed/// 1448193323SedMachineInstr * 1449193323SedX86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { 1450193323Sed switch (MI->getOpcode()) { 1451193323Sed case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) 1452193323Sed case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) 1453193323Sed case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I) 1454193323Sed case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I) 1455193323Sed case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I) 1456193323Sed case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I) 1457193323Sed unsigned Opc; 1458193323Sed unsigned Size; 1459193323Sed switch (MI->getOpcode()) { 1460198090Srdivacky default: llvm_unreachable("Unreachable!"); 1461193323Sed case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; 1462193323Sed case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; 1463193323Sed case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; 1464193323Sed case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break; 1465193323Sed case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break; 1466193323Sed case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break; 1467193323Sed } 1468193323Sed unsigned Amt = MI->getOperand(3).getImm(); 1469193323Sed if (NewMI) { 1470193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 1471193323Sed MI = MF.CloneMachineInstr(MI); 1472193323Sed NewMI = false; 1473193323Sed } 1474193323Sed MI->setDesc(get(Opc)); 1475193323Sed MI->getOperand(3).setImm(Size-Amt); 1476193323Sed return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); 1477193323Sed } 1478193323Sed case X86::CMOVB16rr: 1479193323Sed case X86::CMOVB32rr: 1480193323Sed case X86::CMOVB64rr: 1481193323Sed case X86::CMOVAE16rr: 1482193323Sed case X86::CMOVAE32rr: 1483193323Sed case X86::CMOVAE64rr: 
1484193323Sed case X86::CMOVE16rr: 1485193323Sed case X86::CMOVE32rr: 1486193323Sed case X86::CMOVE64rr: 1487193323Sed case X86::CMOVNE16rr: 1488193323Sed case X86::CMOVNE32rr: 1489193323Sed case X86::CMOVNE64rr: 1490193323Sed case X86::CMOVBE16rr: 1491193323Sed case X86::CMOVBE32rr: 1492193323Sed case X86::CMOVBE64rr: 1493193323Sed case X86::CMOVA16rr: 1494193323Sed case X86::CMOVA32rr: 1495193323Sed case X86::CMOVA64rr: 1496193323Sed case X86::CMOVL16rr: 1497193323Sed case X86::CMOVL32rr: 1498193323Sed case X86::CMOVL64rr: 1499193323Sed case X86::CMOVGE16rr: 1500193323Sed case X86::CMOVGE32rr: 1501193323Sed case X86::CMOVGE64rr: 1502193323Sed case X86::CMOVLE16rr: 1503193323Sed case X86::CMOVLE32rr: 1504193323Sed case X86::CMOVLE64rr: 1505193323Sed case X86::CMOVG16rr: 1506193323Sed case X86::CMOVG32rr: 1507193323Sed case X86::CMOVG64rr: 1508193323Sed case X86::CMOVS16rr: 1509193323Sed case X86::CMOVS32rr: 1510193323Sed case X86::CMOVS64rr: 1511193323Sed case X86::CMOVNS16rr: 1512193323Sed case X86::CMOVNS32rr: 1513193323Sed case X86::CMOVNS64rr: 1514193323Sed case X86::CMOVP16rr: 1515193323Sed case X86::CMOVP32rr: 1516193323Sed case X86::CMOVP64rr: 1517193323Sed case X86::CMOVNP16rr: 1518193323Sed case X86::CMOVNP32rr: 1519193323Sed case X86::CMOVNP64rr: 1520193323Sed case X86::CMOVO16rr: 1521193323Sed case X86::CMOVO32rr: 1522193323Sed case X86::CMOVO64rr: 1523193323Sed case X86::CMOVNO16rr: 1524193323Sed case X86::CMOVNO32rr: 1525193323Sed case X86::CMOVNO64rr: { 1526193323Sed unsigned Opc = 0; 1527193323Sed switch (MI->getOpcode()) { 1528193323Sed default: break; 1529193323Sed case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; 1530193323Sed case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; 1531193323Sed case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; 1532193323Sed case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break; 1533193323Sed case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break; 1534193323Sed case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break; 1535193323Sed 
case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break; 1536193323Sed case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break; 1537193323Sed case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break; 1538193323Sed case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; 1539193323Sed case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; 1540193323Sed case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; 1541193323Sed case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break; 1542193323Sed case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break; 1543193323Sed case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break; 1544193323Sed case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break; 1545193323Sed case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break; 1546193323Sed case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break; 1547193323Sed case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break; 1548193323Sed case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break; 1549193323Sed case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break; 1550193323Sed case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break; 1551193323Sed case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break; 1552193323Sed case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break; 1553193323Sed case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break; 1554193323Sed case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break; 1555193323Sed case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break; 1556193323Sed case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break; 1557193323Sed case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break; 1558193323Sed case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break; 1559193323Sed case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break; 1560193323Sed case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break; 1561193323Sed case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break; 1562193323Sed case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break; 1563193323Sed case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break; 1564193323Sed case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break; 1565193323Sed case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; 
break; 1566193323Sed case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break; 1567193323Sed case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break; 1568193323Sed case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break; 1569193323Sed case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; 1570193323Sed case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; 1571193323Sed case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break; 1572193323Sed case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break; 1573193323Sed case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break; 1574193323Sed case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break; 1575193323Sed case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break; 1576193323Sed case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break; 1577193323Sed } 1578193323Sed if (NewMI) { 1579193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 1580193323Sed MI = MF.CloneMachineInstr(MI); 1581193323Sed NewMI = false; 1582193323Sed } 1583193323Sed MI->setDesc(get(Opc)); 1584193323Sed // Fallthrough intended. 1585193323Sed } 1586193323Sed default: 1587193323Sed return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); 1588193323Sed } 1589193323Sed} 1590193323Sed 1591193323Sedstatic X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { 1592193323Sed switch (BrOpc) { 1593193323Sed default: return X86::COND_INVALID; 1594203954Srdivacky case X86::JE_4: return X86::COND_E; 1595203954Srdivacky case X86::JNE_4: return X86::COND_NE; 1596203954Srdivacky case X86::JL_4: return X86::COND_L; 1597203954Srdivacky case X86::JLE_4: return X86::COND_LE; 1598203954Srdivacky case X86::JG_4: return X86::COND_G; 1599203954Srdivacky case X86::JGE_4: return X86::COND_GE; 1600203954Srdivacky case X86::JB_4: return X86::COND_B; 1601203954Srdivacky case X86::JBE_4: return X86::COND_BE; 1602203954Srdivacky case X86::JA_4: return X86::COND_A; 1603203954Srdivacky case X86::JAE_4: return X86::COND_AE; 1604203954Srdivacky case X86::JS_4: return X86::COND_S; 1605203954Srdivacky case X86::JNS_4: return X86::COND_NS; 
1606203954Srdivacky case X86::JP_4: return X86::COND_P; 1607203954Srdivacky case X86::JNP_4: return X86::COND_NP; 1608203954Srdivacky case X86::JO_4: return X86::COND_O; 1609203954Srdivacky case X86::JNO_4: return X86::COND_NO; 1610193323Sed } 1611193323Sed} 1612193323Sed 1613193323Sedunsigned X86::GetCondBranchFromCond(X86::CondCode CC) { 1614193323Sed switch (CC) { 1615198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 1616203954Srdivacky case X86::COND_E: return X86::JE_4; 1617203954Srdivacky case X86::COND_NE: return X86::JNE_4; 1618203954Srdivacky case X86::COND_L: return X86::JL_4; 1619203954Srdivacky case X86::COND_LE: return X86::JLE_4; 1620203954Srdivacky case X86::COND_G: return X86::JG_4; 1621203954Srdivacky case X86::COND_GE: return X86::JGE_4; 1622203954Srdivacky case X86::COND_B: return X86::JB_4; 1623203954Srdivacky case X86::COND_BE: return X86::JBE_4; 1624203954Srdivacky case X86::COND_A: return X86::JA_4; 1625203954Srdivacky case X86::COND_AE: return X86::JAE_4; 1626203954Srdivacky case X86::COND_S: return X86::JS_4; 1627203954Srdivacky case X86::COND_NS: return X86::JNS_4; 1628203954Srdivacky case X86::COND_P: return X86::JP_4; 1629203954Srdivacky case X86::COND_NP: return X86::JNP_4; 1630203954Srdivacky case X86::COND_O: return X86::JO_4; 1631203954Srdivacky case X86::COND_NO: return X86::JNO_4; 1632193323Sed } 1633193323Sed} 1634193323Sed 1635193323Sed/// GetOppositeBranchCondition - Return the inverse of the specified condition, 1636193323Sed/// e.g. turning COND_E to COND_NE. 
1637193323SedX86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { 1638193323Sed switch (CC) { 1639198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 1640193323Sed case X86::COND_E: return X86::COND_NE; 1641193323Sed case X86::COND_NE: return X86::COND_E; 1642193323Sed case X86::COND_L: return X86::COND_GE; 1643193323Sed case X86::COND_LE: return X86::COND_G; 1644193323Sed case X86::COND_G: return X86::COND_LE; 1645193323Sed case X86::COND_GE: return X86::COND_L; 1646193323Sed case X86::COND_B: return X86::COND_AE; 1647193323Sed case X86::COND_BE: return X86::COND_A; 1648193323Sed case X86::COND_A: return X86::COND_BE; 1649193323Sed case X86::COND_AE: return X86::COND_B; 1650193323Sed case X86::COND_S: return X86::COND_NS; 1651193323Sed case X86::COND_NS: return X86::COND_S; 1652193323Sed case X86::COND_P: return X86::COND_NP; 1653193323Sed case X86::COND_NP: return X86::COND_P; 1654193323Sed case X86::COND_O: return X86::COND_NO; 1655193323Sed case X86::COND_NO: return X86::COND_O; 1656193323Sed } 1657193323Sed} 1658193323Sed 1659193323Sedbool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { 1660193323Sed const TargetInstrDesc &TID = MI->getDesc(); 1661193323Sed if (!TID.isTerminator()) return false; 1662193323Sed 1663193323Sed // Conditional branch is a special case. 1664193323Sed if (TID.isBranch() && !TID.isBarrier()) 1665193323Sed return true; 1666193323Sed if (!TID.isPredicable()) 1667193323Sed return true; 1668193323Sed return !isPredicated(MI); 1669193323Sed} 1670193323Sed 1671193323Sed// For purposes of branch analysis do not count FP_REG_KILL as a terminator. 
1672193323Sedstatic bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI, 1673193323Sed const X86InstrInfo &TII) { 1674193323Sed if (MI->getOpcode() == X86::FP_REG_KILL) 1675193323Sed return false; 1676193323Sed return TII.isUnpredicatedTerminator(MI); 1677193323Sed} 1678193323Sed 1679193323Sedbool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 1680193323Sed MachineBasicBlock *&TBB, 1681193323Sed MachineBasicBlock *&FBB, 1682193323Sed SmallVectorImpl<MachineOperand> &Cond, 1683193323Sed bool AllowModify) const { 1684193323Sed // Start from the bottom of the block and work up, examining the 1685193323Sed // terminator instructions. 1686193323Sed MachineBasicBlock::iterator I = MBB.end(); 1687193323Sed while (I != MBB.begin()) { 1688193323Sed --I; 1689206083Srdivacky if (I->isDebugValue()) 1690206083Srdivacky continue; 1691200581Srdivacky 1692200581Srdivacky // Working from the bottom, when we see a non-terminator instruction, we're 1693200581Srdivacky // done. 1694193323Sed if (!isBrAnalysisUnpredicatedTerminator(I, *this)) 1695193323Sed break; 1696200581Srdivacky 1697200581Srdivacky // A terminator that isn't a branch can't easily be handled by this 1698200581Srdivacky // analysis. 1699193323Sed if (!I->getDesc().isBranch()) 1700193323Sed return true; 1701200581Srdivacky 1702193323Sed // Handle unconditional branches. 1703203954Srdivacky if (I->getOpcode() == X86::JMP_4) { 1704193323Sed if (!AllowModify) { 1705193323Sed TBB = I->getOperand(0).getMBB(); 1706193323Sed continue; 1707193323Sed } 1708193323Sed 1709193323Sed // If the block has any instructions after a JMP, delete them. 1710200581Srdivacky while (llvm::next(I) != MBB.end()) 1711200581Srdivacky llvm::next(I)->eraseFromParent(); 1712200581Srdivacky 1713193323Sed Cond.clear(); 1714193323Sed FBB = 0; 1715200581Srdivacky 1716193323Sed // Delete the JMP if it's equivalent to a fall-through. 
1717193323Sed if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { 1718193323Sed TBB = 0; 1719193323Sed I->eraseFromParent(); 1720193323Sed I = MBB.end(); 1721193323Sed continue; 1722193323Sed } 1723200581Srdivacky 1724193323Sed // TBB is used to indicate the unconditinal destination. 1725193323Sed TBB = I->getOperand(0).getMBB(); 1726193323Sed continue; 1727193323Sed } 1728200581Srdivacky 1729193323Sed // Handle conditional branches. 1730193323Sed X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode()); 1731193323Sed if (BranchCode == X86::COND_INVALID) 1732193323Sed return true; // Can't handle indirect branch. 1733200581Srdivacky 1734193323Sed // Working from the bottom, handle the first conditional branch. 1735193323Sed if (Cond.empty()) { 1736193323Sed FBB = TBB; 1737193323Sed TBB = I->getOperand(0).getMBB(); 1738193323Sed Cond.push_back(MachineOperand::CreateImm(BranchCode)); 1739193323Sed continue; 1740193323Sed } 1741200581Srdivacky 1742200581Srdivacky // Handle subsequent conditional branches. Only handle the case where all 1743200581Srdivacky // conditional branches branch to the same destination and their condition 1744200581Srdivacky // opcodes fit one of the special multi-branch idioms. 1745193323Sed assert(Cond.size() == 1); 1746193323Sed assert(TBB); 1747200581Srdivacky 1748200581Srdivacky // Only handle the case where all conditional branches branch to the same 1749200581Srdivacky // destination. 1750193323Sed if (TBB != I->getOperand(0).getMBB()) 1751193323Sed return true; 1752200581Srdivacky 1753200581Srdivacky // If the conditions are the same, we can leave them alone. 1754193323Sed X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); 1755193323Sed if (OldBranchCode == BranchCode) 1756193323Sed continue; 1757200581Srdivacky 1758200581Srdivacky // If they differ, see if they fit one of the known patterns. 
Theoretically, 1759200581Srdivacky // we could handle more patterns here, but we shouldn't expect to see them 1760200581Srdivacky // if instruction selection has done a reasonable job. 1761193323Sed if ((OldBranchCode == X86::COND_NP && 1762193323Sed BranchCode == X86::COND_E) || 1763193323Sed (OldBranchCode == X86::COND_E && 1764193323Sed BranchCode == X86::COND_NP)) 1765193323Sed BranchCode = X86::COND_NP_OR_E; 1766193323Sed else if ((OldBranchCode == X86::COND_P && 1767193323Sed BranchCode == X86::COND_NE) || 1768193323Sed (OldBranchCode == X86::COND_NE && 1769193323Sed BranchCode == X86::COND_P)) 1770193323Sed BranchCode = X86::COND_NE_OR_P; 1771193323Sed else 1772193323Sed return true; 1773200581Srdivacky 1774193323Sed // Update the MachineOperand. 1775193323Sed Cond[0].setImm(BranchCode); 1776193323Sed } 1777193323Sed 1778193323Sed return false; 1779193323Sed} 1780193323Sed 1781193323Sedunsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 1782193323Sed MachineBasicBlock::iterator I = MBB.end(); 1783193323Sed unsigned Count = 0; 1784193323Sed 1785193323Sed while (I != MBB.begin()) { 1786193323Sed --I; 1787206083Srdivacky if (I->isDebugValue()) 1788206083Srdivacky continue; 1789203954Srdivacky if (I->getOpcode() != X86::JMP_4 && 1790193323Sed GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) 1791193323Sed break; 1792193323Sed // Remove the branch. 1793193323Sed I->eraseFromParent(); 1794193323Sed I = MBB.end(); 1795193323Sed ++Count; 1796193323Sed } 1797193323Sed 1798193323Sed return Count; 1799193323Sed} 1800193323Sed 1801193323Sedunsigned 1802193323SedX86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 1803193323Sed MachineBasicBlock *FBB, 1804193323Sed const SmallVectorImpl<MachineOperand> &Cond) const { 1805193323Sed // FIXME this should probably have a DebugLoc operand 1806193323Sed DebugLoc dl = DebugLoc::getUnknownLoc(); 1807193323Sed // Shouldn't be a fall through. 
1808193323Sed assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 1809193323Sed assert((Cond.size() == 1 || Cond.size() == 0) && 1810193323Sed "X86 branch conditions have one component!"); 1811193323Sed 1812193323Sed if (Cond.empty()) { 1813193323Sed // Unconditional branch? 1814193323Sed assert(!FBB && "Unconditional branch with multiple successors!"); 1815203954Srdivacky BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); 1816193323Sed return 1; 1817193323Sed } 1818193323Sed 1819193323Sed // Conditional branch. 1820193323Sed unsigned Count = 0; 1821193323Sed X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); 1822193323Sed switch (CC) { 1823193323Sed case X86::COND_NP_OR_E: 1824193323Sed // Synthesize NP_OR_E with two branches. 1825203954Srdivacky BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); 1826193323Sed ++Count; 1827203954Srdivacky BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); 1828193323Sed ++Count; 1829193323Sed break; 1830193323Sed case X86::COND_NE_OR_P: 1831193323Sed // Synthesize NE_OR_P with two branches. 1832203954Srdivacky BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); 1833193323Sed ++Count; 1834203954Srdivacky BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); 1835193323Sed ++Count; 1836193323Sed break; 1837193323Sed default: { 1838193323Sed unsigned Opc = GetCondBranchFromCond(CC); 1839193323Sed BuildMI(&MBB, dl, get(Opc)).addMBB(TBB); 1840193323Sed ++Count; 1841193323Sed } 1842193323Sed } 1843193323Sed if (FBB) { 1844193323Sed // Two-way Conditional branch. Insert the second branch. 1845203954Srdivacky BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); 1846193323Sed ++Count; 1847193323Sed } 1848193323Sed return Count; 1849193323Sed} 1850193323Sed 1851193323Sed/// isHReg - Test if the given register is a physical h register. 
1852193323Sedstatic bool isHReg(unsigned Reg) { 1853193323Sed return X86::GR8_ABCD_HRegClass.contains(Reg); 1854193323Sed} 1855193323Sed 1856193323Sedbool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, 1857193323Sed MachineBasicBlock::iterator MI, 1858193323Sed unsigned DestReg, unsigned SrcReg, 1859193323Sed const TargetRegisterClass *DestRC, 1860193323Sed const TargetRegisterClass *SrcRC) const { 1861203954Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 1862193323Sed 1863193323Sed // Determine if DstRC and SrcRC have a common superclass in common. 1864193323Sed const TargetRegisterClass *CommonRC = DestRC; 1865193323Sed if (DestRC == SrcRC) 1866193323Sed /* Source and destination have the same register class. */; 1867193323Sed else if (CommonRC->hasSuperClass(SrcRC)) 1868193323Sed CommonRC = SrcRC; 1869198090Srdivacky else if (!DestRC->hasSubClass(SrcRC)) { 1870198090Srdivacky // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, 1871204642Srdivacky // but we want to copy them as GR64. Similarly, for GR32_NOREX and 1872198090Srdivacky // GR32_NOSP, copy as GR32. 
1873198090Srdivacky if (SrcRC->hasSuperClass(&X86::GR64RegClass) && 1874198090Srdivacky DestRC->hasSuperClass(&X86::GR64RegClass)) 1875198090Srdivacky CommonRC = &X86::GR64RegClass; 1876198090Srdivacky else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && 1877198090Srdivacky DestRC->hasSuperClass(&X86::GR32RegClass)) 1878198090Srdivacky CommonRC = &X86::GR32RegClass; 1879198090Srdivacky else 1880198090Srdivacky CommonRC = 0; 1881198090Srdivacky } 1882193323Sed 1883193323Sed if (CommonRC) { 1884193323Sed unsigned Opc; 1885198090Srdivacky if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { 1886193323Sed Opc = X86::MOV64rr; 1887198090Srdivacky } else if (CommonRC == &X86::GR32RegClass || 1888198090Srdivacky CommonRC == &X86::GR32_NOSPRegClass) { 1889193323Sed Opc = X86::MOV32rr; 1890193323Sed } else if (CommonRC == &X86::GR16RegClass) { 1891193323Sed Opc = X86::MOV16rr; 1892193323Sed } else if (CommonRC == &X86::GR8RegClass) { 1893193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 1894193323Sed // move. Otherwise use a normal move. 
1895193323Sed if ((isHReg(DestReg) || isHReg(SrcReg)) && 1896193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 1897193323Sed Opc = X86::MOV8rr_NOREX; 1898193323Sed else 1899193323Sed Opc = X86::MOV8rr; 1900193323Sed } else if (CommonRC == &X86::GR64_ABCDRegClass) { 1901193323Sed Opc = X86::MOV64rr; 1902193323Sed } else if (CommonRC == &X86::GR32_ABCDRegClass) { 1903193323Sed Opc = X86::MOV32rr; 1904193323Sed } else if (CommonRC == &X86::GR16_ABCDRegClass) { 1905193323Sed Opc = X86::MOV16rr; 1906193323Sed } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { 1907193323Sed Opc = X86::MOV8rr; 1908193323Sed } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { 1909193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1910193323Sed Opc = X86::MOV8rr_NOREX; 1911193323Sed else 1912193323Sed Opc = X86::MOV8rr; 1913198090Srdivacky } else if (CommonRC == &X86::GR64_NOREXRegClass || 1914198090Srdivacky CommonRC == &X86::GR64_NOREX_NOSPRegClass) { 1915193323Sed Opc = X86::MOV64rr; 1916193323Sed } else if (CommonRC == &X86::GR32_NOREXRegClass) { 1917193323Sed Opc = X86::MOV32rr; 1918193323Sed } else if (CommonRC == &X86::GR16_NOREXRegClass) { 1919193323Sed Opc = X86::MOV16rr; 1920193323Sed } else if (CommonRC == &X86::GR8_NOREXRegClass) { 1921193323Sed Opc = X86::MOV8rr; 1922205218Srdivacky } else if (CommonRC == &X86::GR64_TCRegClass) { 1923205218Srdivacky Opc = X86::MOV64rr_TC; 1924205218Srdivacky } else if (CommonRC == &X86::GR32_TCRegClass) { 1925205218Srdivacky Opc = X86::MOV32rr_TC; 1926193323Sed } else if (CommonRC == &X86::RFP32RegClass) { 1927193323Sed Opc = X86::MOV_Fp3232; 1928193323Sed } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { 1929193323Sed Opc = X86::MOV_Fp6464; 1930193323Sed } else if (CommonRC == &X86::RFP80RegClass) { 1931193323Sed Opc = X86::MOV_Fp8080; 1932193323Sed } else if (CommonRC == &X86::FR32RegClass) { 1933193323Sed Opc = X86::FsMOVAPSrr; 1934193323Sed } else if (CommonRC == &X86::FR64RegClass) { 1935193323Sed Opc = 
X86::FsMOVAPDrr; 1936193323Sed } else if (CommonRC == &X86::VR128RegClass) { 1937193323Sed Opc = X86::MOVAPSrr; 1938193323Sed } else if (CommonRC == &X86::VR64RegClass) { 1939193323Sed Opc = X86::MMX_MOVQ64rr; 1940193323Sed } else { 1941193323Sed return false; 1942193323Sed } 1943193323Sed BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); 1944193323Sed return true; 1945193323Sed } 1946198090Srdivacky 1947193323Sed // Moving EFLAGS to / from another register requires a push and a pop. 1948193323Sed if (SrcRC == &X86::CCRRegClass) { 1949193323Sed if (SrcReg != X86::EFLAGS) 1950193323Sed return false; 1951198090Srdivacky if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { 1952201360Srdivacky BuildMI(MBB, MI, DL, get(X86::PUSHFQ64)); 1953193323Sed BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); 1954193323Sed return true; 1955198090Srdivacky } else if (DestRC == &X86::GR32RegClass || 1956198090Srdivacky DestRC == &X86::GR32_NOSPRegClass) { 1957193323Sed BuildMI(MBB, MI, DL, get(X86::PUSHFD)); 1958193323Sed BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); 1959193323Sed return true; 1960193323Sed } 1961193323Sed } else if (DestRC == &X86::CCRRegClass) { 1962193323Sed if (DestReg != X86::EFLAGS) 1963193323Sed return false; 1964198090Srdivacky if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { 1965193323Sed BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); 1966193323Sed BuildMI(MBB, MI, DL, get(X86::POPFQ)); 1967193323Sed return true; 1968198090Srdivacky } else if (SrcRC == &X86::GR32RegClass || 1969198090Srdivacky DestRC == &X86::GR32_NOSPRegClass) { 1970193323Sed BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); 1971193323Sed BuildMI(MBB, MI, DL, get(X86::POPFD)); 1972193323Sed return true; 1973193323Sed } 1974193323Sed } 1975193323Sed 1976193323Sed // Moving from ST(0) turns into FpGET_ST0_32 etc. 1977193323Sed if (SrcRC == &X86::RSTRegClass) { 1978193323Sed // Copying from ST(0)/ST(1). 
1979193323Sed if (SrcReg != X86::ST0 && SrcReg != X86::ST1) 1980193323Sed // Can only copy from ST(0)/ST(1) right now 1981193323Sed return false; 1982193323Sed bool isST0 = SrcReg == X86::ST0; 1983193323Sed unsigned Opc; 1984193323Sed if (DestRC == &X86::RFP32RegClass) 1985193323Sed Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; 1986193323Sed else if (DestRC == &X86::RFP64RegClass) 1987193323Sed Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; 1988193323Sed else { 1989193323Sed if (DestRC != &X86::RFP80RegClass) 1990193323Sed return false; 1991193323Sed Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; 1992193323Sed } 1993193323Sed BuildMI(MBB, MI, DL, get(Opc), DestReg); 1994193323Sed return true; 1995193323Sed } 1996193323Sed 1997193323Sed // Moving to ST(0) turns into FpSET_ST0_32 etc. 1998193323Sed if (DestRC == &X86::RSTRegClass) { 1999193323Sed // Copying to ST(0) / ST(1). 2000193323Sed if (DestReg != X86::ST0 && DestReg != X86::ST1) 2001193323Sed // Can only copy to TOS right now 2002193323Sed return false; 2003193323Sed bool isST0 = DestReg == X86::ST0; 2004193323Sed unsigned Opc; 2005193323Sed if (SrcRC == &X86::RFP32RegClass) 2006193323Sed Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32; 2007193323Sed else if (SrcRC == &X86::RFP64RegClass) 2008193323Sed Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64; 2009193323Sed else { 2010193323Sed if (SrcRC != &X86::RFP80RegClass) 2011193323Sed return false; 2012193323Sed Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80; 2013193323Sed } 2014193323Sed BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg); 2015193323Sed return true; 2016193323Sed } 2017193323Sed 2018193323Sed // Not yet supported! 
2019193323Sed return false; 2020193323Sed} 2021193323Sed 2022193323Sedstatic unsigned getStoreRegOpcode(unsigned SrcReg, 2023193323Sed const TargetRegisterClass *RC, 2024193323Sed bool isStackAligned, 2025193323Sed TargetMachine &TM) { 2026193323Sed unsigned Opc = 0; 2027198090Srdivacky if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { 2028193323Sed Opc = X86::MOV64mr; 2029198090Srdivacky } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { 2030193323Sed Opc = X86::MOV32mr; 2031193323Sed } else if (RC == &X86::GR16RegClass) { 2032193323Sed Opc = X86::MOV16mr; 2033193323Sed } else if (RC == &X86::GR8RegClass) { 2034193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 2035193323Sed // move. Otherwise use a normal move. 2036193323Sed if (isHReg(SrcReg) && 2037193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 2038193323Sed Opc = X86::MOV8mr_NOREX; 2039193323Sed else 2040193323Sed Opc = X86::MOV8mr; 2041193323Sed } else if (RC == &X86::GR64_ABCDRegClass) { 2042193323Sed Opc = X86::MOV64mr; 2043193323Sed } else if (RC == &X86::GR32_ABCDRegClass) { 2044193323Sed Opc = X86::MOV32mr; 2045193323Sed } else if (RC == &X86::GR16_ABCDRegClass) { 2046193323Sed Opc = X86::MOV16mr; 2047193323Sed } else if (RC == &X86::GR8_ABCD_LRegClass) { 2048193323Sed Opc = X86::MOV8mr; 2049193323Sed } else if (RC == &X86::GR8_ABCD_HRegClass) { 2050193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2051193323Sed Opc = X86::MOV8mr_NOREX; 2052193323Sed else 2053193323Sed Opc = X86::MOV8mr; 2054198090Srdivacky } else if (RC == &X86::GR64_NOREXRegClass || 2055198090Srdivacky RC == &X86::GR64_NOREX_NOSPRegClass) { 2056193323Sed Opc = X86::MOV64mr; 2057193323Sed } else if (RC == &X86::GR32_NOREXRegClass) { 2058193323Sed Opc = X86::MOV32mr; 2059193323Sed } else if (RC == &X86::GR16_NOREXRegClass) { 2060193323Sed Opc = X86::MOV16mr; 2061193323Sed } else if (RC == &X86::GR8_NOREXRegClass) { 2062193323Sed Opc = X86::MOV8mr; 
2063205218Srdivacky } else if (RC == &X86::GR64_TCRegClass) { 2064205218Srdivacky Opc = X86::MOV64mr_TC; 2065205218Srdivacky } else if (RC == &X86::GR32_TCRegClass) { 2066205218Srdivacky Opc = X86::MOV32mr_TC; 2067193323Sed } else if (RC == &X86::RFP80RegClass) { 2068193323Sed Opc = X86::ST_FpP80m; // pops 2069193323Sed } else if (RC == &X86::RFP64RegClass) { 2070193323Sed Opc = X86::ST_Fp64m; 2071193323Sed } else if (RC == &X86::RFP32RegClass) { 2072193323Sed Opc = X86::ST_Fp32m; 2073193323Sed } else if (RC == &X86::FR32RegClass) { 2074193323Sed Opc = X86::MOVSSmr; 2075193323Sed } else if (RC == &X86::FR64RegClass) { 2076193323Sed Opc = X86::MOVSDmr; 2077193323Sed } else if (RC == &X86::VR128RegClass) { 2078193323Sed // If stack is realigned we can use aligned stores. 2079193323Sed Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr; 2080193323Sed } else if (RC == &X86::VR64RegClass) { 2081193323Sed Opc = X86::MMX_MOVQ64mr; 2082193323Sed } else { 2083198090Srdivacky llvm_unreachable("Unknown regclass"); 2084193323Sed } 2085193323Sed 2086193323Sed return Opc; 2087193323Sed} 2088193323Sed 2089193323Sedvoid X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 2090193323Sed MachineBasicBlock::iterator MI, 2091193323Sed unsigned SrcReg, bool isKill, int FrameIdx, 2092193323Sed const TargetRegisterClass *RC) const { 2093193323Sed const MachineFunction &MF = *MBB.getParent(); 2094202878Srdivacky bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); 2095193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2096203954Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 2097193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) 2098193323Sed .addReg(SrcReg, getKillRegState(isKill)); 2099193323Sed} 2100193323Sed 2101193323Sedvoid X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, 2102193323Sed bool isKill, 2103193323Sed SmallVectorImpl<MachineOperand> &Addr, 2104193323Sed const TargetRegisterClass *RC, 
2105198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 2106198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 2107193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2108199481Srdivacky bool isAligned = (*MMOBegin)->getAlignment() >= 16; 2109193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2110193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2111193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 2112193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2113193323Sed MIB.addOperand(Addr[i]); 2114193323Sed MIB.addReg(SrcReg, getKillRegState(isKill)); 2115198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 2116193323Sed NewMIs.push_back(MIB); 2117193323Sed} 2118193323Sed 2119193323Sedstatic unsigned getLoadRegOpcode(unsigned DestReg, 2120193323Sed const TargetRegisterClass *RC, 2121193323Sed bool isStackAligned, 2122193323Sed const TargetMachine &TM) { 2123193323Sed unsigned Opc = 0; 2124198090Srdivacky if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { 2125193323Sed Opc = X86::MOV64rm; 2126198090Srdivacky } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { 2127193323Sed Opc = X86::MOV32rm; 2128193323Sed } else if (RC == &X86::GR16RegClass) { 2129193323Sed Opc = X86::MOV16rm; 2130193323Sed } else if (RC == &X86::GR8RegClass) { 2131193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 2132193323Sed // move. Otherwise use a normal move. 
2133193323Sed if (isHReg(DestReg) && 2134193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 2135193323Sed Opc = X86::MOV8rm_NOREX; 2136193323Sed else 2137193323Sed Opc = X86::MOV8rm; 2138193323Sed } else if (RC == &X86::GR64_ABCDRegClass) { 2139193323Sed Opc = X86::MOV64rm; 2140193323Sed } else if (RC == &X86::GR32_ABCDRegClass) { 2141193323Sed Opc = X86::MOV32rm; 2142193323Sed } else if (RC == &X86::GR16_ABCDRegClass) { 2143193323Sed Opc = X86::MOV16rm; 2144193323Sed } else if (RC == &X86::GR8_ABCD_LRegClass) { 2145193323Sed Opc = X86::MOV8rm; 2146193323Sed } else if (RC == &X86::GR8_ABCD_HRegClass) { 2147193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2148193323Sed Opc = X86::MOV8rm_NOREX; 2149193323Sed else 2150193323Sed Opc = X86::MOV8rm; 2151198090Srdivacky } else if (RC == &X86::GR64_NOREXRegClass || 2152198090Srdivacky RC == &X86::GR64_NOREX_NOSPRegClass) { 2153193323Sed Opc = X86::MOV64rm; 2154193323Sed } else if (RC == &X86::GR32_NOREXRegClass) { 2155193323Sed Opc = X86::MOV32rm; 2156193323Sed } else if (RC == &X86::GR16_NOREXRegClass) { 2157193323Sed Opc = X86::MOV16rm; 2158193323Sed } else if (RC == &X86::GR8_NOREXRegClass) { 2159193323Sed Opc = X86::MOV8rm; 2160205218Srdivacky } else if (RC == &X86::GR64_TCRegClass) { 2161205218Srdivacky Opc = X86::MOV64rm_TC; 2162205218Srdivacky } else if (RC == &X86::GR32_TCRegClass) { 2163205218Srdivacky Opc = X86::MOV32rm_TC; 2164193323Sed } else if (RC == &X86::RFP80RegClass) { 2165193323Sed Opc = X86::LD_Fp80m; 2166193323Sed } else if (RC == &X86::RFP64RegClass) { 2167193323Sed Opc = X86::LD_Fp64m; 2168193323Sed } else if (RC == &X86::RFP32RegClass) { 2169193323Sed Opc = X86::LD_Fp32m; 2170193323Sed } else if (RC == &X86::FR32RegClass) { 2171193323Sed Opc = X86::MOVSSrm; 2172193323Sed } else if (RC == &X86::FR64RegClass) { 2173193323Sed Opc = X86::MOVSDrm; 2174193323Sed } else if (RC == &X86::VR128RegClass) { 2175193323Sed // If stack is realigned we can use aligned loads. 
2176193323Sed Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm; 2177193323Sed } else if (RC == &X86::VR64RegClass) { 2178193323Sed Opc = X86::MMX_MOVQ64rm; 2179193323Sed } else { 2180198090Srdivacky llvm_unreachable("Unknown regclass"); 2181193323Sed } 2182193323Sed 2183193323Sed return Opc; 2184193323Sed} 2185193323Sed 2186193323Sedvoid X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 2187193323Sed MachineBasicBlock::iterator MI, 2188193323Sed unsigned DestReg, int FrameIdx, 2189193323Sed const TargetRegisterClass *RC) const{ 2190193323Sed const MachineFunction &MF = *MBB.getParent(); 2191202878Srdivacky bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); 2192193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2193203954Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 2194193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); 2195193323Sed} 2196193323Sed 2197193323Sedvoid X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, 2198193323Sed SmallVectorImpl<MachineOperand> &Addr, 2199193323Sed const TargetRegisterClass *RC, 2200198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 2201198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 2202193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2203199481Srdivacky bool isAligned = (*MMOBegin)->getAlignment() >= 16; 2204193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2205193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2206193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); 2207193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2208193323Sed MIB.addOperand(Addr[i]); 2209198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 2210193323Sed NewMIs.push_back(MIB); 2211193323Sed} 2212193323Sed 2213193323Sedbool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 2214193323Sed MachineBasicBlock::iterator MI, 2215193323Sed const std::vector<CalleeSavedInfo> &CSI) const 
{ 2216193323Sed if (CSI.empty()) 2217193323Sed return false; 2218193323Sed 2219202878Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 2220193323Sed 2221193323Sed bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2222198090Srdivacky bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); 2223193323Sed unsigned SlotSize = is64Bit ? 8 : 4; 2224193323Sed 2225193323Sed MachineFunction &MF = *MBB.getParent(); 2226198090Srdivacky unsigned FPReg = RI.getFrameRegister(MF); 2227193323Sed X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2228193574Sed unsigned CalleeFrameSize = 0; 2229193323Sed 2230193323Sed unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; 2231193323Sed for (unsigned i = CSI.size(); i != 0; --i) { 2232193323Sed unsigned Reg = CSI[i-1].getReg(); 2233193574Sed const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); 2234193323Sed // Add the callee-saved register as live-in. It's killed at the spill. 2235193323Sed MBB.addLiveIn(Reg); 2236198090Srdivacky if (Reg == FPReg) 2237198090Srdivacky // X86RegisterInfo::emitPrologue will handle spilling of frame register. 
2238198090Srdivacky continue; 2239198090Srdivacky if (RegClass != &X86::VR128RegClass && !isWin64) { 2240193574Sed CalleeFrameSize += SlotSize; 2241198090Srdivacky BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); 2242193574Sed } else { 2243193574Sed storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); 2244193574Sed } 2245193323Sed } 2246193574Sed 2247193574Sed X86FI->setCalleeSavedFrameSize(CalleeFrameSize); 2248193323Sed return true; 2249193323Sed} 2250193323Sed 2251193323Sedbool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2252193323Sed MachineBasicBlock::iterator MI, 2253193323Sed const std::vector<CalleeSavedInfo> &CSI) const { 2254193323Sed if (CSI.empty()) 2255193323Sed return false; 2256193323Sed 2257202878Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 2258193323Sed 2259198090Srdivacky MachineFunction &MF = *MBB.getParent(); 2260198090Srdivacky unsigned FPReg = RI.getFrameRegister(MF); 2261193323Sed bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2262198090Srdivacky bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); 2263193323Sed unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; 2264193323Sed for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2265193323Sed unsigned Reg = CSI[i].getReg(); 2266198090Srdivacky if (Reg == FPReg) 2267198090Srdivacky // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 
2268198090Srdivacky continue; 2269193574Sed const TargetRegisterClass *RegClass = CSI[i].getRegClass(); 2270198090Srdivacky if (RegClass != &X86::VR128RegClass && !isWin64) { 2271193574Sed BuildMI(MBB, MI, DL, get(Opc), Reg); 2272193574Sed } else { 2273193574Sed loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); 2274193574Sed } 2275193323Sed } 2276193323Sed return true; 2277193323Sed} 2278193323Sed 2279193323Sedstatic MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, 2280193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2281193323Sed MachineInstr *MI, 2282193323Sed const TargetInstrInfo &TII) { 2283193323Sed // Create the base instruction with the memory operand as the first part. 2284193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2285193323Sed MI->getDebugLoc(), true); 2286193323Sed MachineInstrBuilder MIB(NewMI); 2287193323Sed unsigned NumAddrOps = MOs.size(); 2288193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2289193323Sed MIB.addOperand(MOs[i]); 2290193323Sed if (NumAddrOps < 4) // FrameIndex only 2291193323Sed addOffset(MIB, 0); 2292193323Sed 2293193323Sed // Loop over the rest of the ri operands, converting them over. 
2294193323Sed unsigned NumOps = MI->getDesc().getNumOperands()-2; 2295193323Sed for (unsigned i = 0; i != NumOps; ++i) { 2296193323Sed MachineOperand &MO = MI->getOperand(i+2); 2297193323Sed MIB.addOperand(MO); 2298193323Sed } 2299193323Sed for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { 2300193323Sed MachineOperand &MO = MI->getOperand(i); 2301193323Sed MIB.addOperand(MO); 2302193323Sed } 2303193323Sed return MIB; 2304193323Sed} 2305193323Sed 2306193323Sedstatic MachineInstr *FuseInst(MachineFunction &MF, 2307193323Sed unsigned Opcode, unsigned OpNo, 2308193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2309193323Sed MachineInstr *MI, const TargetInstrInfo &TII) { 2310193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2311193323Sed MI->getDebugLoc(), true); 2312193323Sed MachineInstrBuilder MIB(NewMI); 2313193323Sed 2314193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2315193323Sed MachineOperand &MO = MI->getOperand(i); 2316193323Sed if (i == OpNo) { 2317193323Sed assert(MO.isReg() && "Expected to fold into reg operand!"); 2318193323Sed unsigned NumAddrOps = MOs.size(); 2319193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2320193323Sed MIB.addOperand(MOs[i]); 2321193323Sed if (NumAddrOps < 4) // FrameIndex only 2322193323Sed addOffset(MIB, 0); 2323193323Sed } else { 2324193323Sed MIB.addOperand(MO); 2325193323Sed } 2326193323Sed } 2327193323Sed return MIB; 2328193323Sed} 2329193323Sed 2330193323Sedstatic MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 2331193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2332193323Sed MachineInstr *MI) { 2333193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 2334193323Sed MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); 2335193323Sed 2336193323Sed unsigned NumAddrOps = MOs.size(); 2337193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2338193323Sed MIB.addOperand(MOs[i]); 2339193323Sed if 
(NumAddrOps < 4) // FrameIndex only 2340193323Sed addOffset(MIB, 0); 2341193323Sed return MIB.addImm(0); 2342193323Sed} 2343193323Sed 2344193323SedMachineInstr* 2345193323SedX86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2346193323Sed MachineInstr *MI, unsigned i, 2347198090Srdivacky const SmallVectorImpl<MachineOperand> &MOs, 2348198090Srdivacky unsigned Size, unsigned Align) const { 2349198090Srdivacky const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; 2350193323Sed bool isTwoAddrFold = false; 2351193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 2352193323Sed bool isTwoAddr = NumOps > 1 && 2353193323Sed MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2354193323Sed 2355193323Sed MachineInstr *NewMI = NULL; 2356193323Sed // Folding a memory location into the two-address part of a two-address 2357193323Sed // instruction is different than folding it other places. It requires 2358193323Sed // replacing the *two* registers with the memory location. 
2359193323Sed if (isTwoAddr && NumOps >= 2 && i < 2 && 2360193323Sed MI->getOperand(0).isReg() && 2361193323Sed MI->getOperand(1).isReg() && 2362193323Sed MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 2363193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2364193323Sed isTwoAddrFold = true; 2365193323Sed } else if (i == 0) { // If operand 0 2366202375Srdivacky if (MI->getOpcode() == X86::MOV64r0) 2367202375Srdivacky NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); 2368202375Srdivacky else if (MI->getOpcode() == X86::MOV32r0) 2369193323Sed NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); 2370202375Srdivacky else if (MI->getOpcode() == X86::MOV16r0) 2371202375Srdivacky NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); 2372193323Sed else if (MI->getOpcode() == X86::MOV8r0) 2373193323Sed NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); 2374193323Sed if (NewMI) 2375193323Sed return NewMI; 2376193323Sed 2377193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 2378193323Sed } else if (i == 1) { 2379193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 2380193323Sed } else if (i == 2) { 2381193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 2382193323Sed } 2383193323Sed 2384193323Sed // If table selected... 2385193323Sed if (OpcodeTablePtr) { 2386193323Sed // Find the Opcode to fuse 2387199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2388193323Sed OpcodeTablePtr->find((unsigned*)MI->getOpcode()); 2389193323Sed if (I != OpcodeTablePtr->end()) { 2390198090Srdivacky unsigned Opcode = I->second.first; 2391198090Srdivacky unsigned MinAlign = I->second.second; 2392198090Srdivacky if (Align < MinAlign) 2393198090Srdivacky return NULL; 2394198090Srdivacky bool NarrowToMOV32rm = false; 2395198090Srdivacky if (Size) { 2396198090Srdivacky unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); 2397198090Srdivacky if (Size < RCSize) { 2398198090Srdivacky // Check if it's safe to fold the load. 
If the size of the object is 2399198090Srdivacky // narrower than the load width, then it's not. 2400198090Srdivacky if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) 2401198090Srdivacky return NULL; 2402198090Srdivacky // If this is a 64-bit load, but the spill slot is 32, then we can do 2403198090Srdivacky // a 32-bit load which is implicitly zero-extended. This likely is due 2404198090Srdivacky // to liveintervalanalysis remat'ing a load from stack slot. 2405198090Srdivacky if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) 2406198090Srdivacky return NULL; 2407198090Srdivacky Opcode = X86::MOV32rm; 2408198090Srdivacky NarrowToMOV32rm = true; 2409198090Srdivacky } 2410198090Srdivacky } 2411198090Srdivacky 2412193323Sed if (isTwoAddrFold) 2413198090Srdivacky NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); 2414193323Sed else 2415198090Srdivacky NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this); 2416198090Srdivacky 2417198090Srdivacky if (NarrowToMOV32rm) { 2418198090Srdivacky // If this is the special case where we use a MOV32rm to load a 32-bit 2419198090Srdivacky // value and zero-extend the top bits. Change the destination register 2420198090Srdivacky // to a 32-bit one. 
2421198090Srdivacky unsigned DstReg = NewMI->getOperand(0).getReg(); 2422198090Srdivacky if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 2423198090Srdivacky NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, 2424198090Srdivacky 4/*x86_subreg_32bit*/)); 2425198090Srdivacky else 2426198090Srdivacky NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/); 2427198090Srdivacky } 2428193323Sed return NewMI; 2429193323Sed } 2430193323Sed } 2431193323Sed 2432193323Sed // No fusion 2433193323Sed if (PrintFailedFusing) 2434202375Srdivacky dbgs() << "We failed to fuse operand " << i << " in " << *MI; 2435193323Sed return NULL; 2436193323Sed} 2437193323Sed 2438193323Sed 2439193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2440193323Sed MachineInstr *MI, 2441198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 2442193323Sed int FrameIndex) const { 2443193323Sed // Check switch flag 2444193323Sed if (NoFusing) return NULL; 2445193323Sed 2446201360Srdivacky if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2447201360Srdivacky switch (MI->getOpcode()) { 2448201360Srdivacky case X86::CVTSD2SSrr: 2449201360Srdivacky case X86::Int_CVTSD2SSrr: 2450201360Srdivacky case X86::CVTSS2SDrr: 2451201360Srdivacky case X86::Int_CVTSS2SDrr: 2452201360Srdivacky case X86::RCPSSr: 2453201360Srdivacky case X86::RCPSSr_Int: 2454201360Srdivacky case X86::ROUNDSDr_Int: 2455201360Srdivacky case X86::ROUNDSSr_Int: 2456201360Srdivacky case X86::RSQRTSSr: 2457201360Srdivacky case X86::RSQRTSSr_Int: 2458201360Srdivacky case X86::SQRTSSr: 2459201360Srdivacky case X86::SQRTSSr_Int: 2460201360Srdivacky return 0; 2461201360Srdivacky } 2462201360Srdivacky 2463193323Sed const MachineFrameInfo *MFI = MF.getFrameInfo(); 2464198090Srdivacky unsigned Size = MFI->getObjectSize(FrameIndex); 2465193323Sed unsigned Alignment = MFI->getObjectAlignment(FrameIndex); 2466193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2467193323Sed unsigned NewOpc = 0; 
2468198090Srdivacky unsigned RCSize = 0; 2469193323Sed switch (MI->getOpcode()) { 2470193323Sed default: return NULL; 2471198090Srdivacky case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; 2472198090Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; 2473198090Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; 2474198090Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; 2475193323Sed } 2476198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 2477198090Srdivacky // narrower than the load width, then it's not. 2478198090Srdivacky if (Size < RCSize) 2479198090Srdivacky return NULL; 2480193323Sed // Change to CMPXXri r, 0 first. 2481193323Sed MI->setDesc(get(NewOpc)); 2482193323Sed MI->getOperand(1).ChangeToImmediate(0); 2483193323Sed } else if (Ops.size() != 1) 2484193323Sed return NULL; 2485193323Sed 2486193323Sed SmallVector<MachineOperand,4> MOs; 2487193323Sed MOs.push_back(MachineOperand::CreateFI(FrameIndex)); 2488198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); 2489193323Sed} 2490193323Sed 2491193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2492193323Sed MachineInstr *MI, 2493198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 2494193323Sed MachineInstr *LoadMI) const { 2495193323Sed // Check switch flag 2496193323Sed if (NoFusing) return NULL; 2497193323Sed 2498201360Srdivacky if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2499201360Srdivacky switch (MI->getOpcode()) { 2500201360Srdivacky case X86::CVTSD2SSrr: 2501201360Srdivacky case X86::Int_CVTSD2SSrr: 2502201360Srdivacky case X86::CVTSS2SDrr: 2503201360Srdivacky case X86::Int_CVTSS2SDrr: 2504201360Srdivacky case X86::RCPSSr: 2505201360Srdivacky case X86::RCPSSr_Int: 2506201360Srdivacky case X86::ROUNDSDr_Int: 2507201360Srdivacky case X86::ROUNDSSr_Int: 2508201360Srdivacky case X86::RSQRTSSr: 2509201360Srdivacky case 
X86::RSQRTSSr_Int: 2510201360Srdivacky case X86::SQRTSSr: 2511201360Srdivacky case X86::SQRTSSr_Int: 2512201360Srdivacky return 0; 2513201360Srdivacky } 2514201360Srdivacky 2515193323Sed // Determine the alignment of the load. 2516193323Sed unsigned Alignment = 0; 2517193323Sed if (LoadMI->hasOneMemOperand()) 2518198090Srdivacky Alignment = (*LoadMI->memoperands_begin())->getAlignment(); 2519198090Srdivacky else 2520198090Srdivacky switch (LoadMI->getOpcode()) { 2521206083Srdivacky case X86::V_SET0PS: 2522206083Srdivacky case X86::V_SET0PD: 2523206083Srdivacky case X86::V_SET0PI: 2524198090Srdivacky case X86::V_SETALLONES: 2525198090Srdivacky Alignment = 16; 2526198090Srdivacky break; 2527198090Srdivacky case X86::FsFLD0SD: 2528198090Srdivacky Alignment = 8; 2529198090Srdivacky break; 2530198090Srdivacky case X86::FsFLD0SS: 2531198090Srdivacky Alignment = 4; 2532198090Srdivacky break; 2533198090Srdivacky default: 2534198090Srdivacky llvm_unreachable("Don't know how to fold this instruction!"); 2535193323Sed } 2536193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2537193323Sed unsigned NewOpc = 0; 2538193323Sed switch (MI->getOpcode()) { 2539193323Sed default: return NULL; 2540193323Sed case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 2541193323Sed case X86::TEST16rr: NewOpc = X86::CMP16ri; break; 2542193323Sed case X86::TEST32rr: NewOpc = X86::CMP32ri; break; 2543193323Sed case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; 2544193323Sed } 2545193323Sed // Change to CMPXXri r, 0 first. 
2546193323Sed MI->setDesc(get(NewOpc)); 2547193323Sed MI->getOperand(1).ChangeToImmediate(0); 2548193323Sed } else if (Ops.size() != 1) 2549193323Sed return NULL; 2550193323Sed 2551193323Sed SmallVector<MachineOperand,X86AddrNumOperands> MOs; 2552198090Srdivacky switch (LoadMI->getOpcode()) { 2553206083Srdivacky case X86::V_SET0PS: 2554206083Srdivacky case X86::V_SET0PD: 2555206083Srdivacky case X86::V_SET0PI: 2556198090Srdivacky case X86::V_SETALLONES: 2557198090Srdivacky case X86::FsFLD0SD: 2558198090Srdivacky case X86::FsFLD0SS: { 2559206083Srdivacky // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure. 2560193323Sed // Create a constant-pool entry and operands to load from it. 2561193323Sed 2562204961Srdivacky // Medium and large mode can't fold loads this way. 2563204961Srdivacky if (TM.getCodeModel() != CodeModel::Small && 2564204961Srdivacky TM.getCodeModel() != CodeModel::Kernel) 2565204961Srdivacky return NULL; 2566204961Srdivacky 2567193323Sed // x86-32 PIC requires a PIC base register for constant pools. 2568193323Sed unsigned PICBase = 0; 2569198090Srdivacky if (TM.getRelocationModel() == Reloc::PIC_) { 2570198090Srdivacky if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2571198090Srdivacky PICBase = X86::RIP; 2572198090Srdivacky else 2573198090Srdivacky // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); 2574198090Srdivacky // This doesn't work for several reasons. 2575198090Srdivacky // 1. GlobalBaseReg may have been spilled. 2576198090Srdivacky // 2. It may not be live at MI. 2577198090Srdivacky return NULL; 2578198090Srdivacky } 2579193323Sed 2580198090Srdivacky // Create a constant-pool entry. 
2581193323Sed MachineConstantPool &MCP = *MF.getConstantPool(); 2582198090Srdivacky const Type *Ty; 2583198090Srdivacky if (LoadMI->getOpcode() == X86::FsFLD0SS) 2584198090Srdivacky Ty = Type::getFloatTy(MF.getFunction()->getContext()); 2585198090Srdivacky else if (LoadMI->getOpcode() == X86::FsFLD0SD) 2586198090Srdivacky Ty = Type::getDoubleTy(MF.getFunction()->getContext()); 2587198090Srdivacky else 2588198090Srdivacky Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); 2589198090Srdivacky Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? 2590198090Srdivacky Constant::getAllOnesValue(Ty) : 2591198090Srdivacky Constant::getNullValue(Ty); 2592198090Srdivacky unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); 2593193323Sed 2594193323Sed // Create operands to load from the constant pool entry. 2595193323Sed MOs.push_back(MachineOperand::CreateReg(PICBase, false)); 2596193323Sed MOs.push_back(MachineOperand::CreateImm(1)); 2597193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 2598193323Sed MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); 2599193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 2600198090Srdivacky break; 2601198090Srdivacky } 2602198090Srdivacky default: { 2603193323Sed // Folding a normal load. Just copy the load's address operands. 
2604193323Sed unsigned NumOps = LoadMI->getDesc().getNumOperands(); 2605193323Sed for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) 2606193323Sed MOs.push_back(LoadMI->getOperand(i)); 2607198090Srdivacky break; 2608193323Sed } 2609198090Srdivacky } 2610198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); 2611193323Sed} 2612193323Sed 2613193323Sed 2614193323Sedbool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, 2615193323Sed const SmallVectorImpl<unsigned> &Ops) const { 2616193323Sed // Check switch flag 2617193323Sed if (NoFusing) return 0; 2618193323Sed 2619193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2620193323Sed switch (MI->getOpcode()) { 2621193323Sed default: return false; 2622193323Sed case X86::TEST8rr: 2623193323Sed case X86::TEST16rr: 2624193323Sed case X86::TEST32rr: 2625193323Sed case X86::TEST64rr: 2626193323Sed return true; 2627193323Sed } 2628193323Sed } 2629193323Sed 2630193323Sed if (Ops.size() != 1) 2631193323Sed return false; 2632193323Sed 2633193323Sed unsigned OpNum = Ops[0]; 2634193323Sed unsigned Opc = MI->getOpcode(); 2635193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 2636193323Sed bool isTwoAddr = NumOps > 1 && 2637193323Sed MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2638193323Sed 2639193323Sed // Folding a memory location into the two-address part of a two-address 2640193323Sed // instruction is different than folding it other places. It requires 2641193323Sed // replacing the *two* registers with the memory location. 
2642198090Srdivacky const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; 2643193323Sed if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 2644193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2645193323Sed } else if (OpNum == 0) { // If operand 0 2646193323Sed switch (Opc) { 2647198090Srdivacky case X86::MOV8r0: 2648202375Srdivacky case X86::MOV16r0: 2649193323Sed case X86::MOV32r0: 2650202375Srdivacky case X86::MOV64r0: 2651193323Sed return true; 2652193323Sed default: break; 2653193323Sed } 2654193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 2655193323Sed } else if (OpNum == 1) { 2656193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 2657193323Sed } else if (OpNum == 2) { 2658193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 2659193323Sed } 2660193323Sed 2661193323Sed if (OpcodeTablePtr) { 2662193323Sed // Find the Opcode to fuse 2663199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2664193323Sed OpcodeTablePtr->find((unsigned*)Opc); 2665193323Sed if (I != OpcodeTablePtr->end()) 2666193323Sed return true; 2667193323Sed } 2668193323Sed return false; 2669193323Sed} 2670193323Sed 2671193323Sedbool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, 2672193323Sed unsigned Reg, bool UnfoldLoad, bool UnfoldStore, 2673193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2674199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2675193323Sed MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); 2676193323Sed if (I == MemOp2RegOpTable.end()) 2677193323Sed return false; 2678193323Sed unsigned Opc = I->second.first; 2679193323Sed unsigned Index = I->second.second & 0xf; 2680193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2681193323Sed bool FoldedStore = I->second.second & (1 << 5); 2682193323Sed if (UnfoldLoad && !FoldedLoad) 2683193323Sed return false; 2684193323Sed UnfoldLoad &= FoldedLoad; 2685193323Sed if (UnfoldStore && !FoldedStore) 2686193323Sed return 
false; 2687193323Sed UnfoldStore &= FoldedStore; 2688193323Sed 2689193323Sed const TargetInstrDesc &TID = get(Opc); 2690193323Sed const TargetOperandInfo &TOI = TID.OpInfo[Index]; 2691198090Srdivacky const TargetRegisterClass *RC = TOI.getRegClass(&RI); 2692193323Sed SmallVector<MachineOperand, X86AddrNumOperands> AddrOps; 2693193323Sed SmallVector<MachineOperand,2> BeforeOps; 2694193323Sed SmallVector<MachineOperand,2> AfterOps; 2695193323Sed SmallVector<MachineOperand,4> ImpOps; 2696193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2697193323Sed MachineOperand &Op = MI->getOperand(i); 2698193323Sed if (i >= Index && i < Index + X86AddrNumOperands) 2699193323Sed AddrOps.push_back(Op); 2700193323Sed else if (Op.isReg() && Op.isImplicit()) 2701193323Sed ImpOps.push_back(Op); 2702193323Sed else if (i < Index) 2703193323Sed BeforeOps.push_back(Op); 2704193323Sed else if (i > Index) 2705193323Sed AfterOps.push_back(Op); 2706193323Sed } 2707193323Sed 2708193323Sed // Emit the load instruction. 2709193323Sed if (UnfoldLoad) { 2710198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2711198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2712198090Srdivacky MF.extractLoadMemRefs(MI->memoperands_begin(), 2713198090Srdivacky MI->memoperands_end()); 2714198090Srdivacky loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); 2715193323Sed if (UnfoldStore) { 2716193323Sed // Address operands cannot be marked isKill. 2717193323Sed for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { 2718193323Sed MachineOperand &MO = NewMIs[0]->getOperand(i); 2719193323Sed if (MO.isReg()) 2720193323Sed MO.setIsKill(false); 2721193323Sed } 2722193323Sed } 2723193323Sed } 2724193323Sed 2725193323Sed // Emit the data processing instruction. 
2726193323Sed MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); 2727193323Sed MachineInstrBuilder MIB(DataMI); 2728193323Sed 2729193323Sed if (FoldedStore) 2730193323Sed MIB.addReg(Reg, RegState::Define); 2731193323Sed for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) 2732193323Sed MIB.addOperand(BeforeOps[i]); 2733193323Sed if (FoldedLoad) 2734193323Sed MIB.addReg(Reg); 2735193323Sed for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) 2736193323Sed MIB.addOperand(AfterOps[i]); 2737193323Sed for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { 2738193323Sed MachineOperand &MO = ImpOps[i]; 2739193323Sed MIB.addReg(MO.getReg(), 2740193323Sed getDefRegState(MO.isDef()) | 2741193323Sed RegState::Implicit | 2742193323Sed getKillRegState(MO.isKill()) | 2743195340Sed getDeadRegState(MO.isDead()) | 2744195340Sed getUndefRegState(MO.isUndef())); 2745193323Sed } 2746193323Sed // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 2747193323Sed unsigned NewOpc = 0; 2748193323Sed switch (DataMI->getOpcode()) { 2749193323Sed default: break; 2750193323Sed case X86::CMP64ri32: 2751193323Sed case X86::CMP32ri: 2752193323Sed case X86::CMP16ri: 2753193323Sed case X86::CMP8ri: { 2754193323Sed MachineOperand &MO0 = DataMI->getOperand(0); 2755193323Sed MachineOperand &MO1 = DataMI->getOperand(1); 2756193323Sed if (MO1.getImm() == 0) { 2757193323Sed switch (DataMI->getOpcode()) { 2758193323Sed default: break; 2759193323Sed case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; 2760193323Sed case X86::CMP32ri: NewOpc = X86::TEST32rr; break; 2761193323Sed case X86::CMP16ri: NewOpc = X86::TEST16rr; break; 2762193323Sed case X86::CMP8ri: NewOpc = X86::TEST8rr; break; 2763193323Sed } 2764193323Sed DataMI->setDesc(get(NewOpc)); 2765193323Sed MO1.ChangeToRegister(MO0.getReg(), false); 2766193323Sed } 2767193323Sed } 2768193323Sed } 2769193323Sed NewMIs.push_back(DataMI); 2770193323Sed 2771193323Sed // Emit the store instruction. 
2772193323Sed if (UnfoldStore) { 2773198090Srdivacky const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); 2774198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2775198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2776198090Srdivacky MF.extractStoreMemRefs(MI->memoperands_begin(), 2777198090Srdivacky MI->memoperands_end()); 2778198090Srdivacky storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); 2779193323Sed } 2780193323Sed 2781193323Sed return true; 2782193323Sed} 2783193323Sed 2784193323Sedbool 2785193323SedX86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 2786193323Sed SmallVectorImpl<SDNode*> &NewNodes) const { 2787193323Sed if (!N->isMachineOpcode()) 2788193323Sed return false; 2789193323Sed 2790199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2791193323Sed MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); 2792193323Sed if (I == MemOp2RegOpTable.end()) 2793193323Sed return false; 2794193323Sed unsigned Opc = I->second.first; 2795193323Sed unsigned Index = I->second.second & 0xf; 2796193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2797193323Sed bool FoldedStore = I->second.second & (1 << 5); 2798193323Sed const TargetInstrDesc &TID = get(Opc); 2799198090Srdivacky const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); 2800193323Sed unsigned NumDefs = TID.NumDefs; 2801193323Sed std::vector<SDValue> AddrOps; 2802193323Sed std::vector<SDValue> BeforeOps; 2803193323Sed std::vector<SDValue> AfterOps; 2804193323Sed DebugLoc dl = N->getDebugLoc(); 2805193323Sed unsigned NumOps = N->getNumOperands(); 2806193323Sed for (unsigned i = 0; i != NumOps-1; ++i) { 2807193323Sed SDValue Op = N->getOperand(i); 2808193323Sed if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) 2809193323Sed AddrOps.push_back(Op); 2810193323Sed else if (i < Index-NumDefs) 2811193323Sed BeforeOps.push_back(Op); 2812193323Sed else if (i > Index-NumDefs) 
2813193323Sed AfterOps.push_back(Op); 2814193323Sed } 2815193323Sed SDValue Chain = N->getOperand(NumOps-1); 2816193323Sed AddrOps.push_back(Chain); 2817193323Sed 2818193323Sed // Emit the load instruction. 2819193323Sed SDNode *Load = 0; 2820198090Srdivacky MachineFunction &MF = DAG.getMachineFunction(); 2821193323Sed if (FoldedLoad) { 2822198090Srdivacky EVT VT = *RC->vt_begin(); 2823199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2824199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2825199481Srdivacky MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2826199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2827199481Srdivacky bool isAligned = (*MMOs.first)->getAlignment() >= 16; 2828198090Srdivacky Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, 2829198090Srdivacky VT, MVT::Other, &AddrOps[0], AddrOps.size()); 2830193323Sed NewNodes.push_back(Load); 2831198090Srdivacky 2832198090Srdivacky // Preserve memory reference information. 2833198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2834193323Sed } 2835193323Sed 2836193323Sed // Emit the data processing instruction. 
2837198090Srdivacky std::vector<EVT> VTs; 2838193323Sed const TargetRegisterClass *DstRC = 0; 2839193323Sed if (TID.getNumDefs() > 0) { 2840198090Srdivacky DstRC = TID.OpInfo[0].getRegClass(&RI); 2841193323Sed VTs.push_back(*DstRC->vt_begin()); 2842193323Sed } 2843193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 2844198090Srdivacky EVT VT = N->getValueType(i); 2845193323Sed if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) 2846193323Sed VTs.push_back(VT); 2847193323Sed } 2848193323Sed if (Load) 2849193323Sed BeforeOps.push_back(SDValue(Load, 0)); 2850193323Sed std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); 2851198090Srdivacky SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], 2852198090Srdivacky BeforeOps.size()); 2853193323Sed NewNodes.push_back(NewNode); 2854193323Sed 2855193323Sed // Emit the store instruction. 2856193323Sed if (FoldedStore) { 2857193323Sed AddrOps.pop_back(); 2858193323Sed AddrOps.push_back(SDValue(NewNode, 0)); 2859193323Sed AddrOps.push_back(Chain); 2860199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2861199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2862199481Srdivacky MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2863199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2864199481Srdivacky bool isAligned = (*MMOs.first)->getAlignment() >= 16; 2865198090Srdivacky SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, 2866198090Srdivacky isAligned, TM), 2867198090Srdivacky dl, MVT::Other, 2868198090Srdivacky &AddrOps[0], AddrOps.size()); 2869193323Sed NewNodes.push_back(Store); 2870198090Srdivacky 2871198090Srdivacky // Preserve memory reference information. 
2872198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2873193323Sed } 2874193323Sed 2875193323Sed return true; 2876193323Sed} 2877193323Sed 2878193323Sedunsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, 2879198892Srdivacky bool UnfoldLoad, bool UnfoldStore, 2880198892Srdivacky unsigned *LoadRegIndex) const { 2881199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2882193323Sed MemOp2RegOpTable.find((unsigned*)Opc); 2883193323Sed if (I == MemOp2RegOpTable.end()) 2884193323Sed return 0; 2885193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2886193323Sed bool FoldedStore = I->second.second & (1 << 5); 2887193323Sed if (UnfoldLoad && !FoldedLoad) 2888193323Sed return 0; 2889193323Sed if (UnfoldStore && !FoldedStore) 2890193323Sed return 0; 2891198892Srdivacky if (LoadRegIndex) 2892198892Srdivacky *LoadRegIndex = I->second.second & 0xf; 2893193323Sed return I->second.first; 2894193323Sed} 2895193323Sed 2896202878Srdivackybool 2897202878SrdivackyX86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 2898202878Srdivacky int64_t &Offset1, int64_t &Offset2) const { 2899202878Srdivacky if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) 2900202878Srdivacky return false; 2901202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 2902202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 2903202878Srdivacky switch (Opc1) { 2904202878Srdivacky default: return false; 2905202878Srdivacky case X86::MOV8rm: 2906202878Srdivacky case X86::MOV16rm: 2907202878Srdivacky case X86::MOV32rm: 2908202878Srdivacky case X86::MOV64rm: 2909202878Srdivacky case X86::LD_Fp32m: 2910202878Srdivacky case X86::LD_Fp64m: 2911202878Srdivacky case X86::LD_Fp80m: 2912202878Srdivacky case X86::MOVSSrm: 2913202878Srdivacky case X86::MOVSDrm: 2914202878Srdivacky case X86::MMX_MOVD64rm: 2915202878Srdivacky case X86::MMX_MOVQ64rm: 2916202878Srdivacky case X86::FsMOVAPSrm: 2917202878Srdivacky case 
X86::FsMOVAPDrm: 2918202878Srdivacky case X86::MOVAPSrm: 2919202878Srdivacky case X86::MOVUPSrm: 2920202878Srdivacky case X86::MOVUPSrm_Int: 2921202878Srdivacky case X86::MOVAPDrm: 2922202878Srdivacky case X86::MOVDQArm: 2923202878Srdivacky case X86::MOVDQUrm: 2924202878Srdivacky case X86::MOVDQUrm_Int: 2925202878Srdivacky break; 2926202878Srdivacky } 2927202878Srdivacky switch (Opc2) { 2928202878Srdivacky default: return false; 2929202878Srdivacky case X86::MOV8rm: 2930202878Srdivacky case X86::MOV16rm: 2931202878Srdivacky case X86::MOV32rm: 2932202878Srdivacky case X86::MOV64rm: 2933202878Srdivacky case X86::LD_Fp32m: 2934202878Srdivacky case X86::LD_Fp64m: 2935202878Srdivacky case X86::LD_Fp80m: 2936202878Srdivacky case X86::MOVSSrm: 2937202878Srdivacky case X86::MOVSDrm: 2938202878Srdivacky case X86::MMX_MOVD64rm: 2939202878Srdivacky case X86::MMX_MOVQ64rm: 2940202878Srdivacky case X86::FsMOVAPSrm: 2941202878Srdivacky case X86::FsMOVAPDrm: 2942202878Srdivacky case X86::MOVAPSrm: 2943202878Srdivacky case X86::MOVUPSrm: 2944202878Srdivacky case X86::MOVUPSrm_Int: 2945202878Srdivacky case X86::MOVAPDrm: 2946202878Srdivacky case X86::MOVDQArm: 2947202878Srdivacky case X86::MOVDQUrm: 2948202878Srdivacky case X86::MOVDQUrm_Int: 2949202878Srdivacky break; 2950202878Srdivacky } 2951202878Srdivacky 2952202878Srdivacky // Check if chain operands and base addresses match. 2953202878Srdivacky if (Load1->getOperand(0) != Load2->getOperand(0) || 2954202878Srdivacky Load1->getOperand(5) != Load2->getOperand(5)) 2955202878Srdivacky return false; 2956202878Srdivacky // Segment operands should match as well. 2957202878Srdivacky if (Load1->getOperand(4) != Load2->getOperand(4)) 2958202878Srdivacky return false; 2959202878Srdivacky // Scale should be 1, Index should be Reg0. 
2960202878Srdivacky if (Load1->getOperand(1) == Load2->getOperand(1) && 2961202878Srdivacky Load1->getOperand(2) == Load2->getOperand(2)) { 2962202878Srdivacky if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1) 2963202878Srdivacky return false; 2964202878Srdivacky SDValue Op2 = Load1->getOperand(2); 2965202878Srdivacky if (!isa<RegisterSDNode>(Op2) || 2966202878Srdivacky cast<RegisterSDNode>(Op2)->getReg() != 0) 2967202878Srdivacky return 0; 2968202878Srdivacky 2969202878Srdivacky // Now let's examine the displacements. 2970202878Srdivacky if (isa<ConstantSDNode>(Load1->getOperand(3)) && 2971202878Srdivacky isa<ConstantSDNode>(Load2->getOperand(3))) { 2972202878Srdivacky Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue(); 2973202878Srdivacky Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue(); 2974202878Srdivacky return true; 2975202878Srdivacky } 2976202878Srdivacky } 2977202878Srdivacky return false; 2978202878Srdivacky} 2979202878Srdivacky 2980202878Srdivackybool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, 2981202878Srdivacky int64_t Offset1, int64_t Offset2, 2982202878Srdivacky unsigned NumLoads) const { 2983202878Srdivacky assert(Offset2 > Offset1); 2984202878Srdivacky if ((Offset2 - Offset1) / 8 > 64) 2985202878Srdivacky return false; 2986202878Srdivacky 2987202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 2988202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 2989202878Srdivacky if (Opc1 != Opc2) 2990202878Srdivacky return false; // FIXME: overly conservative? 
2991202878Srdivacky 2992202878Srdivacky switch (Opc1) { 2993202878Srdivacky default: break; 2994202878Srdivacky case X86::LD_Fp32m: 2995202878Srdivacky case X86::LD_Fp64m: 2996202878Srdivacky case X86::LD_Fp80m: 2997202878Srdivacky case X86::MMX_MOVD64rm: 2998202878Srdivacky case X86::MMX_MOVQ64rm: 2999202878Srdivacky return false; 3000202878Srdivacky } 3001202878Srdivacky 3002202878Srdivacky EVT VT = Load1->getValueType(0); 3003202878Srdivacky switch (VT.getSimpleVT().SimpleTy) { 3004202878Srdivacky default: { 3005202878Srdivacky // XMM registers. In 64-bit mode we can be a bit more aggressive since we 3006202878Srdivacky // have 16 of them to play with. 3007202878Srdivacky if (TM.getSubtargetImpl()->is64Bit()) { 3008202878Srdivacky if (NumLoads >= 3) 3009202878Srdivacky return false; 3010202878Srdivacky } else if (NumLoads) 3011202878Srdivacky return false; 3012202878Srdivacky break; 3013202878Srdivacky } 3014202878Srdivacky case MVT::i8: 3015202878Srdivacky case MVT::i16: 3016202878Srdivacky case MVT::i32: 3017202878Srdivacky case MVT::i64: 3018202878Srdivacky case MVT::f32: 3019202878Srdivacky case MVT::f64: 3020202878Srdivacky if (NumLoads) 3021202878Srdivacky return false; 3022202878Srdivacky } 3023202878Srdivacky 3024202878Srdivacky return true; 3025202878Srdivacky} 3026202878Srdivacky 3027202878Srdivacky 3028193323Sedbool X86InstrInfo:: 3029193323SedReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 3030193323Sed assert(Cond.size() == 1 && "Invalid X86 branch condition!"); 3031193323Sed X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); 3032193323Sed if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) 3033193323Sed return true; 3034193323Sed Cond[0].setImm(GetOppositeBranchCondition(CC)); 3035193323Sed return false; 3036193323Sed} 3037193323Sed 3038193323Sedbool X86InstrInfo:: 3039193323SedisSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 3040193323Sed // FIXME: Return false for x87 stack register classes 
for now. We can't 3041193323Sed // allow any loads of these registers before FpGet_ST0_80. 3042193323Sed return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || 3043193323Sed RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); 3044193323Sed} 3045193323Sed 3046193323Sed 3047203954Srdivacky/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher) 3048203954Srdivacky/// register? e.g. r8, xmm8, xmm13, etc. 3049203954Srdivackybool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { 3050203954Srdivacky switch (RegNo) { 3051193323Sed default: break; 3052193323Sed case X86::R8: case X86::R9: case X86::R10: case X86::R11: 3053193323Sed case X86::R12: case X86::R13: case X86::R14: case X86::R15: 3054193323Sed case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: 3055193323Sed case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: 3056193323Sed case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: 3057193323Sed case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: 3058193323Sed case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: 3059193323Sed case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: 3060193323Sed case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: 3061193323Sed case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: 3062193323Sed return true; 3063193323Sed } 3064193323Sed return false; 3065193323Sed} 3066193323Sed 3067193323Sed 3068193323Sed/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 3069193323Sed/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand 3070193323Sed/// size, and 3) use of X86-64 extended registers. 3071193323Sedunsigned X86InstrInfo::determineREX(const MachineInstr &MI) { 3072193323Sed unsigned REX = 0; 3073193323Sed const TargetInstrDesc &Desc = MI.getDesc(); 3074193323Sed 3075193323Sed // Pseudo instructions do not need REX prefix byte. 
3076193323Sed if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) 3077193323Sed return 0; 3078193323Sed if (Desc.TSFlags & X86II::REX_W) 3079193323Sed REX |= 1 << 3; 3080193323Sed 3081193323Sed unsigned NumOps = Desc.getNumOperands(); 3082193323Sed if (NumOps) { 3083193323Sed bool isTwoAddr = NumOps > 1 && 3084193323Sed Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; 3085193323Sed 3086193323Sed // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. 3087193323Sed unsigned i = isTwoAddr ? 1 : 0; 3088193323Sed for (unsigned e = NumOps; i != e; ++i) { 3089193323Sed const MachineOperand& MO = MI.getOperand(i); 3090193323Sed if (MO.isReg()) { 3091193323Sed unsigned Reg = MO.getReg(); 3092193323Sed if (isX86_64NonExtLowByteReg(Reg)) 3093193323Sed REX |= 0x40; 3094193323Sed } 3095193323Sed } 3096193323Sed 3097193323Sed switch (Desc.TSFlags & X86II::FormMask) { 3098193323Sed case X86II::MRMInitReg: 3099193323Sed if (isX86_64ExtendedReg(MI.getOperand(0))) 3100193323Sed REX |= (1 << 0) | (1 << 2); 3101193323Sed break; 3102193323Sed case X86II::MRMSrcReg: { 3103193323Sed if (isX86_64ExtendedReg(MI.getOperand(0))) 3104193323Sed REX |= 1 << 2; 3105193323Sed i = isTwoAddr ? 2 : 1; 3106193323Sed for (unsigned e = NumOps; i != e; ++i) { 3107193323Sed const MachineOperand& MO = MI.getOperand(i); 3108193323Sed if (isX86_64ExtendedReg(MO)) 3109193323Sed REX |= 1 << 0; 3110193323Sed } 3111193323Sed break; 3112193323Sed } 3113193323Sed case X86II::MRMSrcMem: { 3114193323Sed if (isX86_64ExtendedReg(MI.getOperand(0))) 3115193323Sed REX |= 1 << 2; 3116193323Sed unsigned Bit = 0; 3117193323Sed i = isTwoAddr ? 
2 : 1; 3118193323Sed for (; i != NumOps; ++i) { 3119193323Sed const MachineOperand& MO = MI.getOperand(i); 3120193323Sed if (MO.isReg()) { 3121193323Sed if (isX86_64ExtendedReg(MO)) 3122193323Sed REX |= 1 << Bit; 3123193323Sed Bit++; 3124193323Sed } 3125193323Sed } 3126193323Sed break; 3127193323Sed } 3128193323Sed case X86II::MRM0m: case X86II::MRM1m: 3129193323Sed case X86II::MRM2m: case X86II::MRM3m: 3130193323Sed case X86II::MRM4m: case X86II::MRM5m: 3131193323Sed case X86II::MRM6m: case X86II::MRM7m: 3132193323Sed case X86II::MRMDestMem: { 3133193323Sed unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); 3134193323Sed i = isTwoAddr ? 1 : 0; 3135193323Sed if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) 3136193323Sed REX |= 1 << 2; 3137193323Sed unsigned Bit = 0; 3138193323Sed for (; i != e; ++i) { 3139193323Sed const MachineOperand& MO = MI.getOperand(i); 3140193323Sed if (MO.isReg()) { 3141193323Sed if (isX86_64ExtendedReg(MO)) 3142193323Sed REX |= 1 << Bit; 3143193323Sed Bit++; 3144193323Sed } 3145193323Sed } 3146193323Sed break; 3147193323Sed } 3148193323Sed default: { 3149193323Sed if (isX86_64ExtendedReg(MI.getOperand(0))) 3150193323Sed REX |= 1 << 0; 3151193323Sed i = isTwoAddr ? 
2 : 1; 3152193323Sed for (unsigned e = NumOps; i != e; ++i) { 3153193323Sed const MachineOperand& MO = MI.getOperand(i); 3154193323Sed if (isX86_64ExtendedReg(MO)) 3155193323Sed REX |= 1 << 2; 3156193323Sed } 3157193323Sed break; 3158193323Sed } 3159193323Sed } 3160193323Sed } 3161193323Sed return REX; 3162193323Sed} 3163193323Sed 3164193323Sed/// sizePCRelativeBlockAddress - This method returns the size of a PC 3165193323Sed/// relative block address instruction 3166193323Sed/// 3167193323Sedstatic unsigned sizePCRelativeBlockAddress() { 3168193323Sed return 4; 3169193323Sed} 3170193323Sed 3171193323Sed/// sizeGlobalAddress - Give the size of the emission of this global address 3172193323Sed/// 3173193323Sedstatic unsigned sizeGlobalAddress(bool dword) { 3174193323Sed return dword ? 8 : 4; 3175193323Sed} 3176193323Sed 3177193323Sed/// sizeConstPoolAddress - Give the size of the emission of this constant 3178193323Sed/// pool address 3179193323Sed/// 3180193323Sedstatic unsigned sizeConstPoolAddress(bool dword) { 3181193323Sed return dword ? 8 : 4; 3182193323Sed} 3183193323Sed 3184193323Sed/// sizeExternalSymbolAddress - Give the size of the emission of this external 3185193323Sed/// symbol 3186193323Sed/// 3187193323Sedstatic unsigned sizeExternalSymbolAddress(bool dword) { 3188193323Sed return dword ? 8 : 4; 3189193323Sed} 3190193323Sed 3191193323Sed/// sizeJumpTableAddress - Give the size of the emission of this jump 3192193323Sed/// table address 3193193323Sed/// 3194193323Sedstatic unsigned sizeJumpTableAddress(bool dword) { 3195193323Sed return dword ? 
8 : 4; 3196193323Sed} 3197193323Sed 3198193323Sedstatic unsigned sizeConstant(unsigned Size) { 3199193323Sed return Size; 3200193323Sed} 3201193323Sed 3202193323Sedstatic unsigned sizeRegModRMByte(){ 3203193323Sed return 1; 3204193323Sed} 3205193323Sed 3206193323Sedstatic unsigned sizeSIBByte(){ 3207193323Sed return 1; 3208193323Sed} 3209193323Sed 3210193323Sedstatic unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { 3211193323Sed unsigned FinalSize = 0; 3212193323Sed // If this is a simple integer displacement that doesn't require a relocation. 3213193323Sed if (!RelocOp) { 3214193323Sed FinalSize += sizeConstant(4); 3215193323Sed return FinalSize; 3216193323Sed } 3217193323Sed 3218193323Sed // Otherwise, this is something that requires a relocation. 3219193323Sed if (RelocOp->isGlobal()) { 3220193323Sed FinalSize += sizeGlobalAddress(false); 3221193323Sed } else if (RelocOp->isCPI()) { 3222193323Sed FinalSize += sizeConstPoolAddress(false); 3223193323Sed } else if (RelocOp->isJTI()) { 3224193323Sed FinalSize += sizeJumpTableAddress(false); 3225193323Sed } else { 3226198090Srdivacky llvm_unreachable("Unknown value to relocate!"); 3227193323Sed } 3228193323Sed return FinalSize; 3229193323Sed} 3230193323Sed 3231193323Sedstatic unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, 3232193323Sed bool IsPIC, bool Is64BitMode) { 3233193323Sed const MachineOperand &Op3 = MI.getOperand(Op+3); 3234193323Sed int DispVal = 0; 3235193323Sed const MachineOperand *DispForReloc = 0; 3236193323Sed unsigned FinalSize = 0; 3237193323Sed 3238193323Sed // Figure out what sort of displacement we have to handle here. 
3239193323Sed if (Op3.isGlobal()) { 3240193323Sed DispForReloc = &Op3; 3241193323Sed } else if (Op3.isCPI()) { 3242193323Sed if (Is64BitMode || IsPIC) { 3243193323Sed DispForReloc = &Op3; 3244193323Sed } else { 3245193323Sed DispVal = 1; 3246193323Sed } 3247193323Sed } else if (Op3.isJTI()) { 3248193323Sed if (Is64BitMode || IsPIC) { 3249193323Sed DispForReloc = &Op3; 3250193323Sed } else { 3251193323Sed DispVal = 1; 3252193323Sed } 3253193323Sed } else { 3254193323Sed DispVal = 1; 3255193323Sed } 3256193323Sed 3257193323Sed const MachineOperand &Base = MI.getOperand(Op); 3258193323Sed const MachineOperand &IndexReg = MI.getOperand(Op+2); 3259193323Sed 3260193323Sed unsigned BaseReg = Base.getReg(); 3261193323Sed 3262193323Sed // Is a SIB byte needed? 3263193323Sed if ((!Is64BitMode || DispForReloc || BaseReg != 0) && 3264193323Sed IndexReg.getReg() == 0 && 3265193323Sed (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { 3266193323Sed if (BaseReg == 0) { // Just a displacement? 3267193323Sed // Emit special case [disp32] encoding 3268193323Sed ++FinalSize; 3269193323Sed FinalSize += getDisplacementFieldSize(DispForReloc); 3270193323Sed } else { 3271193323Sed unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg); 3272193323Sed if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { 3273193323Sed // Emit simple indirect register encoding... [EAX] f.e. 
3274193323Sed ++FinalSize; 3275193323Sed // Be pessimistic and assume it's a disp32, not a disp8 3276193323Sed } else { 3277193323Sed // Emit the most general non-SIB encoding: [REG+disp32] 3278193323Sed ++FinalSize; 3279193323Sed FinalSize += getDisplacementFieldSize(DispForReloc); 3280193323Sed } 3281193323Sed } 3282193323Sed 3283193323Sed } else { // We need a SIB byte, so start by outputting the ModR/M byte first 3284193323Sed assert(IndexReg.getReg() != X86::ESP && 3285193323Sed IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); 3286193323Sed 3287193323Sed bool ForceDisp32 = false; 3288193323Sed if (BaseReg == 0 || DispForReloc) { 3289193323Sed // Emit the normal disp32 encoding. 3290193323Sed ++FinalSize; 3291193323Sed ForceDisp32 = true; 3292193323Sed } else { 3293193323Sed ++FinalSize; 3294193323Sed } 3295193323Sed 3296193323Sed FinalSize += sizeSIBByte(); 3297193323Sed 3298193323Sed // Do we need to output a displacement? 3299193323Sed if (DispVal != 0 || ForceDisp32) { 3300193323Sed FinalSize += getDisplacementFieldSize(DispForReloc); 3301193323Sed } 3302193323Sed } 3303193323Sed return FinalSize; 3304193323Sed} 3305193323Sed 3306193323Sed 3307193323Sedstatic unsigned GetInstSizeWithDesc(const MachineInstr &MI, 3308193323Sed const TargetInstrDesc *Desc, 3309193323Sed bool IsPIC, bool Is64BitMode) { 3310193323Sed 3311193323Sed unsigned Opcode = Desc->Opcode; 3312193323Sed unsigned FinalSize = 0; 3313193323Sed 3314193323Sed // Emit the lock opcode prefix as needed. 3315193323Sed if (Desc->TSFlags & X86II::LOCK) ++FinalSize; 3316193323Sed 3317193323Sed // Emit segment override opcode prefix as needed. 3318193323Sed switch (Desc->TSFlags & X86II::SegOvrMask) { 3319193323Sed case X86II::FS: 3320193323Sed case X86II::GS: 3321193323Sed ++FinalSize; 3322193323Sed break; 3323198090Srdivacky default: llvm_unreachable("Invalid segment!"); 3324193323Sed case 0: break; // No segment override! 
3325193323Sed } 3326193323Sed 3327193323Sed // Emit the repeat opcode prefix as needed. 3328193323Sed if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize; 3329193323Sed 3330193323Sed // Emit the operand size opcode prefix as needed. 3331193323Sed if (Desc->TSFlags & X86II::OpSize) ++FinalSize; 3332193323Sed 3333193323Sed // Emit the address size opcode prefix as needed. 3334193323Sed if (Desc->TSFlags & X86II::AdSize) ++FinalSize; 3335193323Sed 3336193323Sed bool Need0FPrefix = false; 3337193323Sed switch (Desc->TSFlags & X86II::Op0Mask) { 3338193323Sed case X86II::TB: // Two-byte opcode prefix 3339193323Sed case X86II::T8: // 0F 38 3340193323Sed case X86II::TA: // 0F 3A 3341193323Sed Need0FPrefix = true; 3342193323Sed break; 3343198090Srdivacky case X86II::TF: // F2 0F 38 3344198090Srdivacky ++FinalSize; 3345198090Srdivacky Need0FPrefix = true; 3346198090Srdivacky break; 3347193323Sed case X86II::REP: break; // already handled. 3348193323Sed case X86II::XS: // F3 0F 3349193323Sed ++FinalSize; 3350193323Sed Need0FPrefix = true; 3351193323Sed break; 3352193323Sed case X86II::XD: // F2 0F 3353193323Sed ++FinalSize; 3354193323Sed Need0FPrefix = true; 3355193323Sed break; 3356193323Sed case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: 3357193323Sed case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: 3358193323Sed ++FinalSize; 3359193323Sed break; // Two-byte opcode prefix 3360198090Srdivacky default: llvm_unreachable("Invalid prefix!"); 3361193323Sed case 0: break; // No prefix! 3362193323Sed } 3363193323Sed 3364193323Sed if (Is64BitMode) { 3365193323Sed // REX prefix 3366193323Sed unsigned REX = X86InstrInfo::determineREX(MI); 3367193323Sed if (REX) 3368193323Sed ++FinalSize; 3369193323Sed } 3370193323Sed 3371193323Sed // 0x0F escape code must be emitted just before the opcode. 
3372193323Sed if (Need0FPrefix) 3373193323Sed ++FinalSize; 3374193323Sed 3375193323Sed switch (Desc->TSFlags & X86II::Op0Mask) { 3376193323Sed case X86II::T8: // 0F 38 3377193323Sed ++FinalSize; 3378193323Sed break; 3379193323Sed case X86II::TA: // 0F 3A 3380193323Sed ++FinalSize; 3381193323Sed break; 3382198090Srdivacky case X86II::TF: // F2 0F 38 3383198090Srdivacky ++FinalSize; 3384198090Srdivacky break; 3385193323Sed } 3386193323Sed 3387193323Sed // If this is a two-address instruction, skip one of the register operands. 3388193323Sed unsigned NumOps = Desc->getNumOperands(); 3389193323Sed unsigned CurOp = 0; 3390193323Sed if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) 3391193323Sed CurOp++; 3392193323Sed else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) 3393193323Sed // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 3394193323Sed --NumOps; 3395193323Sed 3396193323Sed switch (Desc->TSFlags & X86II::FormMask) { 3397198090Srdivacky default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); 3398193323Sed case X86II::Pseudo: 3399193323Sed // Remember the current PC offset, this is the PIC relocation 3400193323Sed // base address. 
3401193323Sed switch (Opcode) { 3402193323Sed default: 3403193323Sed break; 3404203954Srdivacky case TargetOpcode::INLINEASM: { 3405193323Sed const MachineFunction *MF = MI.getParent()->getParent(); 3406198090Srdivacky const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); 3407198090Srdivacky FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), 3408198090Srdivacky *MF->getTarget().getMCAsmInfo()); 3409193323Sed break; 3410193323Sed } 3411203954Srdivacky case TargetOpcode::DBG_LABEL: 3412203954Srdivacky case TargetOpcode::EH_LABEL: 3413193323Sed break; 3414203954Srdivacky case TargetOpcode::IMPLICIT_DEF: 3415203954Srdivacky case TargetOpcode::KILL: 3416193323Sed case X86::FP_REG_KILL: 3417193323Sed break; 3418193323Sed case X86::MOVPC32r: { 3419193323Sed // This emits the "call" portion of this pseudo instruction. 3420193323Sed ++FinalSize; 3421203954Srdivacky FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); 3422193323Sed break; 3423193323Sed } 3424193323Sed } 3425193323Sed CurOp = NumOps; 3426193323Sed break; 3427193323Sed case X86II::RawFrm: 3428193323Sed ++FinalSize; 3429193323Sed 3430193323Sed if (CurOp != NumOps) { 3431193323Sed const MachineOperand &MO = MI.getOperand(CurOp++); 3432193323Sed if (MO.isMBB()) { 3433193323Sed FinalSize += sizePCRelativeBlockAddress(); 3434193323Sed } else if (MO.isGlobal()) { 3435193323Sed FinalSize += sizeGlobalAddress(false); 3436193323Sed } else if (MO.isSymbol()) { 3437193323Sed FinalSize += sizeExternalSymbolAddress(false); 3438193323Sed } else if (MO.isImm()) { 3439203954Srdivacky FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); 3440193323Sed } else { 3441198090Srdivacky llvm_unreachable("Unknown RawFrm operand!"); 3442193323Sed } 3443193323Sed } 3444193323Sed break; 3445193323Sed 3446193323Sed case X86II::AddRegFrm: 3447193323Sed ++FinalSize; 3448193323Sed ++CurOp; 3449193323Sed 3450193323Sed if (CurOp != NumOps) { 3451193323Sed const MachineOperand &MO1 = 
MI.getOperand(CurOp++); 3452203954Srdivacky unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); 3453193323Sed if (MO1.isImm()) 3454193323Sed FinalSize += sizeConstant(Size); 3455193323Sed else { 3456193323Sed bool dword = false; 3457193323Sed if (Opcode == X86::MOV64ri) 3458193323Sed dword = true; 3459193323Sed if (MO1.isGlobal()) { 3460193323Sed FinalSize += sizeGlobalAddress(dword); 3461193323Sed } else if (MO1.isSymbol()) 3462193323Sed FinalSize += sizeExternalSymbolAddress(dword); 3463193323Sed else if (MO1.isCPI()) 3464193323Sed FinalSize += sizeConstPoolAddress(dword); 3465193323Sed else if (MO1.isJTI()) 3466193323Sed FinalSize += sizeJumpTableAddress(dword); 3467193323Sed } 3468193323Sed } 3469193323Sed break; 3470193323Sed 3471193323Sed case X86II::MRMDestReg: { 3472193323Sed ++FinalSize; 3473193323Sed FinalSize += sizeRegModRMByte(); 3474193323Sed CurOp += 2; 3475193323Sed if (CurOp != NumOps) { 3476193323Sed ++CurOp; 3477203954Srdivacky FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); 3478193323Sed } 3479193323Sed break; 3480193323Sed } 3481193323Sed case X86II::MRMDestMem: { 3482193323Sed ++FinalSize; 3483193323Sed FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); 3484193323Sed CurOp += X86AddrNumOperands + 1; 3485193323Sed if (CurOp != NumOps) { 3486193323Sed ++CurOp; 3487203954Srdivacky FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); 3488193323Sed } 3489193323Sed break; 3490193323Sed } 3491193323Sed 3492193323Sed case X86II::MRMSrcReg: 3493193323Sed ++FinalSize; 3494193323Sed FinalSize += sizeRegModRMByte(); 3495193323Sed CurOp += 2; 3496193323Sed if (CurOp != NumOps) { 3497193323Sed ++CurOp; 3498203954Srdivacky FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); 3499193323Sed } 3500193323Sed break; 3501193323Sed 3502193323Sed case X86II::MRMSrcMem: { 3503193323Sed int AddrOperands; 3504193323Sed if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || 3505193323Sed Opcode == X86::LEA16r || Opcode 
== X86::LEA32r)
      AddrOperands = X86AddrNumOperands - 1; // No segment register
    else
      AddrOperands = X86AddrNumOperands;

    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
    CurOp += AddrOperands + 1;
    if (CurOp != NumOps) {
      // A trailing immediate follows the memory reference; its width is
      // encoded in the instruction's TSFlags.
      ++CurOp;
      FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
    }
    break;
  }

  // MRM0r..MRM7r: the ModRM reg field is an opcode extension (/0../7) and
  // the operand, when present, is a register.
  case X86II::MRM0r: case X86II::MRM1r:
  case X86II::MRM2r: case X86II::MRM3r:
  case X86II::MRM4r: case X86II::MRM5r:
  case X86II::MRM6r: case X86II::MRM7r:
    ++FinalSize;
    if (Desc->getOpcode() == X86::LFENCE ||
        Desc->getOpcode() == X86::MFENCE) {
      // Special handling of lfence and mfence;
      FinalSize += sizeRegModRMByte();
    } else if (Desc->getOpcode() == X86::MONITOR ||
               Desc->getOpcode() == X86::MWAIT) {
      // Special handling of monitor and mwait.
      FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode.
    } else {
      ++CurOp;
      FinalSize += sizeRegModRMByte();
    }

    if (CurOp != NumOps) {
      const MachineOperand &MO1 = MI.getOperand(CurOp++);
      unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
      if (MO1.isImm())
        FinalSize += sizeConstant(Size);
      else {
        // Symbolic immediate: global, external symbol, constant-pool or
        // jump-table address. MOV64ri32 selects the wide ("dword") form.
        bool dword = false;
        if (Opcode == X86::MOV64ri32)
          dword = true;
        if (MO1.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO1.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO1.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO1.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;

  // MRM0m..MRM7m: like the register forms above, but the operand is a
  // memory reference (X86AddrNumOperands operands wide).
  case X86II::MRM0m: case X86II::MRM1m:
  case X86II::MRM2m: case X86II::MRM3m:
  case X86II::MRM4m: case X86II::MRM5m:
  case X86II::MRM6m: case X86II::MRM7m: {

    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
    CurOp += X86AddrNumOperands;

    if (CurOp != NumOps) {
      const MachineOperand &MO = MI.getOperand(CurOp++);
      unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
      if (MO.isImm())
        FinalSize += sizeConstant(Size);
      else {
        // Symbolic immediate, as in the register case above. MOV64mi32
        // selects the wide form.
        bool dword = false;
        if (Opcode == X86::MOV64mi32)
          dword = true;
        if (MO.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;

  // MRM_xx forms: two bytes total (opcode plus a hard-coded ModRM value,
  // presumably — no operands are consumed here).
  case X86II::MRM_C1:
  case X86II::MRM_C8:
  case X86II::MRM_C9:
  case X86II::MRM_E8:
  case X86II::MRM_F0:
    FinalSize += 2;
    break;
  }

  case X86II::MRMInitReg:
    ++FinalSize;
    // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
    FinalSize += sizeRegModRMByte();
    ++CurOp;
    break;
  }

  if (!Desc->isVariadic() && CurOp != NumOps) {
    // Operands were left over that no case above consumed: we cannot size
    // this instruction, so fail loudly rather than return a wrong count.
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "Cannot determine size: " << MI;
    llvm_report_error(Msg.str());
  }


  return FinalSize;
}


/// GetInstSizeInBytes - Return the size of the encoded instruction MI in
/// bytes, as computed from its opcode description.
unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const TargetInstrDesc &Desc = MI->getDesc();
  bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
  bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
  unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
  // MOVPC32r is emitted together with a POP32r (see getGlobalBaseReg), so
  // account for the pop's size as well.
  if (Desc.getOpcode() == X86::MOVPC32r)
    Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
  return Size;
}

/// getGlobalBaseReg - Return a virtual register initialized with
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
3631193323Sed/// 3632193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 3633193323Sed assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && 3634193323Sed "X86-64 PIC uses RIP relative addressing"); 3635193323Sed 3636193323Sed X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 3637193323Sed unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 3638193323Sed if (GlobalBaseReg != 0) 3639193323Sed return GlobalBaseReg; 3640193323Sed 3641193323Sed // Insert the set of GlobalBaseReg into the first MBB of the function 3642193323Sed MachineBasicBlock &FirstMBB = MF->front(); 3643193323Sed MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 3644203954Srdivacky DebugLoc DL = FirstMBB.findDebugLoc(MBBI); 3645193323Sed MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3646193323Sed unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3647193323Sed 3648193323Sed const TargetInstrInfo *TII = TM.getInstrInfo(); 3649193323Sed // Operand of MovePCtoStack is completely ignored by asm printer. It's 3650193323Sed // only used in JIT code emission as displacement to pc. 3651195098Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); 3652193323Sed 3653193323Sed // If we're using vanilla 'GOT' PIC style, we should use relative addressing 3654195098Sed // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. 
3655198090Srdivacky if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) { 3656195098Sed GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3657195098Sed // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register 3658193323Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) 3659198090Srdivacky .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 3660195098Sed X86II::MO_GOT_ABSOLUTE_ADDRESS); 3661193323Sed } else { 3662193323Sed GlobalBaseReg = PC; 3663193323Sed } 3664193323Sed 3665193323Sed X86FI->setGlobalBaseReg(GlobalBaseReg); 3666193323Sed return GlobalBaseReg; 3667193323Sed} 3668206083Srdivacky 3669206083Srdivacky// These are the replaceable SSE instructions. Some of these have Int variants 3670206083Srdivacky// that we don't include here. We don't want to replace instructions selected 3671206083Srdivacky// by intrinsics. 3672206083Srdivackystatic const unsigned ReplaceableInstrs[][3] = { 3673206083Srdivacky //PackedInt PackedSingle PackedDouble 3674206083Srdivacky { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, 3675206083Srdivacky { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, 3676206083Srdivacky { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, 3677206083Srdivacky { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, 3678206083Srdivacky { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, 3679206083Srdivacky { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, 3680206083Srdivacky { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, 3681206083Srdivacky { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, 3682206083Srdivacky { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm }, 3683206083Srdivacky { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, 3684206083Srdivacky { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, 3685206083Srdivacky { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, 3686206083Srdivacky { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, 3687206083Srdivacky { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, 3688206083Srdivacky { X86::XORPSrr, 
X86::XORPDrr, X86::PXORrr }, 3689206083Srdivacky}; 3690206083Srdivacky 3691206083Srdivacky// FIXME: Some shuffle and unpack instructions have equivalents in different 3692206083Srdivacky// domains, but they require a bit more work than just switching opcodes. 3693206083Srdivacky 3694206083Srdivackystatic const unsigned *lookup(unsigned opcode, unsigned domain) { 3695206083Srdivacky for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) 3696206083Srdivacky if (ReplaceableInstrs[i][domain-1] == opcode) 3697206083Srdivacky return ReplaceableInstrs[i]; 3698206083Srdivacky return 0; 3699206083Srdivacky} 3700206083Srdivacky 3701206083Srdivackystd::pair<uint16_t, uint16_t> 3702206083SrdivackyX86InstrInfo::GetSSEDomain(const MachineInstr *MI) const { 3703206083Srdivacky uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 3704206083Srdivacky return std::make_pair(domain, 3705206083Srdivacky domain && lookup(MI->getOpcode(), domain) ? 0xe : 0); 3706206083Srdivacky} 3707206083Srdivacky 3708206083Srdivackyvoid X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { 3709206083Srdivacky assert(Domain>0 && Domain<4 && "Invalid execution domain"); 3710206083Srdivacky uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 3711206083Srdivacky assert(dom && "Not an SSE instruction"); 3712206083Srdivacky const unsigned *table = lookup(MI->getOpcode(), dom); 3713206083Srdivacky assert(table && "Cannot change domain"); 3714206083Srdivacky MI->setDesc(get(table[Domain-1])); 3715206083Srdivacky} 3716