X86InstrInfo.cpp — LLVM source file, SVN revision 202878
1193323Sed//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file contains the X86 implementation of the TargetInstrInfo class. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "X86InstrInfo.h" 15193323Sed#include "X86.h" 16193323Sed#include "X86GenInstrInfo.inc" 17193323Sed#include "X86InstrBuilder.h" 18193323Sed#include "X86MachineFunctionInfo.h" 19193323Sed#include "X86Subtarget.h" 20193323Sed#include "X86TargetMachine.h" 21193323Sed#include "llvm/DerivedTypes.h" 22198090Srdivacky#include "llvm/LLVMContext.h" 23193323Sed#include "llvm/ADT/STLExtras.h" 24193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 25193323Sed#include "llvm/CodeGen/MachineFrameInfo.h" 26193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 27193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 28193323Sed#include "llvm/CodeGen/LiveVariables.h" 29199481Srdivacky#include "llvm/CodeGen/PseudoSourceValue.h" 30193323Sed#include "llvm/Support/CommandLine.h" 31202375Srdivacky#include "llvm/Support/Debug.h" 32198090Srdivacky#include "llvm/Support/ErrorHandling.h" 33198090Srdivacky#include "llvm/Support/raw_ostream.h" 34193323Sed#include "llvm/Target/TargetOptions.h" 35198090Srdivacky#include "llvm/MC/MCAsmInfo.h" 36199481Srdivacky 37199481Srdivacky#include <limits> 38199481Srdivacky 39193323Sedusing namespace llvm; 40193323Sed 41198090Srdivackystatic cl::opt<bool> 42198090SrdivackyNoFusing("disable-spill-fusing", 43198090Srdivacky cl::desc("Disable fusing of spill code into instructions")); 44198090Srdivackystatic cl::opt<bool> 
45198090SrdivackyPrintFailedFusing("print-failed-fuse-candidates", 46198090Srdivacky cl::desc("Print instructions that the allocator wants to" 47198090Srdivacky " fuse, but the X86 backend currently can't"), 48198090Srdivacky cl::Hidden); 49198090Srdivackystatic cl::opt<bool> 50198090SrdivackyReMatPICStubLoad("remat-pic-stub-load", 51198090Srdivacky cl::desc("Re-materialize load from stub in PIC mode"), 52198090Srdivacky cl::init(false), cl::Hidden); 53193323Sed 54193323SedX86InstrInfo::X86InstrInfo(X86TargetMachine &tm) 55193323Sed : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), 56193323Sed TM(tm), RI(tm, *this) { 57193323Sed SmallVector<unsigned,16> AmbEntries; 58193323Sed static const unsigned OpTbl2Addr[][2] = { 59193323Sed { X86::ADC32ri, X86::ADC32mi }, 60193323Sed { X86::ADC32ri8, X86::ADC32mi8 }, 61193323Sed { X86::ADC32rr, X86::ADC32mr }, 62193323Sed { X86::ADC64ri32, X86::ADC64mi32 }, 63193323Sed { X86::ADC64ri8, X86::ADC64mi8 }, 64193323Sed { X86::ADC64rr, X86::ADC64mr }, 65193323Sed { X86::ADD16ri, X86::ADD16mi }, 66193323Sed { X86::ADD16ri8, X86::ADD16mi8 }, 67193323Sed { X86::ADD16rr, X86::ADD16mr }, 68193323Sed { X86::ADD32ri, X86::ADD32mi }, 69193323Sed { X86::ADD32ri8, X86::ADD32mi8 }, 70193323Sed { X86::ADD32rr, X86::ADD32mr }, 71193323Sed { X86::ADD64ri32, X86::ADD64mi32 }, 72193323Sed { X86::ADD64ri8, X86::ADD64mi8 }, 73193323Sed { X86::ADD64rr, X86::ADD64mr }, 74193323Sed { X86::ADD8ri, X86::ADD8mi }, 75193323Sed { X86::ADD8rr, X86::ADD8mr }, 76193323Sed { X86::AND16ri, X86::AND16mi }, 77193323Sed { X86::AND16ri8, X86::AND16mi8 }, 78193323Sed { X86::AND16rr, X86::AND16mr }, 79193323Sed { X86::AND32ri, X86::AND32mi }, 80193323Sed { X86::AND32ri8, X86::AND32mi8 }, 81193323Sed { X86::AND32rr, X86::AND32mr }, 82193323Sed { X86::AND64ri32, X86::AND64mi32 }, 83193323Sed { X86::AND64ri8, X86::AND64mi8 }, 84193323Sed { X86::AND64rr, X86::AND64mr }, 85193323Sed { X86::AND8ri, X86::AND8mi }, 86193323Sed { X86::AND8rr, X86::AND8mr }, 
87193323Sed { X86::DEC16r, X86::DEC16m }, 88193323Sed { X86::DEC32r, X86::DEC32m }, 89193323Sed { X86::DEC64_16r, X86::DEC64_16m }, 90193323Sed { X86::DEC64_32r, X86::DEC64_32m }, 91193323Sed { X86::DEC64r, X86::DEC64m }, 92193323Sed { X86::DEC8r, X86::DEC8m }, 93193323Sed { X86::INC16r, X86::INC16m }, 94193323Sed { X86::INC32r, X86::INC32m }, 95193323Sed { X86::INC64_16r, X86::INC64_16m }, 96193323Sed { X86::INC64_32r, X86::INC64_32m }, 97193323Sed { X86::INC64r, X86::INC64m }, 98193323Sed { X86::INC8r, X86::INC8m }, 99193323Sed { X86::NEG16r, X86::NEG16m }, 100193323Sed { X86::NEG32r, X86::NEG32m }, 101193323Sed { X86::NEG64r, X86::NEG64m }, 102193323Sed { X86::NEG8r, X86::NEG8m }, 103193323Sed { X86::NOT16r, X86::NOT16m }, 104193323Sed { X86::NOT32r, X86::NOT32m }, 105193323Sed { X86::NOT64r, X86::NOT64m }, 106193323Sed { X86::NOT8r, X86::NOT8m }, 107193323Sed { X86::OR16ri, X86::OR16mi }, 108193323Sed { X86::OR16ri8, X86::OR16mi8 }, 109193323Sed { X86::OR16rr, X86::OR16mr }, 110193323Sed { X86::OR32ri, X86::OR32mi }, 111193323Sed { X86::OR32ri8, X86::OR32mi8 }, 112193323Sed { X86::OR32rr, X86::OR32mr }, 113193323Sed { X86::OR64ri32, X86::OR64mi32 }, 114193323Sed { X86::OR64ri8, X86::OR64mi8 }, 115193323Sed { X86::OR64rr, X86::OR64mr }, 116193323Sed { X86::OR8ri, X86::OR8mi }, 117193323Sed { X86::OR8rr, X86::OR8mr }, 118193323Sed { X86::ROL16r1, X86::ROL16m1 }, 119193323Sed { X86::ROL16rCL, X86::ROL16mCL }, 120193323Sed { X86::ROL16ri, X86::ROL16mi }, 121193323Sed { X86::ROL32r1, X86::ROL32m1 }, 122193323Sed { X86::ROL32rCL, X86::ROL32mCL }, 123193323Sed { X86::ROL32ri, X86::ROL32mi }, 124193323Sed { X86::ROL64r1, X86::ROL64m1 }, 125193323Sed { X86::ROL64rCL, X86::ROL64mCL }, 126193323Sed { X86::ROL64ri, X86::ROL64mi }, 127193323Sed { X86::ROL8r1, X86::ROL8m1 }, 128193323Sed { X86::ROL8rCL, X86::ROL8mCL }, 129193323Sed { X86::ROL8ri, X86::ROL8mi }, 130193323Sed { X86::ROR16r1, X86::ROR16m1 }, 131193323Sed { X86::ROR16rCL, X86::ROR16mCL }, 132193323Sed { 
X86::ROR16ri, X86::ROR16mi }, 133193323Sed { X86::ROR32r1, X86::ROR32m1 }, 134193323Sed { X86::ROR32rCL, X86::ROR32mCL }, 135193323Sed { X86::ROR32ri, X86::ROR32mi }, 136193323Sed { X86::ROR64r1, X86::ROR64m1 }, 137193323Sed { X86::ROR64rCL, X86::ROR64mCL }, 138193323Sed { X86::ROR64ri, X86::ROR64mi }, 139193323Sed { X86::ROR8r1, X86::ROR8m1 }, 140193323Sed { X86::ROR8rCL, X86::ROR8mCL }, 141193323Sed { X86::ROR8ri, X86::ROR8mi }, 142193323Sed { X86::SAR16r1, X86::SAR16m1 }, 143193323Sed { X86::SAR16rCL, X86::SAR16mCL }, 144193323Sed { X86::SAR16ri, X86::SAR16mi }, 145193323Sed { X86::SAR32r1, X86::SAR32m1 }, 146193323Sed { X86::SAR32rCL, X86::SAR32mCL }, 147193323Sed { X86::SAR32ri, X86::SAR32mi }, 148193323Sed { X86::SAR64r1, X86::SAR64m1 }, 149193323Sed { X86::SAR64rCL, X86::SAR64mCL }, 150193323Sed { X86::SAR64ri, X86::SAR64mi }, 151193323Sed { X86::SAR8r1, X86::SAR8m1 }, 152193323Sed { X86::SAR8rCL, X86::SAR8mCL }, 153193323Sed { X86::SAR8ri, X86::SAR8mi }, 154193323Sed { X86::SBB32ri, X86::SBB32mi }, 155193323Sed { X86::SBB32ri8, X86::SBB32mi8 }, 156193323Sed { X86::SBB32rr, X86::SBB32mr }, 157193323Sed { X86::SBB64ri32, X86::SBB64mi32 }, 158193323Sed { X86::SBB64ri8, X86::SBB64mi8 }, 159193323Sed { X86::SBB64rr, X86::SBB64mr }, 160193323Sed { X86::SHL16rCL, X86::SHL16mCL }, 161193323Sed { X86::SHL16ri, X86::SHL16mi }, 162193323Sed { X86::SHL32rCL, X86::SHL32mCL }, 163193323Sed { X86::SHL32ri, X86::SHL32mi }, 164193323Sed { X86::SHL64rCL, X86::SHL64mCL }, 165193323Sed { X86::SHL64ri, X86::SHL64mi }, 166193323Sed { X86::SHL8rCL, X86::SHL8mCL }, 167193323Sed { X86::SHL8ri, X86::SHL8mi }, 168193323Sed { X86::SHLD16rrCL, X86::SHLD16mrCL }, 169193323Sed { X86::SHLD16rri8, X86::SHLD16mri8 }, 170193323Sed { X86::SHLD32rrCL, X86::SHLD32mrCL }, 171193323Sed { X86::SHLD32rri8, X86::SHLD32mri8 }, 172193323Sed { X86::SHLD64rrCL, X86::SHLD64mrCL }, 173193323Sed { X86::SHLD64rri8, X86::SHLD64mri8 }, 174193323Sed { X86::SHR16r1, X86::SHR16m1 }, 175193323Sed { X86::SHR16rCL, 
X86::SHR16mCL }, 176193323Sed { X86::SHR16ri, X86::SHR16mi }, 177193323Sed { X86::SHR32r1, X86::SHR32m1 }, 178193323Sed { X86::SHR32rCL, X86::SHR32mCL }, 179193323Sed { X86::SHR32ri, X86::SHR32mi }, 180193323Sed { X86::SHR64r1, X86::SHR64m1 }, 181193323Sed { X86::SHR64rCL, X86::SHR64mCL }, 182193323Sed { X86::SHR64ri, X86::SHR64mi }, 183193323Sed { X86::SHR8r1, X86::SHR8m1 }, 184193323Sed { X86::SHR8rCL, X86::SHR8mCL }, 185193323Sed { X86::SHR8ri, X86::SHR8mi }, 186193323Sed { X86::SHRD16rrCL, X86::SHRD16mrCL }, 187193323Sed { X86::SHRD16rri8, X86::SHRD16mri8 }, 188193323Sed { X86::SHRD32rrCL, X86::SHRD32mrCL }, 189193323Sed { X86::SHRD32rri8, X86::SHRD32mri8 }, 190193323Sed { X86::SHRD64rrCL, X86::SHRD64mrCL }, 191193323Sed { X86::SHRD64rri8, X86::SHRD64mri8 }, 192193323Sed { X86::SUB16ri, X86::SUB16mi }, 193193323Sed { X86::SUB16ri8, X86::SUB16mi8 }, 194193323Sed { X86::SUB16rr, X86::SUB16mr }, 195193323Sed { X86::SUB32ri, X86::SUB32mi }, 196193323Sed { X86::SUB32ri8, X86::SUB32mi8 }, 197193323Sed { X86::SUB32rr, X86::SUB32mr }, 198193323Sed { X86::SUB64ri32, X86::SUB64mi32 }, 199193323Sed { X86::SUB64ri8, X86::SUB64mi8 }, 200193323Sed { X86::SUB64rr, X86::SUB64mr }, 201193323Sed { X86::SUB8ri, X86::SUB8mi }, 202193323Sed { X86::SUB8rr, X86::SUB8mr }, 203193323Sed { X86::XOR16ri, X86::XOR16mi }, 204193323Sed { X86::XOR16ri8, X86::XOR16mi8 }, 205193323Sed { X86::XOR16rr, X86::XOR16mr }, 206193323Sed { X86::XOR32ri, X86::XOR32mi }, 207193323Sed { X86::XOR32ri8, X86::XOR32mi8 }, 208193323Sed { X86::XOR32rr, X86::XOR32mr }, 209193323Sed { X86::XOR64ri32, X86::XOR64mi32 }, 210193323Sed { X86::XOR64ri8, X86::XOR64mi8 }, 211193323Sed { X86::XOR64rr, X86::XOR64mr }, 212193323Sed { X86::XOR8ri, X86::XOR8mi }, 213193323Sed { X86::XOR8rr, X86::XOR8mr } 214193323Sed }; 215193323Sed 216193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { 217193323Sed unsigned RegOp = OpTbl2Addr[i][0]; 218193323Sed unsigned MemOp = OpTbl2Addr[i][1]; 219193323Sed if 
(!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, 220198090Srdivacky std::make_pair(MemOp,0))).second) 221193323Sed assert(false && "Duplicated entries?"); 222198090Srdivacky // Index 0, folded load and store, no alignment requirement. 223198090Srdivacky unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); 224193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 225193323Sed std::make_pair(RegOp, 226193323Sed AuxInfo))).second) 227193323Sed AmbEntries.push_back(MemOp); 228193323Sed } 229193323Sed 230193323Sed // If the third value is 1, then it's folding either a load or a store. 231198090Srdivacky static const unsigned OpTbl0[][4] = { 232198090Srdivacky { X86::BT16ri8, X86::BT16mi8, 1, 0 }, 233198090Srdivacky { X86::BT32ri8, X86::BT32mi8, 1, 0 }, 234198090Srdivacky { X86::BT64ri8, X86::BT64mi8, 1, 0 }, 235198090Srdivacky { X86::CALL32r, X86::CALL32m, 1, 0 }, 236198090Srdivacky { X86::CALL64r, X86::CALL64m, 1, 0 }, 237198090Srdivacky { X86::CMP16ri, X86::CMP16mi, 1, 0 }, 238198090Srdivacky { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, 239198090Srdivacky { X86::CMP16rr, X86::CMP16mr, 1, 0 }, 240198090Srdivacky { X86::CMP32ri, X86::CMP32mi, 1, 0 }, 241198090Srdivacky { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, 242198090Srdivacky { X86::CMP32rr, X86::CMP32mr, 1, 0 }, 243198090Srdivacky { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, 244198090Srdivacky { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, 245198090Srdivacky { X86::CMP64rr, X86::CMP64mr, 1, 0 }, 246198090Srdivacky { X86::CMP8ri, X86::CMP8mi, 1, 0 }, 247198090Srdivacky { X86::CMP8rr, X86::CMP8mr, 1, 0 }, 248198090Srdivacky { X86::DIV16r, X86::DIV16m, 1, 0 }, 249198090Srdivacky { X86::DIV32r, X86::DIV32m, 1, 0 }, 250198090Srdivacky { X86::DIV64r, X86::DIV64m, 1, 0 }, 251198090Srdivacky { X86::DIV8r, X86::DIV8m, 1, 0 }, 252198090Srdivacky { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, 253198090Srdivacky { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 }, 254198090Srdivacky { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, 
255198090Srdivacky { X86::IDIV16r, X86::IDIV16m, 1, 0 }, 256198090Srdivacky { X86::IDIV32r, X86::IDIV32m, 1, 0 }, 257198090Srdivacky { X86::IDIV64r, X86::IDIV64m, 1, 0 }, 258198090Srdivacky { X86::IDIV8r, X86::IDIV8m, 1, 0 }, 259198090Srdivacky { X86::IMUL16r, X86::IMUL16m, 1, 0 }, 260198090Srdivacky { X86::IMUL32r, X86::IMUL32m, 1, 0 }, 261198090Srdivacky { X86::IMUL64r, X86::IMUL64m, 1, 0 }, 262198090Srdivacky { X86::IMUL8r, X86::IMUL8m, 1, 0 }, 263198090Srdivacky { X86::JMP32r, X86::JMP32m, 1, 0 }, 264198090Srdivacky { X86::JMP64r, X86::JMP64m, 1, 0 }, 265198090Srdivacky { X86::MOV16ri, X86::MOV16mi, 0, 0 }, 266198090Srdivacky { X86::MOV16rr, X86::MOV16mr, 0, 0 }, 267198090Srdivacky { X86::MOV32ri, X86::MOV32mi, 0, 0 }, 268198090Srdivacky { X86::MOV32rr, X86::MOV32mr, 0, 0 }, 269198090Srdivacky { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, 270198090Srdivacky { X86::MOV64rr, X86::MOV64mr, 0, 0 }, 271198090Srdivacky { X86::MOV8ri, X86::MOV8mi, 0, 0 }, 272198090Srdivacky { X86::MOV8rr, X86::MOV8mr, 0, 0 }, 273198090Srdivacky { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, 274198090Srdivacky { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, 275198090Srdivacky { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, 276198090Srdivacky { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, 277198090Srdivacky { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, 278198090Srdivacky { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, 279198090Srdivacky { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, 280198090Srdivacky { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, 281198090Srdivacky { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, 282198090Srdivacky { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, 283198090Srdivacky { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, 284198090Srdivacky { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, 285198090Srdivacky { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, 286198090Srdivacky { X86::MUL16r, X86::MUL16m, 1, 0 }, 287198090Srdivacky { X86::MUL32r, X86::MUL32m, 1, 0 }, 288198090Srdivacky { X86::MUL64r, X86::MUL64m, 1, 0 }, 
289198090Srdivacky { X86::MUL8r, X86::MUL8m, 1, 0 }, 290198090Srdivacky { X86::SETAEr, X86::SETAEm, 0, 0 }, 291198090Srdivacky { X86::SETAr, X86::SETAm, 0, 0 }, 292198090Srdivacky { X86::SETBEr, X86::SETBEm, 0, 0 }, 293198090Srdivacky { X86::SETBr, X86::SETBm, 0, 0 }, 294198090Srdivacky { X86::SETEr, X86::SETEm, 0, 0 }, 295198090Srdivacky { X86::SETGEr, X86::SETGEm, 0, 0 }, 296198090Srdivacky { X86::SETGr, X86::SETGm, 0, 0 }, 297198090Srdivacky { X86::SETLEr, X86::SETLEm, 0, 0 }, 298198090Srdivacky { X86::SETLr, X86::SETLm, 0, 0 }, 299198090Srdivacky { X86::SETNEr, X86::SETNEm, 0, 0 }, 300198090Srdivacky { X86::SETNOr, X86::SETNOm, 0, 0 }, 301198090Srdivacky { X86::SETNPr, X86::SETNPm, 0, 0 }, 302198090Srdivacky { X86::SETNSr, X86::SETNSm, 0, 0 }, 303198090Srdivacky { X86::SETOr, X86::SETOm, 0, 0 }, 304198090Srdivacky { X86::SETPr, X86::SETPm, 0, 0 }, 305198090Srdivacky { X86::SETSr, X86::SETSm, 0, 0 }, 306198090Srdivacky { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, 307198090Srdivacky { X86::TEST16ri, X86::TEST16mi, 1, 0 }, 308198090Srdivacky { X86::TEST32ri, X86::TEST32mi, 1, 0 }, 309198090Srdivacky { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, 310198090Srdivacky { X86::TEST8ri, X86::TEST8mi, 1, 0 } 311193323Sed }; 312193323Sed 313193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { 314193323Sed unsigned RegOp = OpTbl0[i][0]; 315193323Sed unsigned MemOp = OpTbl0[i][1]; 316198090Srdivacky unsigned Align = OpTbl0[i][3]; 317193323Sed if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, 318198090Srdivacky std::make_pair(MemOp,Align))).second) 319193323Sed assert(false && "Duplicated entries?"); 320193323Sed unsigned FoldedLoad = OpTbl0[i][2]; 321193323Sed // Index 0, folded load or store. 
322193323Sed unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); 323193323Sed if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) 324193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 325193323Sed std::make_pair(RegOp, AuxInfo))).second) 326193323Sed AmbEntries.push_back(MemOp); 327193323Sed } 328193323Sed 329198090Srdivacky static const unsigned OpTbl1[][3] = { 330198090Srdivacky { X86::CMP16rr, X86::CMP16rm, 0 }, 331198090Srdivacky { X86::CMP32rr, X86::CMP32rm, 0 }, 332198090Srdivacky { X86::CMP64rr, X86::CMP64rm, 0 }, 333198090Srdivacky { X86::CMP8rr, X86::CMP8rm, 0 }, 334198090Srdivacky { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, 335198090Srdivacky { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, 336198090Srdivacky { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, 337198090Srdivacky { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, 338198090Srdivacky { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, 339198090Srdivacky { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, 340198090Srdivacky { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, 341198090Srdivacky { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, 342198090Srdivacky { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, 343198090Srdivacky { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, 344198090Srdivacky { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, 345198090Srdivacky { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, 346198090Srdivacky { X86::IMUL16rri, X86::IMUL16rmi, 0 }, 347198090Srdivacky { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, 348198090Srdivacky { X86::IMUL32rri, X86::IMUL32rmi, 0 }, 349198090Srdivacky { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, 350198090Srdivacky { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, 351198090Srdivacky { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, 352198090Srdivacky { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, 353198090Srdivacky { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, 354198090Srdivacky { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, 355198090Srdivacky { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, 356198090Srdivacky { 
X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, 357198090Srdivacky { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, 358198090Srdivacky { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, 359198090Srdivacky { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, 360198090Srdivacky { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, 361198090Srdivacky { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, 362198090Srdivacky { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, 363198090Srdivacky { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, 364198090Srdivacky { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, 365198090Srdivacky { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, 366198090Srdivacky { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, 367198090Srdivacky { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, 368198090Srdivacky { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, 369198090Srdivacky { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, 370198090Srdivacky { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, 371198090Srdivacky { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, 372198090Srdivacky { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, 373198090Srdivacky { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, 374198090Srdivacky { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, 375198090Srdivacky { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, 376198090Srdivacky { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, 377198090Srdivacky { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, 378198090Srdivacky { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, 379198090Srdivacky { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, 380198090Srdivacky { X86::MOV16rr, X86::MOV16rm, 0 }, 381198090Srdivacky { X86::MOV32rr, X86::MOV32rm, 0 }, 382198090Srdivacky { X86::MOV64rr, X86::MOV64rm, 0 }, 383198090Srdivacky { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, 384198090Srdivacky { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, 385198090Srdivacky { X86::MOV8rr, X86::MOV8rm, 0 }, 
386198090Srdivacky { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, 387198090Srdivacky { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, 388198090Srdivacky { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, 389198090Srdivacky { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, 390198090Srdivacky { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, 391198090Srdivacky { X86::MOVDQArr, X86::MOVDQArm, 16 }, 392198090Srdivacky { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, 393198090Srdivacky { X86::MOVSDrr, X86::MOVSDrm, 0 }, 394198090Srdivacky { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, 395198090Srdivacky { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, 396198090Srdivacky { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, 397198090Srdivacky { X86::MOVSSrr, X86::MOVSSrm, 0 }, 398198090Srdivacky { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, 399198090Srdivacky { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, 400198090Srdivacky { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, 401198090Srdivacky { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, 402198090Srdivacky { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, 403198090Srdivacky { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, 404198090Srdivacky { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, 405202878Srdivacky { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, 406198090Srdivacky { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, 407198090Srdivacky { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, 408198090Srdivacky { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, 409198090Srdivacky { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, 410198090Srdivacky { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, 411198090Srdivacky { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, 412198090Srdivacky { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, 413198090Srdivacky { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, 414198090Srdivacky { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, 415198090Srdivacky { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, 416198090Srdivacky { X86::PSHUFDri, X86::PSHUFDmi, 16 }, 417198090Srdivacky { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, 418198090Srdivacky { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, 
419198090Srdivacky { X86::RCPPSr, X86::RCPPSm, 16 }, 420198090Srdivacky { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, 421198090Srdivacky { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, 422198090Srdivacky { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, 423198090Srdivacky { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, 424198090Srdivacky { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, 425198090Srdivacky { X86::SQRTPDr, X86::SQRTPDm, 16 }, 426198090Srdivacky { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, 427198090Srdivacky { X86::SQRTPSr, X86::SQRTPSm, 16 }, 428198090Srdivacky { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, 429198090Srdivacky { X86::SQRTSDr, X86::SQRTSDm, 0 }, 430198090Srdivacky { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, 431198090Srdivacky { X86::SQRTSSr, X86::SQRTSSm, 0 }, 432198090Srdivacky { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, 433198090Srdivacky { X86::TEST16rr, X86::TEST16rm, 0 }, 434198090Srdivacky { X86::TEST32rr, X86::TEST32rm, 0 }, 435198090Srdivacky { X86::TEST64rr, X86::TEST64rm, 0 }, 436198090Srdivacky { X86::TEST8rr, X86::TEST8rm, 0 }, 437193323Sed // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 438198090Srdivacky { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, 439198090Srdivacky { X86::UCOMISSrr, X86::UCOMISSrm, 0 } 440193323Sed }; 441193323Sed 442193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { 443193323Sed unsigned RegOp = OpTbl1[i][0]; 444193323Sed unsigned MemOp = OpTbl1[i][1]; 445198090Srdivacky unsigned Align = OpTbl1[i][2]; 446193323Sed if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, 447198090Srdivacky std::make_pair(MemOp,Align))).second) 448193323Sed assert(false && "Duplicated entries?"); 449198090Srdivacky // Index 1, folded load 450198090Srdivacky unsigned AuxInfo = 1 | (1 << 4); 451193323Sed if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) 452193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 453193323Sed std::make_pair(RegOp, AuxInfo))).second) 454193323Sed AmbEntries.push_back(MemOp); 455193323Sed } 
456193323Sed 457198090Srdivacky static const unsigned OpTbl2[][3] = { 458198090Srdivacky { X86::ADC32rr, X86::ADC32rm, 0 }, 459198090Srdivacky { X86::ADC64rr, X86::ADC64rm, 0 }, 460198090Srdivacky { X86::ADD16rr, X86::ADD16rm, 0 }, 461198090Srdivacky { X86::ADD32rr, X86::ADD32rm, 0 }, 462198090Srdivacky { X86::ADD64rr, X86::ADD64rm, 0 }, 463198090Srdivacky { X86::ADD8rr, X86::ADD8rm, 0 }, 464198090Srdivacky { X86::ADDPDrr, X86::ADDPDrm, 16 }, 465198090Srdivacky { X86::ADDPSrr, X86::ADDPSrm, 16 }, 466198090Srdivacky { X86::ADDSDrr, X86::ADDSDrm, 0 }, 467198090Srdivacky { X86::ADDSSrr, X86::ADDSSrm, 0 }, 468198090Srdivacky { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, 469198090Srdivacky { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, 470198090Srdivacky { X86::AND16rr, X86::AND16rm, 0 }, 471198090Srdivacky { X86::AND32rr, X86::AND32rm, 0 }, 472198090Srdivacky { X86::AND64rr, X86::AND64rm, 0 }, 473198090Srdivacky { X86::AND8rr, X86::AND8rm, 0 }, 474198090Srdivacky { X86::ANDNPDrr, X86::ANDNPDrm, 16 }, 475198090Srdivacky { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, 476198090Srdivacky { X86::ANDPDrr, X86::ANDPDrm, 16 }, 477198090Srdivacky { X86::ANDPSrr, X86::ANDPSrm, 16 }, 478198090Srdivacky { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, 479198090Srdivacky { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, 480198090Srdivacky { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, 481198090Srdivacky { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, 482198090Srdivacky { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, 483198090Srdivacky { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, 484198090Srdivacky { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, 485198090Srdivacky { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, 486198090Srdivacky { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, 487198090Srdivacky { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, 488198090Srdivacky { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, 489198090Srdivacky { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, 490198090Srdivacky { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, 491198090Srdivacky { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, 
492198090Srdivacky { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, 493198090Srdivacky { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, 494198090Srdivacky { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, 495198090Srdivacky { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, 496198090Srdivacky { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, 497198090Srdivacky { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, 498198090Srdivacky { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, 499198090Srdivacky { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, 500198090Srdivacky { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, 501198090Srdivacky { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, 502198090Srdivacky { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, 503198090Srdivacky { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, 504198090Srdivacky { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, 505198090Srdivacky { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, 506198090Srdivacky { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, 507198090Srdivacky { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, 508198090Srdivacky { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, 509198090Srdivacky { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, 510198090Srdivacky { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, 511198090Srdivacky { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, 512198090Srdivacky { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, 513198090Srdivacky { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, 514198090Srdivacky { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, 515198090Srdivacky { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, 516198090Srdivacky { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, 517198090Srdivacky { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, 518198090Srdivacky { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, 519198090Srdivacky { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, 520198090Srdivacky { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, 521198090Srdivacky { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, 522198090Srdivacky { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, 523198090Srdivacky { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, 524198090Srdivacky { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, 525198090Srdivacky { X86::CMOVS64rr, 
X86::CMOVS64rm, 0 }, 526198090Srdivacky { X86::CMPPDrri, X86::CMPPDrmi, 16 }, 527198090Srdivacky { X86::CMPPSrri, X86::CMPPSrmi, 16 }, 528198090Srdivacky { X86::CMPSDrr, X86::CMPSDrm, 0 }, 529198090Srdivacky { X86::CMPSSrr, X86::CMPSSrm, 0 }, 530198090Srdivacky { X86::DIVPDrr, X86::DIVPDrm, 16 }, 531198090Srdivacky { X86::DIVPSrr, X86::DIVPSrm, 16 }, 532198090Srdivacky { X86::DIVSDrr, X86::DIVSDrm, 0 }, 533198090Srdivacky { X86::DIVSSrr, X86::DIVSSrm, 0 }, 534198090Srdivacky { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, 535198090Srdivacky { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, 536198090Srdivacky { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, 537198090Srdivacky { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, 538198090Srdivacky { X86::FsORPDrr, X86::FsORPDrm, 16 }, 539198090Srdivacky { X86::FsORPSrr, X86::FsORPSrm, 16 }, 540198090Srdivacky { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, 541198090Srdivacky { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, 542198090Srdivacky { X86::HADDPDrr, X86::HADDPDrm, 16 }, 543198090Srdivacky { X86::HADDPSrr, X86::HADDPSrm, 16 }, 544198090Srdivacky { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, 545198090Srdivacky { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, 546198090Srdivacky { X86::IMUL16rr, X86::IMUL16rm, 0 }, 547198090Srdivacky { X86::IMUL32rr, X86::IMUL32rm, 0 }, 548198090Srdivacky { X86::IMUL64rr, X86::IMUL64rm, 0 }, 549198090Srdivacky { X86::MAXPDrr, X86::MAXPDrm, 16 }, 550198090Srdivacky { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, 551198090Srdivacky { X86::MAXPSrr, X86::MAXPSrm, 16 }, 552198090Srdivacky { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, 553198090Srdivacky { X86::MAXSDrr, X86::MAXSDrm, 0 }, 554198090Srdivacky { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, 555198090Srdivacky { X86::MAXSSrr, X86::MAXSSrm, 0 }, 556198090Srdivacky { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, 557198090Srdivacky { X86::MINPDrr, X86::MINPDrm, 16 }, 558198090Srdivacky { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, 559198090Srdivacky { X86::MINPSrr, X86::MINPSrm, 16 }, 560198090Srdivacky { 
X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, 561198090Srdivacky { X86::MINSDrr, X86::MINSDrm, 0 }, 562198090Srdivacky { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, 563198090Srdivacky { X86::MINSSrr, X86::MINSSrm, 0 }, 564198090Srdivacky { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, 565198090Srdivacky { X86::MULPDrr, X86::MULPDrm, 16 }, 566198090Srdivacky { X86::MULPSrr, X86::MULPSrm, 16 }, 567198090Srdivacky { X86::MULSDrr, X86::MULSDrm, 0 }, 568198090Srdivacky { X86::MULSSrr, X86::MULSSrm, 0 }, 569198090Srdivacky { X86::OR16rr, X86::OR16rm, 0 }, 570198090Srdivacky { X86::OR32rr, X86::OR32rm, 0 }, 571198090Srdivacky { X86::OR64rr, X86::OR64rm, 0 }, 572198090Srdivacky { X86::OR8rr, X86::OR8rm, 0 }, 573198090Srdivacky { X86::ORPDrr, X86::ORPDrm, 16 }, 574198090Srdivacky { X86::ORPSrr, X86::ORPSrm, 16 }, 575198090Srdivacky { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, 576198090Srdivacky { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, 577198090Srdivacky { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, 578198090Srdivacky { X86::PADDBrr, X86::PADDBrm, 16 }, 579198090Srdivacky { X86::PADDDrr, X86::PADDDrm, 16 }, 580198090Srdivacky { X86::PADDQrr, X86::PADDQrm, 16 }, 581198090Srdivacky { X86::PADDSBrr, X86::PADDSBrm, 16 }, 582198090Srdivacky { X86::PADDSWrr, X86::PADDSWrm, 16 }, 583198090Srdivacky { X86::PADDWrr, X86::PADDWrm, 16 }, 584198090Srdivacky { X86::PANDNrr, X86::PANDNrm, 16 }, 585198090Srdivacky { X86::PANDrr, X86::PANDrm, 16 }, 586198090Srdivacky { X86::PAVGBrr, X86::PAVGBrm, 16 }, 587198090Srdivacky { X86::PAVGWrr, X86::PAVGWrm, 16 }, 588198090Srdivacky { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, 589198090Srdivacky { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, 590198090Srdivacky { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, 591198090Srdivacky { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, 592198090Srdivacky { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, 593198090Srdivacky { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, 594198090Srdivacky { X86::PINSRWrri, X86::PINSRWrmi, 16 }, 595198090Srdivacky { X86::PMADDWDrr, 
X86::PMADDWDrm, 16 }, 596198090Srdivacky { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, 597198090Srdivacky { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, 598198090Srdivacky { X86::PMINSWrr, X86::PMINSWrm, 16 }, 599198090Srdivacky { X86::PMINUBrr, X86::PMINUBrm, 16 }, 600198090Srdivacky { X86::PMULDQrr, X86::PMULDQrm, 16 }, 601198090Srdivacky { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, 602198090Srdivacky { X86::PMULHWrr, X86::PMULHWrm, 16 }, 603198090Srdivacky { X86::PMULLDrr, X86::PMULLDrm, 16 }, 604198090Srdivacky { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 }, 605198090Srdivacky { X86::PMULLWrr, X86::PMULLWrm, 16 }, 606198090Srdivacky { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, 607198090Srdivacky { X86::PORrr, X86::PORrm, 16 }, 608198090Srdivacky { X86::PSADBWrr, X86::PSADBWrm, 16 }, 609198090Srdivacky { X86::PSLLDrr, X86::PSLLDrm, 16 }, 610198090Srdivacky { X86::PSLLQrr, X86::PSLLQrm, 16 }, 611198090Srdivacky { X86::PSLLWrr, X86::PSLLWrm, 16 }, 612198090Srdivacky { X86::PSRADrr, X86::PSRADrm, 16 }, 613198090Srdivacky { X86::PSRAWrr, X86::PSRAWrm, 16 }, 614198090Srdivacky { X86::PSRLDrr, X86::PSRLDrm, 16 }, 615198090Srdivacky { X86::PSRLQrr, X86::PSRLQrm, 16 }, 616198090Srdivacky { X86::PSRLWrr, X86::PSRLWrm, 16 }, 617198090Srdivacky { X86::PSUBBrr, X86::PSUBBrm, 16 }, 618198090Srdivacky { X86::PSUBDrr, X86::PSUBDrm, 16 }, 619198090Srdivacky { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, 620198090Srdivacky { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, 621198090Srdivacky { X86::PSUBWrr, X86::PSUBWrm, 16 }, 622198090Srdivacky { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, 623198090Srdivacky { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, 624198090Srdivacky { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, 625198090Srdivacky { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, 626198090Srdivacky { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, 627198090Srdivacky { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, 628198090Srdivacky { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, 629198090Srdivacky { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, 
630198090Srdivacky { X86::PXORrr, X86::PXORrm, 16 }, 631198090Srdivacky { X86::SBB32rr, X86::SBB32rm, 0 }, 632198090Srdivacky { X86::SBB64rr, X86::SBB64rm, 0 }, 633198090Srdivacky { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, 634198090Srdivacky { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, 635198090Srdivacky { X86::SUB16rr, X86::SUB16rm, 0 }, 636198090Srdivacky { X86::SUB32rr, X86::SUB32rm, 0 }, 637198090Srdivacky { X86::SUB64rr, X86::SUB64rm, 0 }, 638198090Srdivacky { X86::SUB8rr, X86::SUB8rm, 0 }, 639198090Srdivacky { X86::SUBPDrr, X86::SUBPDrm, 16 }, 640198090Srdivacky { X86::SUBPSrr, X86::SUBPSrm, 16 }, 641198090Srdivacky { X86::SUBSDrr, X86::SUBSDrm, 0 }, 642198090Srdivacky { X86::SUBSSrr, X86::SUBSSrm, 0 }, 643193323Sed // FIXME: TEST*rr -> swapped operand of TEST*mr. 644198090Srdivacky { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, 645198090Srdivacky { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, 646198090Srdivacky { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, 647198090Srdivacky { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, 648198090Srdivacky { X86::XOR16rr, X86::XOR16rm, 0 }, 649198090Srdivacky { X86::XOR32rr, X86::XOR32rm, 0 }, 650198090Srdivacky { X86::XOR64rr, X86::XOR64rm, 0 }, 651198090Srdivacky { X86::XOR8rr, X86::XOR8rm, 0 }, 652198090Srdivacky { X86::XORPDrr, X86::XORPDrm, 16 }, 653198090Srdivacky { X86::XORPSrr, X86::XORPSrm, 16 } 654193323Sed }; 655193323Sed 656193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { 657193323Sed unsigned RegOp = OpTbl2[i][0]; 658193323Sed unsigned MemOp = OpTbl2[i][1]; 659198090Srdivacky unsigned Align = OpTbl2[i][2]; 660193323Sed if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, 661198090Srdivacky std::make_pair(MemOp,Align))).second) 662193323Sed assert(false && "Duplicated entries?"); 663198090Srdivacky // Index 2, folded load 664198090Srdivacky unsigned AuxInfo = 2 | (1 << 4); 665193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 666193323Sed std::make_pair(RegOp, AuxInfo))).second) 
// (Tail of the constructor's fold-table registration loop: records memory-form
// opcodes whose unfold mapping would be ambiguous.)
      AmbEntries.push_back(MemOp);
  }

  // Remove ambiguous entries.
  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}

/// isMoveInstr - Return true if MI is a plain register-to-register move and
/// report its source/destination registers and subregister indices.  Covers
/// GPR, scalar/packed SSE, FP-stack and MMX move opcodes.
bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case X86::MOV8rr:
  case X86::MOV8rr_NOREX:
  case X86::MOV16rr:
  case X86::MOV32rr:
  case X86::MOV64rr:
  case X86::MOVSSrr:
  case X86::MOVSDrr:

  // FP Stack register class copies
  case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
  case X86::MOV_Fp3264: case X86::MOV_Fp3280:
  case X86::MOV_Fp6432: case X86::MOV_Fp8032:

  case X86::FsMOVAPSrr:
  case X86::FsMOVAPDrr:
  case X86::MOVAPSrr:
  case X86::MOVAPDrr:
  case X86::MOVDQArr:
  case X86::MOVSS2PSrr:
  case X86::MOVSD2PDrr:
  case X86::MOVPS2SSrr:
  case X86::MOVPD2SDrr:
  case X86::MMX_MOVQ64rr:
    // Operand 0 is always the destination, operand 1 the source.
    assert(MI.getNumOperands() >= 2 &&
           MI.getOperand(0).isReg() &&
           MI.getOperand(1).isReg() &&
           "invalid register-register move instruction");
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SrcSubIdx = MI.getOperand(1).getSubReg();
    DstSubIdx = MI.getOperand(0).getSubReg();
    return true;
  }
}

/// isCoalescableExtInstr - Return true if MI is a sign/zero extension that can
/// be modeled by the coalescer as a copy from a subregister: reports the
/// source and destination registers and the subregister index of the source
/// value inside the destination.
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                    unsigned &SrcReg, unsigned &DstReg,
                                    unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: break;
  case X86::MOVSX16rr8:
  case X86::MOVZX16rr8:
  case X86::MOVSX32rr8:
  case X86::MOVZX32rr8:
  case X86::MOVSX64rr8:
  case X86::MOVZX64rr8:
    if (!TM.getSubtarget<X86Subtarget>().is64Bit())
      // It's not always legal to reference the low 8-bit of the larger
      // register in 32-bit mode.
      return false;
    // Intentional fall-through for the 64-bit case.
  case X86::MOVSX32rr16:
  case X86::MOVZX32rr16:
  case X86::MOVSX64rr16:
  case X86::MOVZX64rr16:
  case X86::MOVSX64rr32:
  case X86::MOVZX64rr32: {
    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
      // Be conservative.
      return false;
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    // Map the opcode's source width to the hard-coded x86 subregister index
    // (1 = 8-bit, 3 = 16-bit, 4 = 32-bit in this revision's numbering).
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable(0);
      break;
    case X86::MOVSX16rr8:
    case X86::MOVZX16rr8:
    case X86::MOVSX32rr8:
    case X86::MOVZX32rr8:
    case X86::MOVSX64rr8:
    case X86::MOVZX64rr8:
      SubIdx = 1;
      break;
    case X86::MOVSX32rr16:
    case X86::MOVZX32rr16:
    case X86::MOVSX64rr16:
    case X86::MOVZX64rr16:
      SubIdx = 3;
      break;
    case X86::MOVSX64rr32:
    case X86::MOVZX64rr32:
      SubIdx = 4;
      break;
    }
    return true;
  }
  }
  return false;
}

/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
/// The pattern checked is FI + scale 1 + no index register + displacement 0,
/// i.e. a direct, unoffset frame access.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
                                  int &FrameIndex) const {
  if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
      MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
      MI->getOperand(Op+1).getImm() == 1 &&
      MI->getOperand(Op+2).getReg() == 0 &&
      MI->getOperand(Op+3).getImm() == 0) {
    FrameIndex = MI->getOperand(Op).getIndex();
    return true;
  }
  return false;
}

/// isFrameLoadOpcode - Return true if Opcode is one of the register-load
/// instructions that may implement a reload from a stack slot.
static bool isFrameLoadOpcode(int Opcode) {
  switch (Opcode) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
    return true;
    break;
  }
  return false;
}

/// isFrameStoreOpcode - Return true if Opcode is one of the register-store
/// instructions that may implement a spill to a stack slot.
static bool isFrameStoreOpcode(int Opcode) {
  switch (Opcode) {
  default: break;
  case X86::MOV8mr:
  case X86::MOV16mr:
  case X86::MOV32mr:
  case X86::MOV64mr:
  case X86::ST_FpP64m:
  case X86::MOVSSmr:
  case X86::MOVSDmr:
  case X86::MOVAPSmr:
  case X86::MOVAPDmr:
  case X86::MOVDQAmr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
    return true;
  }
  return false;
}

/// isLoadFromStackSlot - If MI is a direct load from a stack slot, return the
/// destination register and set FrameIndex; otherwise return 0.
/// For load opcodes the memory reference starts at operand 1.
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
  if (isFrameLoadOpcode(MI->getOpcode()))
    if (isFrameOperand(MI, 1, FrameIndex))
      return MI->getOperand(0).getReg();
  return 0;
}

/// isLoadFromStackSlotPostFE - Like isLoadFromStackSlot, but also recognizes
/// loads whose frame operands were rewritten by frame-index elimination by
/// falling back to the instruction's memory operands.
unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                 int &FrameIndex) const {
  if (isFrameLoadOpcode(MI->getOpcode())) {
    unsigned Reg;
    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    const MachineMemOperand *Dummy;
    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
  }
  return 0;
}

/// hasLoadFromStackSlot - Scan MI's memory operands for a load from a fixed
/// stack pseudo source value; on success report the MMO and frame index.
bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
                                        const MachineMemOperand *&MMO,
                                        int &FrameIndex) const {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end();
       o != oe;
       ++o) {
    if ((*o)->isLoad() && (*o)->getValue())
      if (const FixedStackPseudoSourceValue *Value =
          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
        FrameIndex = Value->getFrameIndex();
        MMO = *o;
        return true;
      }
  }
  return false;
}

/// isStoreToStackSlot - If MI is a direct store to a stack slot, return the
/// stored register and set FrameIndex; otherwise return 0.
/// For store opcodes the memory reference starts at operand 0 and the stored
/// register follows the X86AddrNumOperands address operands.
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                          int &FrameIndex) const {
  if (isFrameStoreOpcode(MI->getOpcode()))
    if (isFrameOperand(MI, 0, FrameIndex))
      return MI->getOperand(X86AddrNumOperands).getReg();
  return 0;
}

/// isStoreToStackSlotPostFE - Like isStoreToStackSlot, but also recognizes
/// stores whose frame operands were rewritten by frame-index elimination.
unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                               int &FrameIndex) const {
  if (isFrameStoreOpcode(MI->getOpcode())) {
    unsigned Reg;
    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    const MachineMemOperand *Dummy;
    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
  }
  return 0;
}

/// hasStoreToStackSlot - Scan MI's memory operands for a store to a fixed
/// stack pseudo source value; on success report the MMO and frame index.
bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
                                       const MachineMemOperand *&MMO,
                                       int &FrameIndex) const {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end();
       o != oe;
       ++o) {
    if ((*o)->isStore() && (*o)->getValue())
      if (const FixedStackPseudoSourceValue *Value =
          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
        FrameIndex = Value->getFrameIndex();
        MMO = *o;
        return true;
      }
  }
  return false;
}

/// regIsPICBase - Return true if register is PIC base (i.e. defined by
/// X86::MOVPC32r.
908193323Sedstatic bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { 909193323Sed bool isPICBase = false; 910193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 911193323Sed E = MRI.def_end(); I != E; ++I) { 912193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 913193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 914193323Sed return false; 915193323Sed assert(!isPICBase && "More than one PIC base?"); 916193323Sed isPICBase = true; 917193323Sed } 918193323Sed return isPICBase; 919193323Sed} 920193323Sed 921193323Sedbool 922198090SrdivackyX86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, 923198090Srdivacky AliasAnalysis *AA) const { 924193323Sed switch (MI->getOpcode()) { 925193323Sed default: break; 926193323Sed case X86::MOV8rm: 927193323Sed case X86::MOV16rm: 928193323Sed case X86::MOV32rm: 929193323Sed case X86::MOV64rm: 930193323Sed case X86::LD_Fp64m: 931193323Sed case X86::MOVSSrm: 932193323Sed case X86::MOVSDrm: 933193323Sed case X86::MOVAPSrm: 934199481Srdivacky case X86::MOVUPSrm: 935199481Srdivacky case X86::MOVUPSrm_Int: 936193323Sed case X86::MOVAPDrm: 937193323Sed case X86::MOVDQArm: 938193323Sed case X86::MMX_MOVD64rm: 939199481Srdivacky case X86::MMX_MOVQ64rm: 940199481Srdivacky case X86::FsMOVAPSrm: 941199481Srdivacky case X86::FsMOVAPDrm: { 942193323Sed // Loads from constant pools are trivially rematerializable. 943193323Sed if (MI->getOperand(1).isReg() && 944193323Sed MI->getOperand(2).isImm() && 945193323Sed MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 946198090Srdivacky MI->isInvariantLoad(AA)) { 947193323Sed unsigned BaseReg = MI->getOperand(1).getReg(); 948195098Sed if (BaseReg == 0 || BaseReg == X86::RIP) 949193323Sed return true; 950193323Sed // Allow re-materialization of PIC load. 
951193323Sed if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) 952193323Sed return false; 953193323Sed const MachineFunction &MF = *MI->getParent()->getParent(); 954193323Sed const MachineRegisterInfo &MRI = MF.getRegInfo(); 955193323Sed bool isPICBase = false; 956193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 957193323Sed E = MRI.def_end(); I != E; ++I) { 958193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 959193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 960193323Sed return false; 961193323Sed assert(!isPICBase && "More than one PIC base?"); 962193323Sed isPICBase = true; 963193323Sed } 964193323Sed return isPICBase; 965193323Sed } 966193323Sed return false; 967193323Sed } 968193323Sed 969193323Sed case X86::LEA32r: 970193323Sed case X86::LEA64r: { 971193323Sed if (MI->getOperand(2).isImm() && 972193323Sed MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 973193323Sed !MI->getOperand(4).isReg()) { 974193323Sed // lea fi#, lea GV, etc. are all rematerializable. 975193323Sed if (!MI->getOperand(1).isReg()) 976193323Sed return true; 977193323Sed unsigned BaseReg = MI->getOperand(1).getReg(); 978193323Sed if (BaseReg == 0) 979193323Sed return true; 980193323Sed // Allow re-materialization of lea PICBase + x. 981193323Sed const MachineFunction &MF = *MI->getParent()->getParent(); 982193323Sed const MachineRegisterInfo &MRI = MF.getRegInfo(); 983193323Sed return regIsPICBase(BaseReg, MRI); 984193323Sed } 985193323Sed return false; 986193323Sed } 987193323Sed } 988193323Sed 989193323Sed // All other instructions marked M_REMATERIALIZABLE are always trivially 990193323Sed // rematerializable. 991193323Sed return true; 992193323Sed} 993193323Sed 994193323Sed/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that 995193323Sed/// would clobber the EFLAGS condition register. Note the result may be 996193323Sed/// conservative. 
If it cannot definitely determine the safety after visiting 997198090Srdivacky/// a few instructions in each direction it assumes it's not safe. 998193323Sedstatic bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, 999193323Sed MachineBasicBlock::iterator I) { 1000193323Sed // It's always safe to clobber EFLAGS at the end of a block. 1001193323Sed if (I == MBB.end()) 1002193323Sed return true; 1003193323Sed 1004193323Sed // For compile time consideration, if we are not able to determine the 1005198090Srdivacky // safety after visiting 4 instructions in each direction, we will assume 1006198090Srdivacky // it's not safe. 1007198090Srdivacky MachineBasicBlock::iterator Iter = I; 1008198090Srdivacky for (unsigned i = 0; i < 4; ++i) { 1009193323Sed bool SeenDef = false; 1010198090Srdivacky for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { 1011198090Srdivacky MachineOperand &MO = Iter->getOperand(j); 1012193323Sed if (!MO.isReg()) 1013193323Sed continue; 1014193323Sed if (MO.getReg() == X86::EFLAGS) { 1015193323Sed if (MO.isUse()) 1016193323Sed return false; 1017193323Sed SeenDef = true; 1018193323Sed } 1019193323Sed } 1020193323Sed 1021193323Sed if (SeenDef) 1022193323Sed // This instruction defines EFLAGS, no need to look any further. 1023193323Sed return true; 1024198090Srdivacky ++Iter; 1025193323Sed 1026193323Sed // If we make it to the end of the block, it's safe to clobber EFLAGS. 1027198090Srdivacky if (Iter == MBB.end()) 1028193323Sed return true; 1029193323Sed } 1030193323Sed 1031198090Srdivacky Iter = I; 1032198090Srdivacky for (unsigned i = 0; i < 4; ++i) { 1033198090Srdivacky // If we make it to the beginning of the block, it's safe to clobber 1034198090Srdivacky // EFLAGS iff EFLAGS is not live-in. 
1035198090Srdivacky if (Iter == MBB.begin()) 1036198090Srdivacky return !MBB.isLiveIn(X86::EFLAGS); 1037198090Srdivacky 1038198090Srdivacky --Iter; 1039198090Srdivacky bool SawKill = false; 1040198090Srdivacky for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { 1041198090Srdivacky MachineOperand &MO = Iter->getOperand(j); 1042198090Srdivacky if (MO.isReg() && MO.getReg() == X86::EFLAGS) { 1043198090Srdivacky if (MO.isDef()) return MO.isDead(); 1044198090Srdivacky if (MO.isKill()) SawKill = true; 1045198090Srdivacky } 1046198090Srdivacky } 1047198090Srdivacky 1048198090Srdivacky if (SawKill) 1049198090Srdivacky // This instruction kills EFLAGS and doesn't redefine it, so 1050198090Srdivacky // there's no need to look further. 1051198090Srdivacky return true; 1052198090Srdivacky } 1053198090Srdivacky 1054193323Sed // Conservative answer. 1055193323Sed return false; 1056193323Sed} 1057193323Sed 1058193323Sedvoid X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, 1059193323Sed MachineBasicBlock::iterator I, 1060198090Srdivacky unsigned DestReg, unsigned SubIdx, 1061199481Srdivacky const MachineInstr *Orig, 1062199481Srdivacky const TargetRegisterInfo *TRI) const { 1063193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 1064193323Sed if (I != MBB.end()) DL = I->getDebugLoc(); 1065193323Sed 1066193323Sed if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { 1067199481Srdivacky DestReg = TRI->getSubReg(DestReg, SubIdx); 1068193323Sed SubIdx = 0; 1069193323Sed } 1070193323Sed 1071193323Sed // MOV32r0 etc. are implemented with xor which clobbers condition code. 1072193323Sed // Re-materialize them as movri instructions to avoid side effects. 
1073198090Srdivacky bool Clone = true; 1074198090Srdivacky unsigned Opc = Orig->getOpcode(); 1075198090Srdivacky switch (Opc) { 1076193323Sed default: break; 1077193323Sed case X86::MOV8r0: 1078202375Srdivacky case X86::MOV16r0: 1079202375Srdivacky case X86::MOV32r0: 1080202375Srdivacky case X86::MOV64r0: { 1081193323Sed if (!isSafeToClobberEFLAGS(MBB, I)) { 1082198090Srdivacky switch (Opc) { 1083193323Sed default: break; 1084193323Sed case X86::MOV8r0: Opc = X86::MOV8ri; break; 1085202375Srdivacky case X86::MOV16r0: Opc = X86::MOV16ri; break; 1086193323Sed case X86::MOV32r0: Opc = X86::MOV32ri; break; 1087202375Srdivacky case X86::MOV64r0: Opc = X86::MOV64ri; break; 1088193323Sed } 1089198090Srdivacky Clone = false; 1090193323Sed } 1091193323Sed break; 1092193323Sed } 1093193323Sed } 1094193323Sed 1095198090Srdivacky if (Clone) { 1096193323Sed MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); 1097193323Sed MI->getOperand(0).setReg(DestReg); 1098193323Sed MBB.insert(I, MI); 1099198090Srdivacky } else { 1100198090Srdivacky BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); 1101193323Sed } 1102193323Sed 1103198090Srdivacky MachineInstr *NewMI = prior(I); 1104198090Srdivacky NewMI->getOperand(0).setSubReg(SubIdx); 1105193323Sed} 1106193323Sed 1107193323Sed/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that 1108193323Sed/// is not marked dead. 
static bool hasLiveCondCodeDef(MachineInstr *MI) {
  // Scan all operands for a non-dead def of EFLAGS.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
/// to a 32-bit superregister and then truncating back down to a 16-bit
/// subregister.
MachineInstr *
X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
                                           MachineFunction::iterator &MFI,
                                           MachineBasicBlock::iterator &MBBI,
                                           LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  // In 64-bit mode use LEA64_32r so the result is defined in a 32-bit subreg.
  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
    ? X86::LEA64_32r : X86::LEA32r;
  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);

  // Build and insert into an implicit UNDEF value. This is OK because
  // we'll be shifting and then extracting the lower 16-bits.
  // This has the potential to cause partial register stall. e.g.
  //   movw    (%rbp,%rcx,2), %dx
  //   leal    -65(%rdx), %esi
  // But testing has shown this *does* help performance in 64-bit mode (at
  // least on modern x86 machines).
  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
  MachineInstr *InsMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
    .addReg(leaInReg)
    .addReg(Src, getKillRegState(isKill))
    .addImm(X86::SUBREG_16BIT);

  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                    get(Opc), leaOutReg);
  // Add the LEA address operands that encode the original 16-bit operation.
  switch (MIOpc) {
  default:
    llvm_unreachable(0);
    break;
  case X86::SHL16ri: {
    // shl $imm -> lea (,%reg,1<<imm)
    unsigned ShAmt = MI->getOperand(2).getImm();
    MIB.addReg(0).addImm(1 << ShAmt)
       .addReg(leaInReg, RegState::Kill).addImm(0);
    break;
  }
  case X86::INC16r:
  case X86::INC64_16r:
    addLeaRegOffset(MIB, leaInReg, true, 1);
    break;
  case X86::DEC16r:
  case X86::DEC64_16r:
    addLeaRegOffset(MIB, leaInReg, true, -1);
    break;
  case X86::ADD16ri:
  case X86::ADD16ri8:
    addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
    break;
  case X86::ADD16rr: {
    unsigned Src2 = MI->getOperand(2).getReg();
    bool isKill2 = MI->getOperand(2).isKill();
    unsigned leaInReg2 = 0;
    MachineInstr *InsMI2 = 0;
    if (Src == Src2) {
      // ADD16rr %reg1028<kill>, %reg1028
      // just a single insert_subreg.
      addRegReg(MIB, leaInReg, true, leaInReg, false);
    } else {
      // The second source needs its own 32-bit promotion.
      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
      // Build and insert into an implicit UNDEF value. This is OK because
      // we'll be shifting and then extracting the lower 16-bits.
      BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
      InsMI2 =
        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
        .addReg(leaInReg2)
        .addReg(Src2, getKillRegState(isKill2))
        .addImm(X86::SUBREG_16BIT);
      addRegReg(MIB, leaInReg, true, leaInReg2, true);
    }
    if (LV && isKill2 && InsMI2)
      LV->replaceKillInstruction(Src2, MI, InsMI2);
    break;
  }
  }

  // Truncate the 32-bit LEA result back down to the 16-bit destination.
  MachineInstr *NewMI = MIB;
  MachineInstr *ExtMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
    .addReg(Dest, RegState::Define | getDeadRegState(isDead))
    .addReg(leaOutReg, RegState::Kill)
    .addImm(X86::SUBREG_16BIT);

  if (LV) {
    // Update live variables
    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
    if (isKill)
      LV->replaceKillInstruction(Src, MI, InsMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, ExtMI);
  }

  return ExtMI;
}
1225200581Srdivacky 1226193323Sed/// convertToThreeAddress - This method must be implemented by targets that 1227193323Sed/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target 1228193323Sed/// may be able to convert a two-address instruction into a true 1229193323Sed/// three-address instruction on demand. This allows the X86 target (for 1230193323Sed/// example) to convert ADD and SHL instructions into LEA instructions if they 1231193323Sed/// would require register copies due to two-addressness. 1232193323Sed/// 1233193323Sed/// This method returns a null pointer if the transformation cannot be 1234193323Sed/// performed, otherwise it returns the new instruction. 1235193323Sed/// 1236193323SedMachineInstr * 1237193323SedX86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 1238193323Sed MachineBasicBlock::iterator &MBBI, 1239193323Sed LiveVariables *LV) const { 1240193323Sed MachineInstr *MI = MBBI; 1241193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 1242193323Sed // All instructions input are two-addr instructions. Get the known operands. 1243193323Sed unsigned Dest = MI->getOperand(0).getReg(); 1244193323Sed unsigned Src = MI->getOperand(1).getReg(); 1245193323Sed bool isDead = MI->getOperand(0).isDead(); 1246193323Sed bool isKill = MI->getOperand(1).isKill(); 1247193323Sed 1248193323Sed MachineInstr *NewMI = NULL; 1249193323Sed // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When 1250193323Sed // we have better subtarget support, enable the 16-bit LEA generation here. 1251200581Srdivacky // 16-bit LEA is also slow on Core2. 
1252193323Sed bool DisableLEA16 = true; 1253200581Srdivacky bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 1254193323Sed 1255193323Sed unsigned MIOpc = MI->getOpcode(); 1256193323Sed switch (MIOpc) { 1257193323Sed case X86::SHUFPSrri: { 1258193323Sed assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); 1259193323Sed if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; 1260193323Sed 1261193323Sed unsigned B = MI->getOperand(1).getReg(); 1262193323Sed unsigned C = MI->getOperand(2).getReg(); 1263193323Sed if (B != C) return 0; 1264193323Sed unsigned A = MI->getOperand(0).getReg(); 1265193323Sed unsigned M = MI->getOperand(3).getImm(); 1266193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) 1267193323Sed .addReg(A, RegState::Define | getDeadRegState(isDead)) 1268193323Sed .addReg(B, getKillRegState(isKill)).addImm(M); 1269193323Sed break; 1270193323Sed } 1271193323Sed case X86::SHL64ri: { 1272193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1273193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1274193323Sed // the flags produced by a shift yet, so this is safe. 1275193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1276193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1277193323Sed 1278193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 1279193323Sed .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1280193323Sed .addReg(0).addImm(1 << ShAmt) 1281193323Sed .addReg(Src, getKillRegState(isKill)) 1282193323Sed .addImm(0); 1283193323Sed break; 1284193323Sed } 1285193323Sed case X86::SHL32ri: { 1286193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1287193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1288193323Sed // the flags produced by a shift yet, so this is safe. 
1289193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1290193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1291193323Sed 1292200581Srdivacky unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 1293193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1294193323Sed .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1295193323Sed .addReg(0).addImm(1 << ShAmt) 1296193323Sed .addReg(Src, getKillRegState(isKill)).addImm(0); 1297193323Sed break; 1298193323Sed } 1299193323Sed case X86::SHL16ri: { 1300193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1301193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1302193323Sed // the flags produced by a shift yet, so this is safe. 1303193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1304193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1305193323Sed 1306200581Srdivacky if (DisableLEA16) 1307200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1308200581Srdivacky NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1309200581Srdivacky .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1310200581Srdivacky .addReg(0).addImm(1 << ShAmt) 1311200581Srdivacky .addReg(Src, getKillRegState(isKill)) 1312200581Srdivacky .addImm(0); 1313193323Sed break; 1314193323Sed } 1315193323Sed default: { 1316193323Sed // The following opcodes also sets the condition code register(s). Only 1317193323Sed // convert them to equivalent lea if the condition code register def's 1318193323Sed // are dead! 1319193323Sed if (hasLiveCondCodeDef(MI)) 1320193323Sed return 0; 1321193323Sed 1322193323Sed switch (MIOpc) { 1323193323Sed default: return 0; 1324193323Sed case X86::INC64r: 1325193323Sed case X86::INC32r: 1326193323Sed case X86::INC64_32r: { 1327193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 1328193323Sed unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r 1329193323Sed : (is64Bit ? 
X86::LEA64_32r : X86::LEA32r); 1330193323Sed NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1331193323Sed .addReg(Dest, RegState::Define | 1332193323Sed getDeadRegState(isDead)), 1333193323Sed Src, isKill, 1); 1334193323Sed break; 1335193323Sed } 1336193323Sed case X86::INC16r: 1337193323Sed case X86::INC64_16r: 1338200581Srdivacky if (DisableLEA16) 1339200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1340193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 1341193323Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1342193323Sed .addReg(Dest, RegState::Define | 1343193323Sed getDeadRegState(isDead)), 1344193323Sed Src, isKill, 1); 1345193323Sed break; 1346193323Sed case X86::DEC64r: 1347193323Sed case X86::DEC32r: 1348193323Sed case X86::DEC64_32r: { 1349193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 1350193323Sed unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r 1351193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 1352193323Sed NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1353193323Sed .addReg(Dest, RegState::Define | 1354193323Sed getDeadRegState(isDead)), 1355193323Sed Src, isKill, -1); 1356193323Sed break; 1357193323Sed } 1358193323Sed case X86::DEC16r: 1359193323Sed case X86::DEC64_16r: 1360200581Srdivacky if (DisableLEA16) 1361200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1362193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 1363193323Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1364193323Sed .addReg(Dest, RegState::Define | 1365193323Sed getDeadRegState(isDead)), 1366193323Sed Src, isKill, -1); 1367193323Sed break; 1368193323Sed case X86::ADD64rr: 1369193323Sed case X86::ADD32rr: { 1370193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1371193323Sed unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r 1372193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 1373193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 1374193323Sed bool isKill2 = MI->getOperand(2).isKill(); 1375193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1376193323Sed .addReg(Dest, RegState::Define | 1377193323Sed getDeadRegState(isDead)), 1378193323Sed Src, isKill, Src2, isKill2); 1379193323Sed if (LV && isKill2) 1380193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 1381193323Sed break; 1382193323Sed } 1383193323Sed case X86::ADD16rr: { 1384200581Srdivacky if (DisableLEA16) 1385200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1386193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1387193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 1388193323Sed bool isKill2 = MI->getOperand(2).isKill(); 1389193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1390193323Sed .addReg(Dest, RegState::Define | 1391193323Sed getDeadRegState(isDead)), 1392193323Sed Src, isKill, Src2, isKill2); 1393193323Sed if (LV && isKill2) 1394193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 1395193323Sed break; 1396193323Sed } 1397193323Sed case X86::ADD64ri32: 1398193323Sed case X86::ADD64ri8: 1399193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1400200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 1401200581Srdivacky .addReg(Dest, RegState::Define | 1402200581Srdivacky getDeadRegState(isDead)), 1403200581Srdivacky Src, isKill, MI->getOperand(2).getImm()); 1404193323Sed break; 1405193323Sed case X86::ADD32ri: 1406200581Srdivacky case X86::ADD32ri8: { 1407193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1408200581Srdivacky unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 1409200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1410200581Srdivacky .addReg(Dest, RegState::Define | 1411200581Srdivacky getDeadRegState(isDead)), 1412193323Sed Src, isKill, MI->getOperand(2).getImm()); 1413193323Sed break; 1414200581Srdivacky } 1415193323Sed case X86::ADD16ri: 1416193323Sed case X86::ADD16ri8: 1417200581Srdivacky if (DisableLEA16) 1418200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1419193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1420200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1421200581Srdivacky .addReg(Dest, RegState::Define | 1422200581Srdivacky getDeadRegState(isDead)), 1423200581Srdivacky Src, isKill, MI->getOperand(2).getImm()); 1424193323Sed break; 1425193323Sed } 1426193323Sed } 1427193323Sed } 1428193323Sed 1429193323Sed if (!NewMI) return 0; 1430193323Sed 1431193323Sed if (LV) { // Update live variables 1432193323Sed if (isKill) 1433193323Sed LV->replaceKillInstruction(Src, MI, NewMI); 1434193323Sed if (isDead) 1435193323Sed LV->replaceKillInstruction(Dest, MI, NewMI); 1436193323Sed } 1437193323Sed 1438193323Sed MFI->insert(MBBI, NewMI); // Insert the new inst 1439193323Sed return NewMI; 1440193323Sed} 1441193323Sed 1442193323Sed/// commuteInstruction - We have a few instructions that must be hacked on to 1443193323Sed/// commute them. 
///
/// On success the (possibly cloned) commuted instruction is returned.  When
/// NewMI is set, MI is first cloned and the clone is modified, leaving the
/// original untouched; otherwise MI itself is rewritten in place.
MachineInstr *
X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  switch (MI->getOpcode()) {
  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
    // Swapping the two register operands of a double-shift flips its
    // direction (SHLD <-> SHRD) and complements the shift amount against the
    // operand size; retarget the opcode and immediate here, then let the
    // default implementation swap the register operands.
    unsigned Opc;
    unsigned Size;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
    case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
    case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
    case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
    case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
    case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
    }
    unsigned Amt = MI->getOperand(3).getImm();
    if (NewMI) {
      // Caller asked for a fresh instruction: clone, then mutate the clone.
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    MI->getOperand(3).setImm(Size-Amt);
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
  case X86::CMOVB16rr:
  case X86::CMOVB32rr:
  case X86::CMOVB64rr:
  case X86::CMOVAE16rr:
  case X86::CMOVAE32rr:
  case X86::CMOVAE64rr:
  case X86::CMOVE16rr:
  case X86::CMOVE32rr:
  case X86::CMOVE64rr:
  case X86::CMOVNE16rr:
  case X86::CMOVNE32rr:
  case X86::CMOVNE64rr:
  case X86::CMOVBE16rr:
  case X86::CMOVBE32rr:
  case X86::CMOVBE64rr:
  case X86::CMOVA16rr:
  case X86::CMOVA32rr:
  case X86::CMOVA64rr:
  case X86::CMOVL16rr:
  case X86::CMOVL32rr:
  case X86::CMOVL64rr:
  case X86::CMOVGE16rr:
  case X86::CMOVGE32rr:
  case X86::CMOVGE64rr:
  case X86::CMOVLE16rr:
  case X86::CMOVLE32rr:
  case X86::CMOVLE64rr:
  case X86::CMOVG16rr:
  case X86::CMOVG32rr:
  case X86::CMOVG64rr:
  case X86::CMOVS16rr:
  case X86::CMOVS32rr:
  case X86::CMOVS64rr:
  case X86::CMOVNS16rr:
  case X86::CMOVNS32rr:
  case X86::CMOVNS64rr:
  case X86::CMOVP16rr:
  case X86::CMOVP32rr:
  case X86::CMOVP64rr:
  case X86::CMOVNP16rr:
  case X86::CMOVNP32rr:
  case X86::CMOVNP64rr:
  case X86::CMOVO16rr:
  case X86::CMOVO32rr:
  case X86::CMOVO64rr:
  case X86::CMOVNO16rr:
  case X86::CMOVNO32rr:
  case X86::CMOVNO64rr: {
    // Commuting a CMOVcc swaps which operand is selected when the condition
    // holds, so the condition must be inverted to preserve semantics.  This
    // table maps each CMOVcc to CMOV!cc; the rewritten instruction then falls
    // through to the default operand-swapping path below.
    unsigned Opc = 0;
    switch (MI->getOpcode()) {
    // Every opcode listed in the outer case label appears in this table, so
    // Opc is always assigned before use; the default is unreachable here.
    default: break;
    case X86::CMOVB16rr:  Opc = X86::CMOVAE16rr; break;
    case X86::CMOVB32rr:  Opc = X86::CMOVAE32rr; break;
    case X86::CMOVB64rr:  Opc = X86::CMOVAE64rr; break;
    case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
    case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
    case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
    case X86::CMOVE16rr:  Opc = X86::CMOVNE16rr; break;
    case X86::CMOVE32rr:  Opc = X86::CMOVNE32rr; break;
    case X86::CMOVE64rr:  Opc = X86::CMOVNE64rr; break;
    case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
    case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
    case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
    case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
    case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
    case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
    case X86::CMOVA16rr:  Opc = X86::CMOVBE16rr; break;
    case X86::CMOVA32rr:  Opc = X86::CMOVBE32rr; break;
    case X86::CMOVA64rr:  Opc = X86::CMOVBE64rr; break;
    case X86::CMOVL16rr:  Opc = X86::CMOVGE16rr; break;
    case X86::CMOVL32rr:  Opc = X86::CMOVGE32rr; break;
    case X86::CMOVL64rr:  Opc = X86::CMOVGE64rr; break;
    case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
    case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
    case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
    case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
    case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
    case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
    case X86::CMOVG16rr:  Opc = X86::CMOVLE16rr; break;
    case X86::CMOVG32rr:  Opc = X86::CMOVLE32rr; break;
    case X86::CMOVG64rr:  Opc = X86::CMOVLE64rr; break;
    case X86::CMOVS16rr:  Opc = X86::CMOVNS16rr; break;
    case X86::CMOVS32rr:  Opc = X86::CMOVNS32rr; break;
    case X86::CMOVS64rr:  Opc = X86::CMOVNS64rr; break;
    case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
    case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
    case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
    case X86::CMOVP16rr:  Opc = X86::CMOVNP16rr; break;
    case X86::CMOVP32rr:  Opc = X86::CMOVNP32rr; break;
    case X86::CMOVP64rr:  Opc = X86::CMOVNP64rr; break;
    case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
    case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
    case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
    case X86::CMOVO16rr:  Opc = X86::CMOVNO16rr; break;
    case X86::CMOVO32rr:  Opc = X86::CMOVNO32rr; break;
    case X86::CMOVO64rr:  Opc = X86::CMOVNO64rr; break;
    case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
    case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
    case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
    }
    if (NewMI) {
      // Caller asked for a fresh instruction: clone, then mutate the clone.
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    // Fallthrough intended.
  }
  default:
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
}

/// GetCondFromBranchOpc - Map a conditional-branch opcode to its condition
/// code, or COND_INVALID if BrOpc is not a recognized conditional branch.
static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
  switch (BrOpc) {
  default: return X86::COND_INVALID;
  case X86::JE:  return X86::COND_E;
  case X86::JNE: return X86::COND_NE;
  case X86::JL:  return X86::COND_L;
  case X86::JLE: return X86::COND_LE;
  case X86::JG:  return X86::COND_G;
  case X86::JGE: return X86::COND_GE;
  case X86::JB:  return X86::COND_B;
  case X86::JBE: return X86::COND_BE;
  case X86::JA:  return X86::COND_A;
  case X86::JAE: return X86::COND_AE;
  case X86::JS:  return X86::COND_S;
  case X86::JNS: return X86::COND_NS;
  case X86::JP:  return X86::COND_P;
  case X86::JNP: return X86::COND_NP;
  case X86::JO:  return X86::COND_O;
  case X86::JNO: return X86::COND_NO;
  }
}

/// GetCondBranchFromCond - Inverse of GetCondFromBranchOpc: map a condition
/// code to the corresponding conditional-branch opcode.  Asserts on condition
/// codes with no single-branch equivalent (e.g. the OR'd pseudo conditions).
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Illegal condition code!");
  case X86::COND_E:  return X86::JE;
  case X86::COND_NE: return X86::JNE;
  case X86::COND_L:  return X86::JL;
  case X86::COND_LE: return X86::JLE;
  case X86::COND_G:  return X86::JG;
  case X86::COND_GE: return X86::JGE;
  case X86::COND_B:  return X86::JB;
  case X86::COND_BE: return X86::JBE;
  case X86::COND_A:  return X86::JA;
  case X86::COND_AE: return X86::JAE;
  case X86::COND_S:  return X86::JS;
  case X86::COND_NS: return X86::JNS;
  case X86::COND_P:  return X86::JP;
  case X86::COND_NP: return X86::JNP;
  case X86::COND_O:  return X86::JO;
  case X86::COND_NO: return X86::JNO;
  }
}

/// GetOppositeBranchCondition - Return the inverse of the specified condition,
/// e.g. turning COND_E to COND_NE.
1634193323SedX86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { 1635193323Sed switch (CC) { 1636198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 1637193323Sed case X86::COND_E: return X86::COND_NE; 1638193323Sed case X86::COND_NE: return X86::COND_E; 1639193323Sed case X86::COND_L: return X86::COND_GE; 1640193323Sed case X86::COND_LE: return X86::COND_G; 1641193323Sed case X86::COND_G: return X86::COND_LE; 1642193323Sed case X86::COND_GE: return X86::COND_L; 1643193323Sed case X86::COND_B: return X86::COND_AE; 1644193323Sed case X86::COND_BE: return X86::COND_A; 1645193323Sed case X86::COND_A: return X86::COND_BE; 1646193323Sed case X86::COND_AE: return X86::COND_B; 1647193323Sed case X86::COND_S: return X86::COND_NS; 1648193323Sed case X86::COND_NS: return X86::COND_S; 1649193323Sed case X86::COND_P: return X86::COND_NP; 1650193323Sed case X86::COND_NP: return X86::COND_P; 1651193323Sed case X86::COND_O: return X86::COND_NO; 1652193323Sed case X86::COND_NO: return X86::COND_O; 1653193323Sed } 1654193323Sed} 1655193323Sed 1656193323Sedbool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { 1657193323Sed const TargetInstrDesc &TID = MI->getDesc(); 1658193323Sed if (!TID.isTerminator()) return false; 1659193323Sed 1660193323Sed // Conditional branch is a special case. 1661193323Sed if (TID.isBranch() && !TID.isBarrier()) 1662193323Sed return true; 1663193323Sed if (!TID.isPredicable()) 1664193323Sed return true; 1665193323Sed return !isPredicated(MI); 1666193323Sed} 1667193323Sed 1668193323Sed// For purposes of branch analysis do not count FP_REG_KILL as a terminator. 
static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
                                               const X86InstrInfo &TII) {
  // FP_REG_KILL is a terminator for other purposes, but branch analysis can
  // safely look past it.
  if (MI->getOpcode() == X86::FP_REG_KILL)
    return false;
  return TII.isUnpredicatedTerminator(MI);
}

/// AnalyzeBranch - Walk the terminators of MBB from the bottom up and try to
/// understand its branch structure.  On success returns false and fills in
/// TBB/FBB/Cond; returns true when the terminators cannot be analyzed (e.g.
/// indirect branches or unrecognized multi-branch patterns).  When
/// AllowModify is set, dead instructions after an unconditional JMP and JMPs
/// that are equivalent to a fall-through are deleted as a side effect.
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;

    // Working from the bottom, when we see a non-terminator instruction, we're
    // done.
    if (!isBrAnalysisUnpredicatedTerminator(I, *this))
      break;

    // A terminator that isn't a branch can't easily be handled by this
    // analysis.
    if (!I->getDesc().isBranch())
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP) {
      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (llvm::next(I) != MBB.end())
        llvm::next(I)->eraseFromParent();

      Cond.clear();
      FBB = 0;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        // Restart the scan from the (new) bottom of the block.
        I = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
    if (BranchCode == X86::COND_INVALID)
      return true;  // Can't handle indirect branch.

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination and their condition
    // opcodes fit one of the special multi-branch idioms.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to the same
    // destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;

    // If the conditions are the same, we can leave them alone.
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    if (OldBranchCode == BranchCode)
      continue;

    // If they differ, see if they fit one of the known patterns. Theoretically,
    // we could handle more patterns here, but we shouldn't expect to see them
    // if instruction selection has done a reasonable job.
    if ((OldBranchCode == X86::COND_NP &&
         BranchCode == X86::COND_E) ||
        (OldBranchCode == X86::COND_E &&
         BranchCode == X86::COND_NP))
      BranchCode = X86::COND_NP_OR_E;
    else if ((OldBranchCode == X86::COND_P &&
              BranchCode == X86::COND_NE) ||
             (OldBranchCode == X86::COND_NE &&
              BranchCode == X86::COND_P))
      BranchCode = X86::COND_NE_OR_P;
    else
      return true;

    // Update the MachineOperand.
    Cond[0].setImm(BranchCode);
  }

  return false;
}

/// RemoveBranch - Erase the branch instructions at the end of MBB, scanning
/// from the bottom up, and return how many were removed.
unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  unsigned Count = 0;

  while (I != MBB.begin()) {
    --I;
    // Stop at the first instruction that is neither an unconditional JMP nor
    // a recognized conditional branch.
    if (I->getOpcode() != X86::JMP &&
        GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
      break;
    // Remove the branch.
    I->eraseFromParent();
    I = MBB.end();
    ++Count;
  }

  return Count;
}

/// InsertBranch - Append branch instructions implementing the (TBB, FBB,
/// Cond) triple produced by AnalyzeBranch, and return how many instructions
/// were inserted.  The OR'd pseudo conditions are synthesized as two branches.
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                           MachineBasicBlock *FBB,
                           const SmallVectorImpl<MachineOperand> &Cond) const {
  // FIXME this should probably have a DebugLoc operand
  DebugLoc dl = DebugLoc::getUnknownLoc();
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "X86 branch conditions have one component!");

  if (Cond.empty()) {
    // Unconditional branch?
    assert(!FBB && "Unconditional branch with multiple successors!");
    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB);
    return 1;
  }

  // Conditional branch.
  unsigned Count = 0;
  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
  switch (CC) {
  case X86::COND_NP_OR_E:
    // Synthesize NP_OR_E with two branches.
    BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB);
    ++Count;
    break;
  case X86::COND_NE_OR_P:
    // Synthesize NE_OR_P with two branches.
    BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
    ++Count;
    break;
  default: {
    unsigned Opc = GetCondBranchFromCond(CC);
    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
    ++Count;
  }
  }
  if (FBB) {
    // Two-way Conditional branch. Insert the second branch.
    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
    ++Count;
  }
  return Count;
}

/// isHReg - Test if the given register is a physical h register.
1845193323Sedstatic bool isHReg(unsigned Reg) { 1846193323Sed return X86::GR8_ABCD_HRegClass.contains(Reg); 1847193323Sed} 1848193323Sed 1849193323Sedbool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, 1850193323Sed MachineBasicBlock::iterator MI, 1851193323Sed unsigned DestReg, unsigned SrcReg, 1852193323Sed const TargetRegisterClass *DestRC, 1853193323Sed const TargetRegisterClass *SrcRC) const { 1854193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 1855193323Sed if (MI != MBB.end()) DL = MI->getDebugLoc(); 1856193323Sed 1857193323Sed // Determine if DstRC and SrcRC have a common superclass in common. 1858193323Sed const TargetRegisterClass *CommonRC = DestRC; 1859193323Sed if (DestRC == SrcRC) 1860193323Sed /* Source and destination have the same register class. */; 1861193323Sed else if (CommonRC->hasSuperClass(SrcRC)) 1862193323Sed CommonRC = SrcRC; 1863198090Srdivacky else if (!DestRC->hasSubClass(SrcRC)) { 1864198090Srdivacky // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, 1865198090Srdivacky // but we want to copy then as GR64. Similarly, for GR32_NOREX and 1866198090Srdivacky // GR32_NOSP, copy as GR32. 
1867198090Srdivacky if (SrcRC->hasSuperClass(&X86::GR64RegClass) && 1868198090Srdivacky DestRC->hasSuperClass(&X86::GR64RegClass)) 1869198090Srdivacky CommonRC = &X86::GR64RegClass; 1870198090Srdivacky else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && 1871198090Srdivacky DestRC->hasSuperClass(&X86::GR32RegClass)) 1872198090Srdivacky CommonRC = &X86::GR32RegClass; 1873198090Srdivacky else 1874198090Srdivacky CommonRC = 0; 1875198090Srdivacky } 1876193323Sed 1877193323Sed if (CommonRC) { 1878193323Sed unsigned Opc; 1879198090Srdivacky if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { 1880193323Sed Opc = X86::MOV64rr; 1881198090Srdivacky } else if (CommonRC == &X86::GR32RegClass || 1882198090Srdivacky CommonRC == &X86::GR32_NOSPRegClass) { 1883193323Sed Opc = X86::MOV32rr; 1884193323Sed } else if (CommonRC == &X86::GR16RegClass) { 1885193323Sed Opc = X86::MOV16rr; 1886193323Sed } else if (CommonRC == &X86::GR8RegClass) { 1887193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 1888193323Sed // move. Otherwise use a normal move. 
1889193323Sed if ((isHReg(DestReg) || isHReg(SrcReg)) && 1890193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 1891193323Sed Opc = X86::MOV8rr_NOREX; 1892193323Sed else 1893193323Sed Opc = X86::MOV8rr; 1894193323Sed } else if (CommonRC == &X86::GR64_ABCDRegClass) { 1895193323Sed Opc = X86::MOV64rr; 1896193323Sed } else if (CommonRC == &X86::GR32_ABCDRegClass) { 1897193323Sed Opc = X86::MOV32rr; 1898193323Sed } else if (CommonRC == &X86::GR16_ABCDRegClass) { 1899193323Sed Opc = X86::MOV16rr; 1900193323Sed } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { 1901193323Sed Opc = X86::MOV8rr; 1902193323Sed } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { 1903193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1904193323Sed Opc = X86::MOV8rr_NOREX; 1905193323Sed else 1906193323Sed Opc = X86::MOV8rr; 1907198090Srdivacky } else if (CommonRC == &X86::GR64_NOREXRegClass || 1908198090Srdivacky CommonRC == &X86::GR64_NOREX_NOSPRegClass) { 1909193323Sed Opc = X86::MOV64rr; 1910193323Sed } else if (CommonRC == &X86::GR32_NOREXRegClass) { 1911193323Sed Opc = X86::MOV32rr; 1912193323Sed } else if (CommonRC == &X86::GR16_NOREXRegClass) { 1913193323Sed Opc = X86::MOV16rr; 1914193323Sed } else if (CommonRC == &X86::GR8_NOREXRegClass) { 1915193323Sed Opc = X86::MOV8rr; 1916193323Sed } else if (CommonRC == &X86::RFP32RegClass) { 1917193323Sed Opc = X86::MOV_Fp3232; 1918193323Sed } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { 1919193323Sed Opc = X86::MOV_Fp6464; 1920193323Sed } else if (CommonRC == &X86::RFP80RegClass) { 1921193323Sed Opc = X86::MOV_Fp8080; 1922193323Sed } else if (CommonRC == &X86::FR32RegClass) { 1923193323Sed Opc = X86::FsMOVAPSrr; 1924193323Sed } else if (CommonRC == &X86::FR64RegClass) { 1925193323Sed Opc = X86::FsMOVAPDrr; 1926193323Sed } else if (CommonRC == &X86::VR128RegClass) { 1927193323Sed Opc = X86::MOVAPSrr; 1928193323Sed } else if (CommonRC == &X86::VR64RegClass) { 1929193323Sed Opc = X86::MMX_MOVQ64rr; 
1930193323Sed } else { 1931193323Sed return false; 1932193323Sed } 1933193323Sed BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); 1934193323Sed return true; 1935193323Sed } 1936198090Srdivacky 1937193323Sed // Moving EFLAGS to / from another register requires a push and a pop. 1938193323Sed if (SrcRC == &X86::CCRRegClass) { 1939193323Sed if (SrcReg != X86::EFLAGS) 1940193323Sed return false; 1941198090Srdivacky if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { 1942201360Srdivacky BuildMI(MBB, MI, DL, get(X86::PUSHFQ64)); 1943193323Sed BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); 1944193323Sed return true; 1945198090Srdivacky } else if (DestRC == &X86::GR32RegClass || 1946198090Srdivacky DestRC == &X86::GR32_NOSPRegClass) { 1947193323Sed BuildMI(MBB, MI, DL, get(X86::PUSHFD)); 1948193323Sed BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); 1949193323Sed return true; 1950193323Sed } 1951193323Sed } else if (DestRC == &X86::CCRRegClass) { 1952193323Sed if (DestReg != X86::EFLAGS) 1953193323Sed return false; 1954198090Srdivacky if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { 1955193323Sed BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); 1956193323Sed BuildMI(MBB, MI, DL, get(X86::POPFQ)); 1957193323Sed return true; 1958198090Srdivacky } else if (SrcRC == &X86::GR32RegClass || 1959198090Srdivacky DestRC == &X86::GR32_NOSPRegClass) { 1960193323Sed BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); 1961193323Sed BuildMI(MBB, MI, DL, get(X86::POPFD)); 1962193323Sed return true; 1963193323Sed } 1964193323Sed } 1965193323Sed 1966193323Sed // Moving from ST(0) turns into FpGET_ST0_32 etc. 1967193323Sed if (SrcRC == &X86::RSTRegClass) { 1968193323Sed // Copying from ST(0)/ST(1). 
1969193323Sed if (SrcReg != X86::ST0 && SrcReg != X86::ST1) 1970193323Sed // Can only copy from ST(0)/ST(1) right now 1971193323Sed return false; 1972193323Sed bool isST0 = SrcReg == X86::ST0; 1973193323Sed unsigned Opc; 1974193323Sed if (DestRC == &X86::RFP32RegClass) 1975193323Sed Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; 1976193323Sed else if (DestRC == &X86::RFP64RegClass) 1977193323Sed Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; 1978193323Sed else { 1979193323Sed if (DestRC != &X86::RFP80RegClass) 1980193323Sed return false; 1981193323Sed Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; 1982193323Sed } 1983193323Sed BuildMI(MBB, MI, DL, get(Opc), DestReg); 1984193323Sed return true; 1985193323Sed } 1986193323Sed 1987193323Sed // Moving to ST(0) turns into FpSET_ST0_32 etc. 1988193323Sed if (DestRC == &X86::RSTRegClass) { 1989193323Sed // Copying to ST(0) / ST(1). 1990193323Sed if (DestReg != X86::ST0 && DestReg != X86::ST1) 1991193323Sed // Can only copy to TOS right now 1992193323Sed return false; 1993193323Sed bool isST0 = DestReg == X86::ST0; 1994193323Sed unsigned Opc; 1995193323Sed if (SrcRC == &X86::RFP32RegClass) 1996193323Sed Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32; 1997193323Sed else if (SrcRC == &X86::RFP64RegClass) 1998193323Sed Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64; 1999193323Sed else { 2000193323Sed if (SrcRC != &X86::RFP80RegClass) 2001193323Sed return false; 2002193323Sed Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80; 2003193323Sed } 2004193323Sed BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg); 2005193323Sed return true; 2006193323Sed } 2007193323Sed 2008193323Sed // Not yet supported! 
2009193323Sed return false; 2010193323Sed} 2011193323Sed 2012193323Sedstatic unsigned getStoreRegOpcode(unsigned SrcReg, 2013193323Sed const TargetRegisterClass *RC, 2014193323Sed bool isStackAligned, 2015193323Sed TargetMachine &TM) { 2016193323Sed unsigned Opc = 0; 2017198090Srdivacky if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { 2018193323Sed Opc = X86::MOV64mr; 2019198090Srdivacky } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { 2020193323Sed Opc = X86::MOV32mr; 2021193323Sed } else if (RC == &X86::GR16RegClass) { 2022193323Sed Opc = X86::MOV16mr; 2023193323Sed } else if (RC == &X86::GR8RegClass) { 2024193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 2025193323Sed // move. Otherwise use a normal move. 2026193323Sed if (isHReg(SrcReg) && 2027193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 2028193323Sed Opc = X86::MOV8mr_NOREX; 2029193323Sed else 2030193323Sed Opc = X86::MOV8mr; 2031193323Sed } else if (RC == &X86::GR64_ABCDRegClass) { 2032193323Sed Opc = X86::MOV64mr; 2033193323Sed } else if (RC == &X86::GR32_ABCDRegClass) { 2034193323Sed Opc = X86::MOV32mr; 2035193323Sed } else if (RC == &X86::GR16_ABCDRegClass) { 2036193323Sed Opc = X86::MOV16mr; 2037193323Sed } else if (RC == &X86::GR8_ABCD_LRegClass) { 2038193323Sed Opc = X86::MOV8mr; 2039193323Sed } else if (RC == &X86::GR8_ABCD_HRegClass) { 2040193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2041193323Sed Opc = X86::MOV8mr_NOREX; 2042193323Sed else 2043193323Sed Opc = X86::MOV8mr; 2044198090Srdivacky } else if (RC == &X86::GR64_NOREXRegClass || 2045198090Srdivacky RC == &X86::GR64_NOREX_NOSPRegClass) { 2046193323Sed Opc = X86::MOV64mr; 2047193323Sed } else if (RC == &X86::GR32_NOREXRegClass) { 2048193323Sed Opc = X86::MOV32mr; 2049193323Sed } else if (RC == &X86::GR16_NOREXRegClass) { 2050193323Sed Opc = X86::MOV16mr; 2051193323Sed } else if (RC == &X86::GR8_NOREXRegClass) { 2052193323Sed Opc = X86::MOV8mr; 
2053193323Sed } else if (RC == &X86::RFP80RegClass) { 2054193323Sed Opc = X86::ST_FpP80m; // pops 2055193323Sed } else if (RC == &X86::RFP64RegClass) { 2056193323Sed Opc = X86::ST_Fp64m; 2057193323Sed } else if (RC == &X86::RFP32RegClass) { 2058193323Sed Opc = X86::ST_Fp32m; 2059193323Sed } else if (RC == &X86::FR32RegClass) { 2060193323Sed Opc = X86::MOVSSmr; 2061193323Sed } else if (RC == &X86::FR64RegClass) { 2062193323Sed Opc = X86::MOVSDmr; 2063193323Sed } else if (RC == &X86::VR128RegClass) { 2064193323Sed // If stack is realigned we can use aligned stores. 2065193323Sed Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr; 2066193323Sed } else if (RC == &X86::VR64RegClass) { 2067193323Sed Opc = X86::MMX_MOVQ64mr; 2068193323Sed } else { 2069198090Srdivacky llvm_unreachable("Unknown regclass"); 2070193323Sed } 2071193323Sed 2072193323Sed return Opc; 2073193323Sed} 2074193323Sed 2075193323Sedvoid X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 2076193323Sed MachineBasicBlock::iterator MI, 2077193323Sed unsigned SrcReg, bool isKill, int FrameIdx, 2078193323Sed const TargetRegisterClass *RC) const { 2079193323Sed const MachineFunction &MF = *MBB.getParent(); 2080202878Srdivacky bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); 2081193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2082193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2083193323Sed if (MI != MBB.end()) DL = MI->getDebugLoc(); 2084193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) 2085193323Sed .addReg(SrcReg, getKillRegState(isKill)); 2086193323Sed} 2087193323Sed 2088193323Sedvoid X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, 2089193323Sed bool isKill, 2090193323Sed SmallVectorImpl<MachineOperand> &Addr, 2091193323Sed const TargetRegisterClass *RC, 2092198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 2093198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 2094193323Sed SmallVectorImpl<MachineInstr*> 
&NewMIs) const { 2095199481Srdivacky bool isAligned = (*MMOBegin)->getAlignment() >= 16; 2096193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2097193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2098193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 2099193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2100193323Sed MIB.addOperand(Addr[i]); 2101193323Sed MIB.addReg(SrcReg, getKillRegState(isKill)); 2102198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 2103193323Sed NewMIs.push_back(MIB); 2104193323Sed} 2105193323Sed 2106193323Sedstatic unsigned getLoadRegOpcode(unsigned DestReg, 2107193323Sed const TargetRegisterClass *RC, 2108193323Sed bool isStackAligned, 2109193323Sed const TargetMachine &TM) { 2110193323Sed unsigned Opc = 0; 2111198090Srdivacky if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { 2112193323Sed Opc = X86::MOV64rm; 2113198090Srdivacky } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { 2114193323Sed Opc = X86::MOV32rm; 2115193323Sed } else if (RC == &X86::GR16RegClass) { 2116193323Sed Opc = X86::MOV16rm; 2117193323Sed } else if (RC == &X86::GR8RegClass) { 2118193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 2119193323Sed // move. Otherwise use a normal move. 
2120193323Sed if (isHReg(DestReg) && 2121193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 2122193323Sed Opc = X86::MOV8rm_NOREX; 2123193323Sed else 2124193323Sed Opc = X86::MOV8rm; 2125193323Sed } else if (RC == &X86::GR64_ABCDRegClass) { 2126193323Sed Opc = X86::MOV64rm; 2127193323Sed } else if (RC == &X86::GR32_ABCDRegClass) { 2128193323Sed Opc = X86::MOV32rm; 2129193323Sed } else if (RC == &X86::GR16_ABCDRegClass) { 2130193323Sed Opc = X86::MOV16rm; 2131193323Sed } else if (RC == &X86::GR8_ABCD_LRegClass) { 2132193323Sed Opc = X86::MOV8rm; 2133193323Sed } else if (RC == &X86::GR8_ABCD_HRegClass) { 2134193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2135193323Sed Opc = X86::MOV8rm_NOREX; 2136193323Sed else 2137193323Sed Opc = X86::MOV8rm; 2138198090Srdivacky } else if (RC == &X86::GR64_NOREXRegClass || 2139198090Srdivacky RC == &X86::GR64_NOREX_NOSPRegClass) { 2140193323Sed Opc = X86::MOV64rm; 2141193323Sed } else if (RC == &X86::GR32_NOREXRegClass) { 2142193323Sed Opc = X86::MOV32rm; 2143193323Sed } else if (RC == &X86::GR16_NOREXRegClass) { 2144193323Sed Opc = X86::MOV16rm; 2145193323Sed } else if (RC == &X86::GR8_NOREXRegClass) { 2146193323Sed Opc = X86::MOV8rm; 2147193323Sed } else if (RC == &X86::RFP80RegClass) { 2148193323Sed Opc = X86::LD_Fp80m; 2149193323Sed } else if (RC == &X86::RFP64RegClass) { 2150193323Sed Opc = X86::LD_Fp64m; 2151193323Sed } else if (RC == &X86::RFP32RegClass) { 2152193323Sed Opc = X86::LD_Fp32m; 2153193323Sed } else if (RC == &X86::FR32RegClass) { 2154193323Sed Opc = X86::MOVSSrm; 2155193323Sed } else if (RC == &X86::FR64RegClass) { 2156193323Sed Opc = X86::MOVSDrm; 2157193323Sed } else if (RC == &X86::VR128RegClass) { 2158193323Sed // If stack is realigned we can use aligned loads. 2159193323Sed Opc = isStackAligned ? 
X86::MOVAPSrm : X86::MOVUPSrm; 2160193323Sed } else if (RC == &X86::VR64RegClass) { 2161193323Sed Opc = X86::MMX_MOVQ64rm; 2162193323Sed } else { 2163198090Srdivacky llvm_unreachable("Unknown regclass"); 2164193323Sed } 2165193323Sed 2166193323Sed return Opc; 2167193323Sed} 2168193323Sed 2169193323Sedvoid X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 2170193323Sed MachineBasicBlock::iterator MI, 2171193323Sed unsigned DestReg, int FrameIdx, 2172193323Sed const TargetRegisterClass *RC) const{ 2173193323Sed const MachineFunction &MF = *MBB.getParent(); 2174202878Srdivacky bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); 2175193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2176193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2177193323Sed if (MI != MBB.end()) DL = MI->getDebugLoc(); 2178193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); 2179193323Sed} 2180193323Sed 2181193323Sedvoid X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, 2182193323Sed SmallVectorImpl<MachineOperand> &Addr, 2183193323Sed const TargetRegisterClass *RC, 2184198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 2185198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 2186193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2187199481Srdivacky bool isAligned = (*MMOBegin)->getAlignment() >= 16; 2188193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2189193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2190193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); 2191193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2192193323Sed MIB.addOperand(Addr[i]); 2193198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 2194193323Sed NewMIs.push_back(MIB); 2195193323Sed} 2196193323Sed 2197193323Sedbool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 2198193323Sed MachineBasicBlock::iterator MI, 2199193323Sed const 
std::vector<CalleeSavedInfo> &CSI) const { 2200193323Sed if (CSI.empty()) 2201193323Sed return false; 2202193323Sed 2203202878Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 2204193323Sed 2205193323Sed bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2206198090Srdivacky bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); 2207193323Sed unsigned SlotSize = is64Bit ? 8 : 4; 2208193323Sed 2209193323Sed MachineFunction &MF = *MBB.getParent(); 2210198090Srdivacky unsigned FPReg = RI.getFrameRegister(MF); 2211193323Sed X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2212193574Sed unsigned CalleeFrameSize = 0; 2213193323Sed 2214193323Sed unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; 2215193323Sed for (unsigned i = CSI.size(); i != 0; --i) { 2216193323Sed unsigned Reg = CSI[i-1].getReg(); 2217193574Sed const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); 2218193323Sed // Add the callee-saved register as live-in. It's killed at the spill. 2219193323Sed MBB.addLiveIn(Reg); 2220198090Srdivacky if (Reg == FPReg) 2221198090Srdivacky // X86RegisterInfo::emitPrologue will handle spilling of frame register. 
2222198090Srdivacky continue; 2223198090Srdivacky if (RegClass != &X86::VR128RegClass && !isWin64) { 2224193574Sed CalleeFrameSize += SlotSize; 2225198090Srdivacky BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); 2226193574Sed } else { 2227193574Sed storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); 2228193574Sed } 2229193323Sed } 2230193574Sed 2231193574Sed X86FI->setCalleeSavedFrameSize(CalleeFrameSize); 2232193323Sed return true; 2233193323Sed} 2234193323Sed 2235193323Sedbool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2236193323Sed MachineBasicBlock::iterator MI, 2237193323Sed const std::vector<CalleeSavedInfo> &CSI) const { 2238193323Sed if (CSI.empty()) 2239193323Sed return false; 2240193323Sed 2241202878Srdivacky DebugLoc DL = MBB.findDebugLoc(MI); 2242193323Sed 2243198090Srdivacky MachineFunction &MF = *MBB.getParent(); 2244198090Srdivacky unsigned FPReg = RI.getFrameRegister(MF); 2245193323Sed bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2246198090Srdivacky bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); 2247193323Sed unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; 2248193323Sed for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2249193323Sed unsigned Reg = CSI[i].getReg(); 2250198090Srdivacky if (Reg == FPReg) 2251198090Srdivacky // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 
2252198090Srdivacky continue; 2253193574Sed const TargetRegisterClass *RegClass = CSI[i].getRegClass(); 2254198090Srdivacky if (RegClass != &X86::VR128RegClass && !isWin64) { 2255193574Sed BuildMI(MBB, MI, DL, get(Opc), Reg); 2256193574Sed } else { 2257193574Sed loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); 2258193574Sed } 2259193323Sed } 2260193323Sed return true; 2261193323Sed} 2262193323Sed 2263193323Sedstatic MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, 2264193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2265193323Sed MachineInstr *MI, 2266193323Sed const TargetInstrInfo &TII) { 2267193323Sed // Create the base instruction with the memory operand as the first part. 2268193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2269193323Sed MI->getDebugLoc(), true); 2270193323Sed MachineInstrBuilder MIB(NewMI); 2271193323Sed unsigned NumAddrOps = MOs.size(); 2272193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2273193323Sed MIB.addOperand(MOs[i]); 2274193323Sed if (NumAddrOps < 4) // FrameIndex only 2275193323Sed addOffset(MIB, 0); 2276193323Sed 2277193323Sed // Loop over the rest of the ri operands, converting them over. 
2278193323Sed unsigned NumOps = MI->getDesc().getNumOperands()-2; 2279193323Sed for (unsigned i = 0; i != NumOps; ++i) { 2280193323Sed MachineOperand &MO = MI->getOperand(i+2); 2281193323Sed MIB.addOperand(MO); 2282193323Sed } 2283193323Sed for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { 2284193323Sed MachineOperand &MO = MI->getOperand(i); 2285193323Sed MIB.addOperand(MO); 2286193323Sed } 2287193323Sed return MIB; 2288193323Sed} 2289193323Sed 2290193323Sedstatic MachineInstr *FuseInst(MachineFunction &MF, 2291193323Sed unsigned Opcode, unsigned OpNo, 2292193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2293193323Sed MachineInstr *MI, const TargetInstrInfo &TII) { 2294193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2295193323Sed MI->getDebugLoc(), true); 2296193323Sed MachineInstrBuilder MIB(NewMI); 2297193323Sed 2298193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2299193323Sed MachineOperand &MO = MI->getOperand(i); 2300193323Sed if (i == OpNo) { 2301193323Sed assert(MO.isReg() && "Expected to fold into reg operand!"); 2302193323Sed unsigned NumAddrOps = MOs.size(); 2303193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2304193323Sed MIB.addOperand(MOs[i]); 2305193323Sed if (NumAddrOps < 4) // FrameIndex only 2306193323Sed addOffset(MIB, 0); 2307193323Sed } else { 2308193323Sed MIB.addOperand(MO); 2309193323Sed } 2310193323Sed } 2311193323Sed return MIB; 2312193323Sed} 2313193323Sed 2314193323Sedstatic MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 2315193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2316193323Sed MachineInstr *MI) { 2317193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 2318193323Sed MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); 2319193323Sed 2320193323Sed unsigned NumAddrOps = MOs.size(); 2321193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2322193323Sed MIB.addOperand(MOs[i]); 2323193323Sed if 
(NumAddrOps < 4) // FrameIndex only 2324193323Sed addOffset(MIB, 0); 2325193323Sed return MIB.addImm(0); 2326193323Sed} 2327193323Sed 2328193323SedMachineInstr* 2329193323SedX86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2330193323Sed MachineInstr *MI, unsigned i, 2331198090Srdivacky const SmallVectorImpl<MachineOperand> &MOs, 2332198090Srdivacky unsigned Size, unsigned Align) const { 2333198090Srdivacky const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; 2334193323Sed bool isTwoAddrFold = false; 2335193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 2336193323Sed bool isTwoAddr = NumOps > 1 && 2337193323Sed MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2338193323Sed 2339193323Sed MachineInstr *NewMI = NULL; 2340193323Sed // Folding a memory location into the two-address part of a two-address 2341193323Sed // instruction is different than folding it other places. It requires 2342193323Sed // replacing the *two* registers with the memory location. 
2343193323Sed if (isTwoAddr && NumOps >= 2 && i < 2 && 2344193323Sed MI->getOperand(0).isReg() && 2345193323Sed MI->getOperand(1).isReg() && 2346193323Sed MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 2347193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2348193323Sed isTwoAddrFold = true; 2349193323Sed } else if (i == 0) { // If operand 0 2350202375Srdivacky if (MI->getOpcode() == X86::MOV64r0) 2351202375Srdivacky NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); 2352202375Srdivacky else if (MI->getOpcode() == X86::MOV32r0) 2353193323Sed NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); 2354202375Srdivacky else if (MI->getOpcode() == X86::MOV16r0) 2355202375Srdivacky NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); 2356193323Sed else if (MI->getOpcode() == X86::MOV8r0) 2357193323Sed NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); 2358193323Sed if (NewMI) 2359193323Sed return NewMI; 2360193323Sed 2361193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 2362193323Sed } else if (i == 1) { 2363193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 2364193323Sed } else if (i == 2) { 2365193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 2366193323Sed } 2367193323Sed 2368193323Sed // If table selected... 2369193323Sed if (OpcodeTablePtr) { 2370193323Sed // Find the Opcode to fuse 2371199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2372193323Sed OpcodeTablePtr->find((unsigned*)MI->getOpcode()); 2373193323Sed if (I != OpcodeTablePtr->end()) { 2374198090Srdivacky unsigned Opcode = I->second.first; 2375198090Srdivacky unsigned MinAlign = I->second.second; 2376198090Srdivacky if (Align < MinAlign) 2377198090Srdivacky return NULL; 2378198090Srdivacky bool NarrowToMOV32rm = false; 2379198090Srdivacky if (Size) { 2380198090Srdivacky unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); 2381198090Srdivacky if (Size < RCSize) { 2382198090Srdivacky // Check if it's safe to fold the load. 
If the size of the object is 2383198090Srdivacky // narrower than the load width, then it's not. 2384198090Srdivacky if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) 2385198090Srdivacky return NULL; 2386198090Srdivacky // If this is a 64-bit load, but the spill slot is 32, then we can do 2387198090Srdivacky // a 32-bit load which is implicitly zero-extended. This likely is due 2388198090Srdivacky // to liveintervalanalysis remat'ing a load from stack slot. 2389198090Srdivacky if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) 2390198090Srdivacky return NULL; 2391198090Srdivacky Opcode = X86::MOV32rm; 2392198090Srdivacky NarrowToMOV32rm = true; 2393198090Srdivacky } 2394198090Srdivacky } 2395198090Srdivacky 2396193323Sed if (isTwoAddrFold) 2397198090Srdivacky NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); 2398193323Sed else 2399198090Srdivacky NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this); 2400198090Srdivacky 2401198090Srdivacky if (NarrowToMOV32rm) { 2402198090Srdivacky // If this is the special case where we use a MOV32rm to load a 32-bit 2403198090Srdivacky // value and zero-extend the top bits. Change the destination register 2404198090Srdivacky // to a 32-bit one. 
2405198090Srdivacky unsigned DstReg = NewMI->getOperand(0).getReg(); 2406198090Srdivacky if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 2407198090Srdivacky NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, 2408198090Srdivacky 4/*x86_subreg_32bit*/)); 2409198090Srdivacky else 2410198090Srdivacky NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/); 2411198090Srdivacky } 2412193323Sed return NewMI; 2413193323Sed } 2414193323Sed } 2415193323Sed 2416193323Sed // No fusion 2417193323Sed if (PrintFailedFusing) 2418202375Srdivacky dbgs() << "We failed to fuse operand " << i << " in " << *MI; 2419193323Sed return NULL; 2420193323Sed} 2421193323Sed 2422193323Sed 2423193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2424193323Sed MachineInstr *MI, 2425198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 2426193323Sed int FrameIndex) const { 2427193323Sed // Check switch flag 2428193323Sed if (NoFusing) return NULL; 2429193323Sed 2430201360Srdivacky if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2431201360Srdivacky switch (MI->getOpcode()) { 2432201360Srdivacky case X86::CVTSD2SSrr: 2433201360Srdivacky case X86::Int_CVTSD2SSrr: 2434201360Srdivacky case X86::CVTSS2SDrr: 2435201360Srdivacky case X86::Int_CVTSS2SDrr: 2436201360Srdivacky case X86::RCPSSr: 2437201360Srdivacky case X86::RCPSSr_Int: 2438201360Srdivacky case X86::ROUNDSDr_Int: 2439201360Srdivacky case X86::ROUNDSSr_Int: 2440201360Srdivacky case X86::RSQRTSSr: 2441201360Srdivacky case X86::RSQRTSSr_Int: 2442201360Srdivacky case X86::SQRTSSr: 2443201360Srdivacky case X86::SQRTSSr_Int: 2444201360Srdivacky return 0; 2445201360Srdivacky } 2446201360Srdivacky 2447193323Sed const MachineFrameInfo *MFI = MF.getFrameInfo(); 2448198090Srdivacky unsigned Size = MFI->getObjectSize(FrameIndex); 2449193323Sed unsigned Alignment = MFI->getObjectAlignment(FrameIndex); 2450193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2451193323Sed unsigned NewOpc = 0; 
2452198090Srdivacky unsigned RCSize = 0; 2453193323Sed switch (MI->getOpcode()) { 2454193323Sed default: return NULL; 2455198090Srdivacky case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; 2456198090Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; 2457198090Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; 2458198090Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; 2459193323Sed } 2460198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 2461198090Srdivacky // narrower than the load width, then it's not. 2462198090Srdivacky if (Size < RCSize) 2463198090Srdivacky return NULL; 2464193323Sed // Change to CMPXXri r, 0 first. 2465193323Sed MI->setDesc(get(NewOpc)); 2466193323Sed MI->getOperand(1).ChangeToImmediate(0); 2467193323Sed } else if (Ops.size() != 1) 2468193323Sed return NULL; 2469193323Sed 2470193323Sed SmallVector<MachineOperand,4> MOs; 2471193323Sed MOs.push_back(MachineOperand::CreateFI(FrameIndex)); 2472198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); 2473193323Sed} 2474193323Sed 2475193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2476193323Sed MachineInstr *MI, 2477198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 2478193323Sed MachineInstr *LoadMI) const { 2479193323Sed // Check switch flag 2480193323Sed if (NoFusing) return NULL; 2481193323Sed 2482201360Srdivacky if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2483201360Srdivacky switch (MI->getOpcode()) { 2484201360Srdivacky case X86::CVTSD2SSrr: 2485201360Srdivacky case X86::Int_CVTSD2SSrr: 2486201360Srdivacky case X86::CVTSS2SDrr: 2487201360Srdivacky case X86::Int_CVTSS2SDrr: 2488201360Srdivacky case X86::RCPSSr: 2489201360Srdivacky case X86::RCPSSr_Int: 2490201360Srdivacky case X86::ROUNDSDr_Int: 2491201360Srdivacky case X86::ROUNDSSr_Int: 2492201360Srdivacky case X86::RSQRTSSr: 2493201360Srdivacky case 
X86::RSQRTSSr_Int: 2494201360Srdivacky case X86::SQRTSSr: 2495201360Srdivacky case X86::SQRTSSr_Int: 2496201360Srdivacky return 0; 2497201360Srdivacky } 2498201360Srdivacky 2499193323Sed // Determine the alignment of the load. 2500193323Sed unsigned Alignment = 0; 2501193323Sed if (LoadMI->hasOneMemOperand()) 2502198090Srdivacky Alignment = (*LoadMI->memoperands_begin())->getAlignment(); 2503198090Srdivacky else 2504198090Srdivacky switch (LoadMI->getOpcode()) { 2505198090Srdivacky case X86::V_SET0: 2506198090Srdivacky case X86::V_SETALLONES: 2507198090Srdivacky Alignment = 16; 2508198090Srdivacky break; 2509198090Srdivacky case X86::FsFLD0SD: 2510198090Srdivacky Alignment = 8; 2511198090Srdivacky break; 2512198090Srdivacky case X86::FsFLD0SS: 2513198090Srdivacky Alignment = 4; 2514198090Srdivacky break; 2515198090Srdivacky default: 2516198090Srdivacky llvm_unreachable("Don't know how to fold this instruction!"); 2517193323Sed } 2518193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2519193323Sed unsigned NewOpc = 0; 2520193323Sed switch (MI->getOpcode()) { 2521193323Sed default: return NULL; 2522193323Sed case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 2523193323Sed case X86::TEST16rr: NewOpc = X86::CMP16ri; break; 2524193323Sed case X86::TEST32rr: NewOpc = X86::CMP32ri; break; 2525193323Sed case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; 2526193323Sed } 2527193323Sed // Change to CMPXXri r, 0 first. 2528193323Sed MI->setDesc(get(NewOpc)); 2529193323Sed MI->getOperand(1).ChangeToImmediate(0); 2530193323Sed } else if (Ops.size() != 1) 2531193323Sed return NULL; 2532193323Sed 2533193323Sed SmallVector<MachineOperand,X86AddrNumOperands> MOs; 2534198090Srdivacky switch (LoadMI->getOpcode()) { 2535198090Srdivacky case X86::V_SET0: 2536198090Srdivacky case X86::V_SETALLONES: 2537198090Srdivacky case X86::FsFLD0SD: 2538198090Srdivacky case X86::FsFLD0SS: { 2539193323Sed // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. 
2540193323Sed // Create a constant-pool entry and operands to load from it. 2541193323Sed 2542193323Sed // x86-32 PIC requires a PIC base register for constant pools. 2543193323Sed unsigned PICBase = 0; 2544198090Srdivacky if (TM.getRelocationModel() == Reloc::PIC_) { 2545198090Srdivacky if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2546198090Srdivacky PICBase = X86::RIP; 2547198090Srdivacky else 2548198090Srdivacky // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); 2549198090Srdivacky // This doesn't work for several reasons. 2550198090Srdivacky // 1. GlobalBaseReg may have been spilled. 2551198090Srdivacky // 2. It may not be live at MI. 2552198090Srdivacky return NULL; 2553198090Srdivacky } 2554193323Sed 2555198090Srdivacky // Create a constant-pool entry. 2556193323Sed MachineConstantPool &MCP = *MF.getConstantPool(); 2557198090Srdivacky const Type *Ty; 2558198090Srdivacky if (LoadMI->getOpcode() == X86::FsFLD0SS) 2559198090Srdivacky Ty = Type::getFloatTy(MF.getFunction()->getContext()); 2560198090Srdivacky else if (LoadMI->getOpcode() == X86::FsFLD0SD) 2561198090Srdivacky Ty = Type::getDoubleTy(MF.getFunction()->getContext()); 2562198090Srdivacky else 2563198090Srdivacky Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); 2564198090Srdivacky Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? 2565198090Srdivacky Constant::getAllOnesValue(Ty) : 2566198090Srdivacky Constant::getNullValue(Ty); 2567198090Srdivacky unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); 2568193323Sed 2569193323Sed // Create operands to load from the constant pool entry. 
2570193323Sed MOs.push_back(MachineOperand::CreateReg(PICBase, false)); 2571193323Sed MOs.push_back(MachineOperand::CreateImm(1)); 2572193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 2573193323Sed MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); 2574193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 2575198090Srdivacky break; 2576198090Srdivacky } 2577198090Srdivacky default: { 2578193323Sed // Folding a normal load. Just copy the load's address operands. 2579193323Sed unsigned NumOps = LoadMI->getDesc().getNumOperands(); 2580193323Sed for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) 2581193323Sed MOs.push_back(LoadMI->getOperand(i)); 2582198090Srdivacky break; 2583193323Sed } 2584198090Srdivacky } 2585198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); 2586193323Sed} 2587193323Sed 2588193323Sed 2589193323Sedbool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, 2590193323Sed const SmallVectorImpl<unsigned> &Ops) const { 2591193323Sed // Check switch flag 2592193323Sed if (NoFusing) return 0; 2593193323Sed 2594193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2595193323Sed switch (MI->getOpcode()) { 2596193323Sed default: return false; 2597193323Sed case X86::TEST8rr: 2598193323Sed case X86::TEST16rr: 2599193323Sed case X86::TEST32rr: 2600193323Sed case X86::TEST64rr: 2601193323Sed return true; 2602193323Sed } 2603193323Sed } 2604193323Sed 2605193323Sed if (Ops.size() != 1) 2606193323Sed return false; 2607193323Sed 2608193323Sed unsigned OpNum = Ops[0]; 2609193323Sed unsigned Opc = MI->getOpcode(); 2610193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 2611193323Sed bool isTwoAddr = NumOps > 1 && 2612193323Sed MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2613193323Sed 2614193323Sed // Folding a memory location into the two-address part of a two-address 2615193323Sed // instruction is different than folding it other places. 
It requires 2616193323Sed // replacing the *two* registers with the memory location. 2617198090Srdivacky const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; 2618193323Sed if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 2619193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2620193323Sed } else if (OpNum == 0) { // If operand 0 2621193323Sed switch (Opc) { 2622198090Srdivacky case X86::MOV8r0: 2623202375Srdivacky case X86::MOV16r0: 2624193323Sed case X86::MOV32r0: 2625202375Srdivacky case X86::MOV64r0: 2626193323Sed return true; 2627193323Sed default: break; 2628193323Sed } 2629193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 2630193323Sed } else if (OpNum == 1) { 2631193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 2632193323Sed } else if (OpNum == 2) { 2633193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 2634193323Sed } 2635193323Sed 2636193323Sed if (OpcodeTablePtr) { 2637193323Sed // Find the Opcode to fuse 2638199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2639193323Sed OpcodeTablePtr->find((unsigned*)Opc); 2640193323Sed if (I != OpcodeTablePtr->end()) 2641193323Sed return true; 2642193323Sed } 2643193323Sed return false; 2644193323Sed} 2645193323Sed 2646193323Sedbool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, 2647193323Sed unsigned Reg, bool UnfoldLoad, bool UnfoldStore, 2648193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2649199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2650193323Sed MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); 2651193323Sed if (I == MemOp2RegOpTable.end()) 2652193323Sed return false; 2653193323Sed unsigned Opc = I->second.first; 2654193323Sed unsigned Index = I->second.second & 0xf; 2655193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2656193323Sed bool FoldedStore = I->second.second & (1 << 5); 2657193323Sed if (UnfoldLoad && !FoldedLoad) 2658193323Sed return false; 2659193323Sed UnfoldLoad 
&= FoldedLoad; 2660193323Sed if (UnfoldStore && !FoldedStore) 2661193323Sed return false; 2662193323Sed UnfoldStore &= FoldedStore; 2663193323Sed 2664193323Sed const TargetInstrDesc &TID = get(Opc); 2665193323Sed const TargetOperandInfo &TOI = TID.OpInfo[Index]; 2666198090Srdivacky const TargetRegisterClass *RC = TOI.getRegClass(&RI); 2667193323Sed SmallVector<MachineOperand, X86AddrNumOperands> AddrOps; 2668193323Sed SmallVector<MachineOperand,2> BeforeOps; 2669193323Sed SmallVector<MachineOperand,2> AfterOps; 2670193323Sed SmallVector<MachineOperand,4> ImpOps; 2671193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2672193323Sed MachineOperand &Op = MI->getOperand(i); 2673193323Sed if (i >= Index && i < Index + X86AddrNumOperands) 2674193323Sed AddrOps.push_back(Op); 2675193323Sed else if (Op.isReg() && Op.isImplicit()) 2676193323Sed ImpOps.push_back(Op); 2677193323Sed else if (i < Index) 2678193323Sed BeforeOps.push_back(Op); 2679193323Sed else if (i > Index) 2680193323Sed AfterOps.push_back(Op); 2681193323Sed } 2682193323Sed 2683193323Sed // Emit the load instruction. 2684193323Sed if (UnfoldLoad) { 2685198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2686198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2687198090Srdivacky MF.extractLoadMemRefs(MI->memoperands_begin(), 2688198090Srdivacky MI->memoperands_end()); 2689198090Srdivacky loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); 2690193323Sed if (UnfoldStore) { 2691193323Sed // Address operands cannot be marked isKill. 2692193323Sed for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { 2693193323Sed MachineOperand &MO = NewMIs[0]->getOperand(i); 2694193323Sed if (MO.isReg()) 2695193323Sed MO.setIsKill(false); 2696193323Sed } 2697193323Sed } 2698193323Sed } 2699193323Sed 2700193323Sed // Emit the data processing instruction. 
2701193323Sed MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); 2702193323Sed MachineInstrBuilder MIB(DataMI); 2703193323Sed 2704193323Sed if (FoldedStore) 2705193323Sed MIB.addReg(Reg, RegState::Define); 2706193323Sed for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) 2707193323Sed MIB.addOperand(BeforeOps[i]); 2708193323Sed if (FoldedLoad) 2709193323Sed MIB.addReg(Reg); 2710193323Sed for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) 2711193323Sed MIB.addOperand(AfterOps[i]); 2712193323Sed for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { 2713193323Sed MachineOperand &MO = ImpOps[i]; 2714193323Sed MIB.addReg(MO.getReg(), 2715193323Sed getDefRegState(MO.isDef()) | 2716193323Sed RegState::Implicit | 2717193323Sed getKillRegState(MO.isKill()) | 2718195340Sed getDeadRegState(MO.isDead()) | 2719195340Sed getUndefRegState(MO.isUndef())); 2720193323Sed } 2721193323Sed // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 2722193323Sed unsigned NewOpc = 0; 2723193323Sed switch (DataMI->getOpcode()) { 2724193323Sed default: break; 2725193323Sed case X86::CMP64ri32: 2726193323Sed case X86::CMP32ri: 2727193323Sed case X86::CMP16ri: 2728193323Sed case X86::CMP8ri: { 2729193323Sed MachineOperand &MO0 = DataMI->getOperand(0); 2730193323Sed MachineOperand &MO1 = DataMI->getOperand(1); 2731193323Sed if (MO1.getImm() == 0) { 2732193323Sed switch (DataMI->getOpcode()) { 2733193323Sed default: break; 2734193323Sed case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; 2735193323Sed case X86::CMP32ri: NewOpc = X86::TEST32rr; break; 2736193323Sed case X86::CMP16ri: NewOpc = X86::TEST16rr; break; 2737193323Sed case X86::CMP8ri: NewOpc = X86::TEST8rr; break; 2738193323Sed } 2739193323Sed DataMI->setDesc(get(NewOpc)); 2740193323Sed MO1.ChangeToRegister(MO0.getReg(), false); 2741193323Sed } 2742193323Sed } 2743193323Sed } 2744193323Sed NewMIs.push_back(DataMI); 2745193323Sed 2746193323Sed // Emit the store instruction. 
2747193323Sed if (UnfoldStore) { 2748198090Srdivacky const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); 2749198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2750198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2751198090Srdivacky MF.extractStoreMemRefs(MI->memoperands_begin(), 2752198090Srdivacky MI->memoperands_end()); 2753198090Srdivacky storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); 2754193323Sed } 2755193323Sed 2756193323Sed return true; 2757193323Sed} 2758193323Sed 2759193323Sedbool 2760193323SedX86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 2761193323Sed SmallVectorImpl<SDNode*> &NewNodes) const { 2762193323Sed if (!N->isMachineOpcode()) 2763193323Sed return false; 2764193323Sed 2765199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2766193323Sed MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); 2767193323Sed if (I == MemOp2RegOpTable.end()) 2768193323Sed return false; 2769193323Sed unsigned Opc = I->second.first; 2770193323Sed unsigned Index = I->second.second & 0xf; 2771193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2772193323Sed bool FoldedStore = I->second.second & (1 << 5); 2773193323Sed const TargetInstrDesc &TID = get(Opc); 2774198090Srdivacky const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); 2775193323Sed unsigned NumDefs = TID.NumDefs; 2776193323Sed std::vector<SDValue> AddrOps; 2777193323Sed std::vector<SDValue> BeforeOps; 2778193323Sed std::vector<SDValue> AfterOps; 2779193323Sed DebugLoc dl = N->getDebugLoc(); 2780193323Sed unsigned NumOps = N->getNumOperands(); 2781193323Sed for (unsigned i = 0; i != NumOps-1; ++i) { 2782193323Sed SDValue Op = N->getOperand(i); 2783193323Sed if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) 2784193323Sed AddrOps.push_back(Op); 2785193323Sed else if (i < Index-NumDefs) 2786193323Sed BeforeOps.push_back(Op); 2787193323Sed else if (i > Index-NumDefs) 
2788193323Sed AfterOps.push_back(Op); 2789193323Sed } 2790193323Sed SDValue Chain = N->getOperand(NumOps-1); 2791193323Sed AddrOps.push_back(Chain); 2792193323Sed 2793193323Sed // Emit the load instruction. 2794193323Sed SDNode *Load = 0; 2795198090Srdivacky MachineFunction &MF = DAG.getMachineFunction(); 2796193323Sed if (FoldedLoad) { 2797198090Srdivacky EVT VT = *RC->vt_begin(); 2798199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2799199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2800199481Srdivacky MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2801199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2802199481Srdivacky bool isAligned = (*MMOs.first)->getAlignment() >= 16; 2803198090Srdivacky Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, 2804198090Srdivacky VT, MVT::Other, &AddrOps[0], AddrOps.size()); 2805193323Sed NewNodes.push_back(Load); 2806198090Srdivacky 2807198090Srdivacky // Preserve memory reference information. 2808198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2809193323Sed } 2810193323Sed 2811193323Sed // Emit the data processing instruction. 
2812198090Srdivacky std::vector<EVT> VTs; 2813193323Sed const TargetRegisterClass *DstRC = 0; 2814193323Sed if (TID.getNumDefs() > 0) { 2815198090Srdivacky DstRC = TID.OpInfo[0].getRegClass(&RI); 2816193323Sed VTs.push_back(*DstRC->vt_begin()); 2817193323Sed } 2818193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 2819198090Srdivacky EVT VT = N->getValueType(i); 2820193323Sed if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) 2821193323Sed VTs.push_back(VT); 2822193323Sed } 2823193323Sed if (Load) 2824193323Sed BeforeOps.push_back(SDValue(Load, 0)); 2825193323Sed std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); 2826198090Srdivacky SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], 2827198090Srdivacky BeforeOps.size()); 2828193323Sed NewNodes.push_back(NewNode); 2829193323Sed 2830193323Sed // Emit the store instruction. 2831193323Sed if (FoldedStore) { 2832193323Sed AddrOps.pop_back(); 2833193323Sed AddrOps.push_back(SDValue(NewNode, 0)); 2834193323Sed AddrOps.push_back(Chain); 2835199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2836199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2837199481Srdivacky MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2838199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2839199481Srdivacky bool isAligned = (*MMOs.first)->getAlignment() >= 16; 2840198090Srdivacky SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, 2841198090Srdivacky isAligned, TM), 2842198090Srdivacky dl, MVT::Other, 2843198090Srdivacky &AddrOps[0], AddrOps.size()); 2844193323Sed NewNodes.push_back(Store); 2845198090Srdivacky 2846198090Srdivacky // Preserve memory reference information. 
2847198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2848193323Sed } 2849193323Sed 2850193323Sed return true; 2851193323Sed} 2852193323Sed 2853193323Sedunsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, 2854198892Srdivacky bool UnfoldLoad, bool UnfoldStore, 2855198892Srdivacky unsigned *LoadRegIndex) const { 2856199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2857193323Sed MemOp2RegOpTable.find((unsigned*)Opc); 2858193323Sed if (I == MemOp2RegOpTable.end()) 2859193323Sed return 0; 2860193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2861193323Sed bool FoldedStore = I->second.second & (1 << 5); 2862193323Sed if (UnfoldLoad && !FoldedLoad) 2863193323Sed return 0; 2864193323Sed if (UnfoldStore && !FoldedStore) 2865193323Sed return 0; 2866198892Srdivacky if (LoadRegIndex) 2867198892Srdivacky *LoadRegIndex = I->second.second & 0xf; 2868193323Sed return I->second.first; 2869193323Sed} 2870193323Sed 2871202878Srdivackybool 2872202878SrdivackyX86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 2873202878Srdivacky int64_t &Offset1, int64_t &Offset2) const { 2874202878Srdivacky if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) 2875202878Srdivacky return false; 2876202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 2877202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 2878202878Srdivacky switch (Opc1) { 2879202878Srdivacky default: return false; 2880202878Srdivacky case X86::MOV8rm: 2881202878Srdivacky case X86::MOV16rm: 2882202878Srdivacky case X86::MOV32rm: 2883202878Srdivacky case X86::MOV64rm: 2884202878Srdivacky case X86::LD_Fp32m: 2885202878Srdivacky case X86::LD_Fp64m: 2886202878Srdivacky case X86::LD_Fp80m: 2887202878Srdivacky case X86::MOVSSrm: 2888202878Srdivacky case X86::MOVSDrm: 2889202878Srdivacky case X86::MMX_MOVD64rm: 2890202878Srdivacky case X86::MMX_MOVQ64rm: 2891202878Srdivacky case X86::FsMOVAPSrm: 2892202878Srdivacky case 
X86::FsMOVAPDrm: 2893202878Srdivacky case X86::MOVAPSrm: 2894202878Srdivacky case X86::MOVUPSrm: 2895202878Srdivacky case X86::MOVUPSrm_Int: 2896202878Srdivacky case X86::MOVAPDrm: 2897202878Srdivacky case X86::MOVDQArm: 2898202878Srdivacky case X86::MOVDQUrm: 2899202878Srdivacky case X86::MOVDQUrm_Int: 2900202878Srdivacky break; 2901202878Srdivacky } 2902202878Srdivacky switch (Opc2) { 2903202878Srdivacky default: return false; 2904202878Srdivacky case X86::MOV8rm: 2905202878Srdivacky case X86::MOV16rm: 2906202878Srdivacky case X86::MOV32rm: 2907202878Srdivacky case X86::MOV64rm: 2908202878Srdivacky case X86::LD_Fp32m: 2909202878Srdivacky case X86::LD_Fp64m: 2910202878Srdivacky case X86::LD_Fp80m: 2911202878Srdivacky case X86::MOVSSrm: 2912202878Srdivacky case X86::MOVSDrm: 2913202878Srdivacky case X86::MMX_MOVD64rm: 2914202878Srdivacky case X86::MMX_MOVQ64rm: 2915202878Srdivacky case X86::FsMOVAPSrm: 2916202878Srdivacky case X86::FsMOVAPDrm: 2917202878Srdivacky case X86::MOVAPSrm: 2918202878Srdivacky case X86::MOVUPSrm: 2919202878Srdivacky case X86::MOVUPSrm_Int: 2920202878Srdivacky case X86::MOVAPDrm: 2921202878Srdivacky case X86::MOVDQArm: 2922202878Srdivacky case X86::MOVDQUrm: 2923202878Srdivacky case X86::MOVDQUrm_Int: 2924202878Srdivacky break; 2925202878Srdivacky } 2926202878Srdivacky 2927202878Srdivacky // Check if chain operands and base addresses match. 2928202878Srdivacky if (Load1->getOperand(0) != Load2->getOperand(0) || 2929202878Srdivacky Load1->getOperand(5) != Load2->getOperand(5)) 2930202878Srdivacky return false; 2931202878Srdivacky // Segment operands should match as well. 2932202878Srdivacky if (Load1->getOperand(4) != Load2->getOperand(4)) 2933202878Srdivacky return false; 2934202878Srdivacky // Scale should be 1, Index should be Reg0. 
2935202878Srdivacky if (Load1->getOperand(1) == Load2->getOperand(1) && 2936202878Srdivacky Load1->getOperand(2) == Load2->getOperand(2)) { 2937202878Srdivacky if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1) 2938202878Srdivacky return false; 2939202878Srdivacky SDValue Op2 = Load1->getOperand(2); 2940202878Srdivacky if (!isa<RegisterSDNode>(Op2) || 2941202878Srdivacky cast<RegisterSDNode>(Op2)->getReg() != 0) 2942202878Srdivacky return 0; 2943202878Srdivacky 2944202878Srdivacky // Now let's examine the displacements. 2945202878Srdivacky if (isa<ConstantSDNode>(Load1->getOperand(3)) && 2946202878Srdivacky isa<ConstantSDNode>(Load2->getOperand(3))) { 2947202878Srdivacky Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue(); 2948202878Srdivacky Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue(); 2949202878Srdivacky return true; 2950202878Srdivacky } 2951202878Srdivacky } 2952202878Srdivacky return false; 2953202878Srdivacky} 2954202878Srdivacky 2955202878Srdivackybool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, 2956202878Srdivacky int64_t Offset1, int64_t Offset2, 2957202878Srdivacky unsigned NumLoads) const { 2958202878Srdivacky assert(Offset2 > Offset1); 2959202878Srdivacky if ((Offset2 - Offset1) / 8 > 64) 2960202878Srdivacky return false; 2961202878Srdivacky 2962202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 2963202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 2964202878Srdivacky if (Opc1 != Opc2) 2965202878Srdivacky return false; // FIXME: overly conservative? 
2966202878Srdivacky 2967202878Srdivacky switch (Opc1) { 2968202878Srdivacky default: break; 2969202878Srdivacky case X86::LD_Fp32m: 2970202878Srdivacky case X86::LD_Fp64m: 2971202878Srdivacky case X86::LD_Fp80m: 2972202878Srdivacky case X86::MMX_MOVD64rm: 2973202878Srdivacky case X86::MMX_MOVQ64rm: 2974202878Srdivacky return false; 2975202878Srdivacky } 2976202878Srdivacky 2977202878Srdivacky EVT VT = Load1->getValueType(0); 2978202878Srdivacky switch (VT.getSimpleVT().SimpleTy) { 2979202878Srdivacky default: { 2980202878Srdivacky // XMM registers. In 64-bit mode we can be a bit more aggressive since we 2981202878Srdivacky // have 16 of them to play with. 2982202878Srdivacky if (TM.getSubtargetImpl()->is64Bit()) { 2983202878Srdivacky if (NumLoads >= 3) 2984202878Srdivacky return false; 2985202878Srdivacky } else if (NumLoads) 2986202878Srdivacky return false; 2987202878Srdivacky break; 2988202878Srdivacky } 2989202878Srdivacky case MVT::i8: 2990202878Srdivacky case MVT::i16: 2991202878Srdivacky case MVT::i32: 2992202878Srdivacky case MVT::i64: 2993202878Srdivacky case MVT::f32: 2994202878Srdivacky case MVT::f64: 2995202878Srdivacky if (NumLoads) 2996202878Srdivacky return false; 2997202878Srdivacky } 2998202878Srdivacky 2999202878Srdivacky return true; 3000202878Srdivacky} 3001202878Srdivacky 3002202878Srdivacky 3003193323Sedbool X86InstrInfo:: 3004193323SedReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 3005193323Sed assert(Cond.size() == 1 && "Invalid X86 branch condition!"); 3006193323Sed X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); 3007193323Sed if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) 3008193323Sed return true; 3009193323Sed Cond[0].setImm(GetOppositeBranchCondition(CC)); 3010193323Sed return false; 3011193323Sed} 3012193323Sed 3013193323Sedbool X86InstrInfo:: 3014193323SedisSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 3015193323Sed // FIXME: Return false for x87 stack register classes 
  // for now. We can't
  // allow any loads of these registers before FpGet_ST0_80.
  return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
           RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
}

/// sizeOfImm - Return the size, in bytes, of the immediate field encoded in
/// the instruction's TSFlags (Imm8/Imm16/Imm32/Imm64).
unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
  switch (Desc->TSFlags & X86II::ImmMask) {
  case X86II::Imm8:   return 1;
  case X86II::Imm16:  return 2;
  case X86II::Imm32:  return 4;
  case X86II::Imm64:  return 8;
  default: llvm_unreachable("Immediate size not set!");
    return 0;
  }
}

/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register?
/// e.g. r8, xmm8, etc.
bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) {
  if (!MO.isReg()) return false;
  switch (MO.getReg()) {
  default: break;
  case X86::R8:    case X86::R9:    case X86::R10:   case X86::R11:
  case X86::R12:   case X86::R13:   case X86::R14:   case X86::R15:
  case X86::R8D:   case X86::R9D:   case X86::R10D:  case X86::R11D:
  case X86::R12D:  case X86::R13D:  case X86::R14D:  case X86::R15D:
  case X86::R8W:   case X86::R9W:   case X86::R10W:  case X86::R11W:
  case X86::R12W:  case X86::R13W:  case X86::R14W:  case X86::R15W:
  case X86::R8B:   case X86::R9B:   case X86::R10B:  case X86::R11B:
  case X86::R12B:  case X86::R13B:  case X86::R14B:  case X86::R15B:
  case X86::XMM8:  case X86::XMM9:  case X86::XMM10: case X86::XMM11:
  case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
    return true;
  }
  return false;
}


/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
/// size, and 3) use of X86-64 extended registers.
///
/// Bit layout of the returned value follows the REX prefix low nibble:
/// bit 3 = REX.W, bit 2 = REX.R, bit 1 = REX.X, bit 0 = REX.B; 0x40 alone
/// marks access to SPL/BPL/SIL/DIL (see Intel SDM Vol. 2, instruction format).
unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
  unsigned REX = 0;
  const TargetInstrDesc &Desc = MI.getDesc();

  // Pseudo instructions do not need REX prefix byte.
  if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
    return 0;
  // REX.W (bit 3): 64-bit operand size.
  if (Desc.TSFlags & X86II::REX_W)
    REX |= 1 << 3;

  unsigned NumOps = Desc.getNumOperands();
  if (NumOps) {
    bool isTwoAddr = NumOps > 1 &&
      Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;

    // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
    unsigned i = isTwoAddr ? 1 : 0;
    for (unsigned e = NumOps; i != e; ++i) {
      const MachineOperand& MO = MI.getOperand(i);
      if (MO.isReg()) {
        unsigned Reg = MO.getReg();
        if (isX86_64NonExtLowByteReg(Reg))
          REX |= 0x40;
      }
    }

    // The remaining bits depend on where extended registers appear, which is
    // determined by the instruction's encoding form.
    switch (Desc.TSFlags & X86II::FormMask) {
    case X86II::MRMInitReg:
      // Same register in both ModRM fields: needs REX.B and REX.R.
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= (1 << 0) | (1 << 2);
      break;
    case X86II::MRMSrcReg: {
      // Operand 0 is the ModRM reg field (REX.R); sources go in r/m (REX.B).
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 2;
      i = isTwoAddr ? 2 : 1;
      for (unsigned e = NumOps; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (isX86_64ExtendedReg(MO))
          REX |= 1 << 0;
      }
      break;
    }
    case X86II::MRMSrcMem: {
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 2;
      // Successive register operands of the memory reference map to
      // successive REX bits (base -> REX.B, index -> REX.X).
      unsigned Bit = 0;
      i = isTwoAddr ? 2 : 1;
      for (; i != NumOps; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (MO.isReg()) {
          if (isX86_64ExtendedReg(MO))
            REX |= 1 << Bit;
          Bit++;
        }
      }
      break;
    }
    case X86II::MRM0m: case X86II::MRM1m:
    case X86II::MRM2m: case X86II::MRM3m:
    case X86II::MRM4m: case X86II::MRM5m:
    case X86II::MRM6m: case X86II::MRM7m:
    case X86II::MRMDestMem: {
      // Memory operands come first; a register operand after the address
      // (if any) is the ModRM reg field (REX.R).
      unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
      i = isTwoAddr ? 1 : 0;
      if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
        REX |= 1 << 2;
      unsigned Bit = 0;
      for (; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (MO.isReg()) {
          if (isX86_64ExtendedReg(MO))
            REX |= 1 << Bit;
          Bit++;
        }
      }
      break;
    }
    default: {
      // Destination in r/m (REX.B); any other extended register sets REX.R.
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 0;
      i = isTwoAddr ? 2 : 1;
      for (unsigned e = NumOps; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (isX86_64ExtendedReg(MO))
          REX |= 1 << 2;
      }
      break;
    }
    }
  }
  return REX;
}

/// sizePCRelativeBlockAddress - This method returns the size of a PC
/// relative block address instruction
///
static unsigned sizePCRelativeBlockAddress() {
  return 4;
}

/// sizeGlobalAddress - Give the size of the emission of this global address
///
static unsigned sizeGlobalAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeConstPoolAddress - Give the size of the emission of this constant
/// pool address
///
static unsigned sizeConstPoolAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeExternalSymbolAddress - Give the size of the emission of this external
/// symbol
///
static unsigned sizeExternalSymbolAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeJumpTableAddress - Give the size of the emission of this jump
/// table address
///
static unsigned sizeJumpTableAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeConstant - Size of an immediate constant of the given byte width.
static unsigned sizeConstant(unsigned Size) {
  return Size;
}

/// sizeRegModRMByte - A register-form ModR/M byte is always one byte.
static unsigned sizeRegModRMByte(){
  return 1;
}

/// sizeSIBByte - A SIB byte is always one byte.
static unsigned sizeSIBByte(){
  return 1;
}

/// getDisplacementFieldSize - Size in bytes of the displacement field,
/// assuming 4-byte (dword) relocations/displacements throughout.
static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
  unsigned FinalSize = 0;
  // If this is a simple integer displacement that doesn't require a relocation.
  if (!RelocOp) {
    FinalSize += sizeConstant(4);
    return FinalSize;
  }

  // Otherwise, this is something that requires a relocation.
  if (RelocOp->isGlobal()) {
    FinalSize += sizeGlobalAddress(false);
  } else if (RelocOp->isCPI()) {
    FinalSize += sizeConstPoolAddress(false);
  } else if (RelocOp->isJTI()) {
    FinalSize += sizeJumpTableAddress(false);
  } else {
    llvm_unreachable("Unknown value to relocate!");
  }
  return FinalSize;
}

/// getMemModRMByteSize - Compute the encoded size (ModR/M byte, optional SIB
/// byte, and displacement) of the memory operand beginning at operand Op.
static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
                                    bool IsPIC, bool Is64BitMode) {
  // Operand layout of an x86 address: Op = base, Op+1 = scale, Op+2 = index,
  // Op+3 = displacement, Op+4 = segment.
  const MachineOperand &Op3 = MI.getOperand(Op+3);
  int DispVal = 0;
  const MachineOperand *DispForReloc = 0;
  unsigned FinalSize = 0;

  // Figure out what sort of displacement we have to handle here.
  // Relocated displacements force a full disp32; otherwise any non-reloc
  // displacement is modeled pessimistically as present (DispVal = 1).
  if (Op3.isGlobal()) {
    DispForReloc = &Op3;
  } else if (Op3.isCPI()) {
    if (Is64BitMode || IsPIC) {
      DispForReloc = &Op3;
    } else {
      DispVal = 1;
    }
  } else if (Op3.isJTI()) {
    if (Is64BitMode || IsPIC) {
      DispForReloc = &Op3;
    } else {
      DispVal = 1;
    }
  } else {
    DispVal = 1;
  }

  const MachineOperand &Base = MI.getOperand(Op);
  const MachineOperand &IndexReg = MI.getOperand(Op+2);

  unsigned BaseReg = Base.getReg();

  // Is a SIB byte needed?
  if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
      IndexReg.getReg() == 0 &&
      (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {
    if (BaseReg == 0) {  // Just a displacement?
      // Emit special case [disp32] encoding
      ++FinalSize;
      FinalSize += getDisplacementFieldSize(DispForReloc);
    } else {
      unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
      if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
        // Emit simple indirect register encoding... [EAX] f.e.
        ++FinalSize;
        // Be pessimistic and assume it's a disp32, not a disp8
      } else {
        // Emit the most general non-SIB encoding: [REG+disp32]
        ++FinalSize;
        FinalSize += getDisplacementFieldSize(DispForReloc);
      }
    }

  } else {  // We need a SIB byte, so start by outputting the ModR/M byte first
    assert(IndexReg.getReg() != X86::ESP &&
           IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");

    bool ForceDisp32 = false;
    if (BaseReg == 0 || DispForReloc) {
      // Emit the normal disp32 encoding.
      ++FinalSize;
      ForceDisp32 = true;
    } else {
      ++FinalSize;
    }

    FinalSize += sizeSIBByte();

    // Do we need to output a displacement?
    if (DispVal != 0 || ForceDisp32) {
      FinalSize += getDisplacementFieldSize(DispForReloc);
    }
  }
  return FinalSize;
}


/// GetInstSizeWithDesc - Compute a (pessimistic) byte size for MI when encoded
/// with the given descriptor: prefixes first, then the per-form opcode,
/// ModR/M/SIB, displacement and immediate fields.
static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
                                    const TargetInstrDesc *Desc,
                                    bool IsPIC, bool Is64BitMode) {

  unsigned Opcode = Desc->Opcode;
  unsigned FinalSize = 0;

  // Emit the lock opcode prefix as needed.
  if (Desc->TSFlags & X86II::LOCK) ++FinalSize;

  // Emit segment override opcode prefix as needed.
  switch (Desc->TSFlags & X86II::SegOvrMask) {
  case X86II::FS:
  case X86II::GS:
   ++FinalSize;
   break;
  default: llvm_unreachable("Invalid segment!");
  case 0: break;  // No segment override!
  }

  // Emit the repeat opcode prefix as needed.
  if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;

  // Emit the operand size opcode prefix as needed.
  if (Desc->TSFlags & X86II::OpSize) ++FinalSize;

  // Emit the address size opcode prefix as needed.
  if (Desc->TSFlags & X86II::AdSize) ++FinalSize;

  // Count mandatory opcode prefixes (F2/F3) and note whether a 0x0F escape
  // byte will be required before the opcode.
  bool Need0FPrefix = false;
  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::TB:  // Two-byte opcode prefix
  case X86II::T8:  // 0F 38
  case X86II::TA:  // 0F 3A
    Need0FPrefix = true;
    break;
  case X86II::TF: // F2 0F 38
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::REP: break; // already handled.
  case X86II::XS:   // F3 0F
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::XD:   // F2 0F
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
  case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
    ++FinalSize;
    break; // Two-byte opcode prefix
  default: llvm_unreachable("Invalid prefix!");
  case 0: break;  // No prefix!
  }

  if (Is64BitMode) {
    // REX prefix
    unsigned REX = X86InstrInfo::determineREX(MI);
    if (REX)
      ++FinalSize;
  }

  // 0x0F escape code must be emitted just before the opcode.
  if (Need0FPrefix)
    ++FinalSize;

  // Second escape byte for the three-byte opcode maps (0F 38 / 0F 3A).
  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::T8:  // 0F 38
    ++FinalSize;
    break;
  case X86II::TA:  // 0F 3A
    ++FinalSize;
    break;
  case X86II::TF: // F2 0F 38
    ++FinalSize;
    break;
  }

  // If this is a two-address instruction, skip one of the register operands.
  unsigned NumOps = Desc->getNumOperands();
  unsigned CurOp = 0;
  if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
    CurOp++;
  else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
    // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
    --NumOps;

  // Per-form accounting of opcode byte(s), ModR/M/SIB, displacement and
  // immediate; mirrors the layout used by the X86 code emitter.
  switch (Desc->TSFlags & X86II::FormMask) {
  default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
  case X86II::Pseudo:
    // Remember the current PC offset, this is the PIC relocation
    // base address.
    switch (Opcode) {
    default:
      break;
    case TargetInstrInfo::INLINEASM: {
      // Inline asm length comes from the target's asm-length estimate.
      const MachineFunction *MF = MI.getParent()->getParent();
      const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
      FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                                          *MF->getTarget().getMCAsmInfo());
      break;
    }
    case TargetInstrInfo::DBG_LABEL:
    case TargetInstrInfo::EH_LABEL:
      break;
    case TargetInstrInfo::IMPLICIT_DEF:
    case TargetInstrInfo::KILL:
    case X86::FP_REG_KILL:
      break;  // Zero-size pseudos.
    case X86::MOVPC32r: {
      // This emits the "call" portion of this pseudo instruction.
      ++FinalSize;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
      break;
    }
    }
    CurOp = NumOps;
    break;
  case X86II::RawFrm:
    ++FinalSize;

    if (CurOp != NumOps) {
      const MachineOperand &MO = MI.getOperand(CurOp++);
      if (MO.isMBB()) {
        FinalSize += sizePCRelativeBlockAddress();
      } else if (MO.isGlobal()) {
        FinalSize += sizeGlobalAddress(false);
      } else if (MO.isSymbol()) {
        FinalSize += sizeExternalSymbolAddress(false);
      } else if (MO.isImm()) {
        FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
      } else {
        llvm_unreachable("Unknown RawFrm operand!");
      }
    }
    break;

  case X86II::AddRegFrm:
    ++FinalSize;
    ++CurOp;

    if (CurOp != NumOps) {
      const MachineOperand &MO1 = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO1.isImm())
        FinalSize += sizeConstant(Size);
      else {
        // MOV64ri carries a full 8-byte address.
        bool dword = false;
        if (Opcode == X86::MOV64ri)
          dword = true;
        if (MO1.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO1.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO1.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO1.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;

  case X86II::MRMDestReg: {
    ++FinalSize;
    FinalSize += sizeRegModRMByte();
    CurOp += 2;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }
  case X86II::MRMDestMem: {
    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
    CurOp +=  X86AddrNumOperands + 1;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }

  case X86II::MRMSrcReg:
    ++FinalSize;
    FinalSize += sizeRegModRMByte();
    CurOp += 2;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;

  case X86II::MRMSrcMem: {
    int AddrOperands;
    if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
        Opcode == X86::LEA16r || Opcode == X86::LEA32r)
      AddrOperands = X86AddrNumOperands - 1; // No segment register
    else
      AddrOperands = X86AddrNumOperands;

    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
    CurOp += AddrOperands + 1;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }

  case X86II::MRM0r: case X86II::MRM1r:
  case X86II::MRM2r: case X86II::MRM3r:
  case X86II::MRM4r: case X86II::MRM5r:
  case X86II::MRM6r: case X86II::MRM7r:
    ++FinalSize;
    if (Desc->getOpcode() == X86::LFENCE ||
        Desc->getOpcode() == X86::MFENCE) {
      // Special handling of lfence and mfence;
      FinalSize += sizeRegModRMByte();
    } else if (Desc->getOpcode() == X86::MONITOR ||
               Desc->getOpcode() == X86::MWAIT) {
      // Special handling of monitor and mwait.
      FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode.
    } else {
      ++CurOp;
      FinalSize += sizeRegModRMByte();
    }

    if (CurOp != NumOps) {
      const MachineOperand &MO1 = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO1.isImm())
        FinalSize += sizeConstant(Size);
      else {
        bool dword = false;
        if (Opcode == X86::MOV64ri32)
          dword = true;
        if (MO1.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO1.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO1.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO1.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;

  case X86II::MRM0m: case X86II::MRM1m:
  case X86II::MRM2m: case X86II::MRM3m:
  case X86II::MRM4m: case X86II::MRM5m:
  case X86II::MRM6m: case X86II::MRM7m: {

    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
    CurOp += X86AddrNumOperands;

    if (CurOp != NumOps) {
      const MachineOperand &MO = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO.isImm())
        FinalSize += sizeConstant(Size);
      else {
        bool dword = false;
        if (Opcode == X86::MOV64mi32)
          dword = true;
        if (MO.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;
  }

  case X86II::MRMInitReg:
    ++FinalSize;
    // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
    FinalSize += sizeRegModRMByte();
    ++CurOp;
    break;
  }

  if (!Desc->isVariadic() && CurOp != NumOps) {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "Cannot determine size: " << MI;
    llvm_report_error(Msg.str());
  }


  return FinalSize;
}


/// GetInstSizeInBytes - Return the (estimated) encoded size of MI in bytes.
/// MOVPC32r is a call+pop pair, so the POP32r size is added on top.
unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const TargetInstrDesc &Desc = MI->getDesc();
  bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
  bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
  unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
  if (Desc.getOpcode() == X86::MOVPC32r)
    Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
  return Size;
}

/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
3609193323Sed/// 3610193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 3611193323Sed assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && 3612193323Sed "X86-64 PIC uses RIP relative addressing"); 3613193323Sed 3614193323Sed X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 3615193323Sed unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 3616193323Sed if (GlobalBaseReg != 0) 3617193323Sed return GlobalBaseReg; 3618193323Sed 3619193323Sed // Insert the set of GlobalBaseReg into the first MBB of the function 3620193323Sed MachineBasicBlock &FirstMBB = MF->front(); 3621193323Sed MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 3622193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 3623193323Sed if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc(); 3624193323Sed MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3625193323Sed unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3626193323Sed 3627193323Sed const TargetInstrInfo *TII = TM.getInstrInfo(); 3628193323Sed // Operand of MovePCtoStack is completely ignored by asm printer. It's 3629193323Sed // only used in JIT code emission as displacement to pc. 3630195098Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); 3631193323Sed 3632193323Sed // If we're using vanilla 'GOT' PIC style, we should use relative addressing 3633195098Sed // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. 
3634198090Srdivacky if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) { 3635195098Sed GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3636195098Sed // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register 3637193323Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) 3638198090Srdivacky .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 3639195098Sed X86II::MO_GOT_ABSOLUTE_ADDRESS); 3640193323Sed } else { 3641193323Sed GlobalBaseReg = PC; 3642193323Sed } 3643193323Sed 3644193323Sed X86FI->setGlobalBaseReg(GlobalBaseReg); 3645193323Sed return GlobalBaseReg; 3646193323Sed} 3647