X86InstrInfo.cpp revision 199481
//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86InstrInfo.h"
#include "X86.h"
#include "X86GenInstrInfo.inc"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"

#include <limits>

using namespace llvm;

static cl::opt<bool>
NoFusing("disable-spill-fusing",
         cl::desc("Disable fusing of spill code into instructions"));
static cl::opt<bool>
PrintFailedFusing("print-failed-fuse-candidates",
                  cl::desc("Print instructions that the allocator wants to"
                           " fuse, but the X86 backend currently can't"),
                  cl::Hidden);
static cl::opt<bool>
ReMatPICStubLoad("remat-pic-stub-load",
                 cl::desc("Re-materialize load from stub in PIC mode"),
                 cl::init(false), cl::Hidden);

X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
    TM(tm), RI(tm, *this) {
  SmallVector<unsigned,16> AmbEntries;
  static const unsigned OpTbl2Addr[][2] = {
    { X86::ADC32ri,   X86::ADC32mi },
    { X86::ADC32ri8,  X86::ADC32mi8 },
    { X86::ADC32rr,   X86::ADC32mr },
    { X86::ADC64ri32, X86::ADC64mi32 },
    { X86::ADC64ri8,  X86::ADC64mi8 },
    { X86::ADC64rr,   X86::ADC64mr },
    { X86::ADD16ri,   X86::ADD16mi },
    { X86::ADD16ri8,  X86::ADD16mi8 },
    { X86::ADD16rr,   X86::ADD16mr },
    { X86::ADD32ri,   X86::ADD32mi },
    { X86::ADD32ri8,  X86::ADD32mi8 },
    { X86::ADD32rr,   X86::ADD32mr },
    { X86::ADD64ri32, X86::ADD64mi32 },
    { X86::ADD64ri8,  X86::ADD64mi8 },
    { X86::ADD64rr,   X86::ADD64mr },
    { X86::ADD8ri,    X86::ADD8mi },
    { X86::ADD8rr,    X86::ADD8mr },
    { X86::AND16ri,   X86::AND16mi },
    { X86::AND16ri8,  X86::AND16mi8 },
    { X86::AND16rr,   X86::AND16mr },
    { X86::AND32ri,   X86::AND32mi },
    { X86::AND32ri8,  X86::AND32mi8 },
    { X86::AND32rr,   X86::AND32mr },
    { X86::AND64ri32, X86::AND64mi32 },
    { X86::AND64ri8,  X86::AND64mi8 },
    { X86::AND64rr,   X86::AND64mr },
    { X86::AND8ri,    X86::AND8mi },
    { X86::AND8rr,    X86::AND8mr },
    { X86::DEC16r,    X86::DEC16m },
    { X86::DEC32r,    X86::DEC32m },
    { X86::DEC64_16r, X86::DEC64_16m },
    { X86::DEC64_32r, X86::DEC64_32m },
    { X86::DEC64r,    X86::DEC64m },
    { X86::DEC8r,     X86::DEC8m },
    { X86::INC16r,    X86::INC16m },
    { X86::INC32r,    X86::INC32m },
    { X86::INC64_16r, X86::INC64_16m },
    { X86::INC64_32r, X86::INC64_32m },
    { X86::INC64r,    X86::INC64m },
    { X86::INC8r,     X86::INC8m },
    { X86::NEG16r,    X86::NEG16m },
    { X86::NEG32r,    X86::NEG32m },
    { X86::NEG64r,    X86::NEG64m },
    { X86::NEG8r,     X86::NEG8m },
    { X86::NOT16r,    X86::NOT16m },
    { X86::NOT32r,    X86::NOT32m },
    { X86::NOT64r,    X86::NOT64m },
    { X86::NOT8r,     X86::NOT8m },
    { X86::OR16ri,    X86::OR16mi },
    { X86::OR16ri8,   X86::OR16mi8 },
    { X86::OR16rr,    X86::OR16mr },
    { X86::OR32ri,    X86::OR32mi },
    { X86::OR32ri8,   X86::OR32mi8 },
    { X86::OR32rr,    X86::OR32mr },
    { X86::OR64ri32,  X86::OR64mi32 },
    { X86::OR64ri8,   X86::OR64mi8 },
    { X86::OR64rr,    X86::OR64mr },
    { X86::OR8ri,     X86::OR8mi },
    { X86::OR8rr,     X86::OR8mr },
    { X86::ROL16r1,   X86::ROL16m1 },
    { X86::ROL16rCL,  X86::ROL16mCL },
    { X86::ROL16ri,   X86::ROL16mi },
    { X86::ROL32r1,   X86::ROL32m1 },
    { X86::ROL32rCL,  X86::ROL32mCL },
    { X86::ROL32ri,   X86::ROL32mi },
    { X86::ROL64r1,   X86::ROL64m1 },
    { X86::ROL64rCL,  X86::ROL64mCL },
    { X86::ROL64ri,   X86::ROL64mi },
    { X86::ROL8r1,    X86::ROL8m1 },
    { X86::ROL8rCL,   X86::ROL8mCL },
    { X86::ROL8ri,    X86::ROL8mi },
    { X86::ROR16r1,   X86::ROR16m1 },
    { X86::ROR16rCL,  X86::ROR16mCL },
    { X86::ROR16ri,   X86::ROR16mi },
    { X86::ROR32r1,   X86::ROR32m1 },
    { X86::ROR32rCL,  X86::ROR32mCL },
    { X86::ROR32ri,   X86::ROR32mi },
    { X86::ROR64r1,   X86::ROR64m1 },
    { X86::ROR64rCL,  X86::ROR64mCL },
    { X86::ROR64ri,   X86::ROR64mi },
    { X86::ROR8r1,    X86::ROR8m1 },
    { X86::ROR8rCL,   X86::ROR8mCL },
    { X86::ROR8ri,    X86::ROR8mi },
    { X86::SAR16r1,   X86::SAR16m1 },
    { X86::SAR16rCL,  X86::SAR16mCL },
    { X86::SAR16ri,   X86::SAR16mi },
    { X86::SAR32r1,   X86::SAR32m1 },
    { X86::SAR32rCL,  X86::SAR32mCL },
    { X86::SAR32ri,   X86::SAR32mi },
    { X86::SAR64r1,   X86::SAR64m1 },
    { X86::SAR64rCL,  X86::SAR64mCL },
    { X86::SAR64ri,   X86::SAR64mi },
    { X86::SAR8r1,    X86::SAR8m1 },
    { X86::SAR8rCL,   X86::SAR8mCL },
    { X86::SAR8ri,    X86::SAR8mi },
    { X86::SBB32ri,   X86::SBB32mi },
    { X86::SBB32ri8,  X86::SBB32mi8 },
    { X86::SBB32rr,   X86::SBB32mr },
    { X86::SBB64ri32, X86::SBB64mi32 },
    { X86::SBB64ri8,  X86::SBB64mi8 },
    { X86::SBB64rr,   X86::SBB64mr },
    { X86::SHL16rCL,  X86::SHL16mCL },
    { X86::SHL16ri,   X86::SHL16mi },
    { X86::SHL32rCL,  X86::SHL32mCL },
    { X86::SHL32ri,   X86::SHL32mi },
    { X86::SHL64rCL,  X86::SHL64mCL },
    { X86::SHL64ri,   X86::SHL64mi },
    { X86::SHL8rCL,   X86::SHL8mCL },
    { X86::SHL8ri,    X86::SHL8mi },
    { X86::SHLD16rrCL, X86::SHLD16mrCL },
    { X86::SHLD16rri8, X86::SHLD16mri8 },
    { X86::SHLD32rrCL, X86::SHLD32mrCL },
    { X86::SHLD32rri8, X86::SHLD32mri8 },
    { X86::SHLD64rrCL, X86::SHLD64mrCL },
    { X86::SHLD64rri8, X86::SHLD64mri8 },
    { X86::SHR16r1,   X86::SHR16m1 },
    { X86::SHR16rCL,  X86::SHR16mCL },
    { X86::SHR16ri,   X86::SHR16mi },
    { X86::SHR32r1,   X86::SHR32m1 },
    { X86::SHR32rCL,  X86::SHR32mCL },
    { X86::SHR32ri,   X86::SHR32mi },
    { X86::SHR64r1,   X86::SHR64m1 },
    { X86::SHR64rCL,  X86::SHR64mCL },
    { X86::SHR64ri,   X86::SHR64mi },
    { X86::SHR8r1,    X86::SHR8m1 },
    { X86::SHR8rCL,   X86::SHR8mCL },
    { X86::SHR8ri,    X86::SHR8mi },
    { X86::SHRD16rrCL, X86::SHRD16mrCL },
    { X86::SHRD16rri8, X86::SHRD16mri8 },
    { X86::SHRD32rrCL, X86::SHRD32mrCL },
    { X86::SHRD32rri8, X86::SHRD32mri8 },
    { X86::SHRD64rrCL, X86::SHRD64mrCL },
    { X86::SHRD64rri8, X86::SHRD64mri8 },
    { X86::SUB16ri,   X86::SUB16mi },
    { X86::SUB16ri8,  X86::SUB16mi8 },
    { X86::SUB16rr,   X86::SUB16mr },
    { X86::SUB32ri,   X86::SUB32mi },
    { X86::SUB32ri8,  X86::SUB32mi8 },
    { X86::SUB32rr,   X86::SUB32mr },
    { X86::SUB64ri32, X86::SUB64mi32 },
    { X86::SUB64ri8,  X86::SUB64mi8 },
    { X86::SUB64rr,   X86::SUB64mr },
    { X86::SUB8ri,    X86::SUB8mi },
    { X86::SUB8rr,    X86::SUB8mr },
    { X86::XOR16ri,   X86::XOR16mi },
    { X86::XOR16ri8,  X86::XOR16mi8 },
    { X86::XOR16rr,   X86::XOR16mr },
    { X86::XOR32ri,   X86::XOR32mi },
    { X86::XOR32ri8,  X86::XOR32mi8 },
    { X86::XOR32rr,   X86::XOR32mr },
    { X86::XOR64ri32, X86::XOR64mi32 },
    { X86::XOR64ri8,  X86::XOR64mi8 },
    { X86::XOR64rr,   X86::XOR64mr },
    { X86::XOR8ri,    X86::XOR8mi },
    { X86::XOR8rr,    X86::XOR8mr }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
    unsigned RegOp = OpTbl2Addr[i][0];
    unsigned MemOp = OpTbl2Addr[i][1];
    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
                                              std::make_pair(MemOp,0))).second)
      assert(false && "Duplicated entries?");
    // Index 0, folded load and store, no alignment requirement.
    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                                std::make_pair(RegOp,
                                                              AuxInfo))).second)
      AmbEntries.push_back(MemOp);
  }

  // If the third value is 1, then it's folding either a load or a store.
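  // Each OpTbl0 row is { register-form opcode, memory-form opcode, the
  // "folds a load" flag consumed below, minimum alignment required of the
  // memory operand } (16 for aligned SSE forms, 0 when any alignment will
  // do). The AuxInfo value built in the fold loops packs the folded operand
  // index into the low four bits, with bit 4 set when a load was folded and
  // bit 5 set when a store was folded.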
  static const unsigned OpTbl0[][4] = {
    { X86::BT16ri8,     X86::BT16mi8, 1, 0 },
    { X86::BT32ri8,     X86::BT32mi8, 1, 0 },
    { X86::BT64ri8,     X86::BT64mi8, 1, 0 },
    { X86::CALL32r,     X86::CALL32m, 1, 0 },
    { X86::CALL64r,     X86::CALL64m, 1, 0 },
    { X86::CMP16ri,     X86::CMP16mi, 1, 0 },
    { X86::CMP16ri8,    X86::CMP16mi8, 1, 0 },
    { X86::CMP16rr,     X86::CMP16mr, 1, 0 },
    { X86::CMP32ri,     X86::CMP32mi, 1, 0 },
    { X86::CMP32ri8,    X86::CMP32mi8, 1, 0 },
    { X86::CMP32rr,     X86::CMP32mr, 1, 0 },
    { X86::CMP64ri32,   X86::CMP64mi32, 1, 0 },
    { X86::CMP64ri8,    X86::CMP64mi8, 1, 0 },
    { X86::CMP64rr,     X86::CMP64mr, 1, 0 },
    { X86::CMP8ri,      X86::CMP8mi, 1, 0 },
    { X86::CMP8rr,      X86::CMP8mr, 1, 0 },
    { X86::DIV16r,      X86::DIV16m, 1, 0 },
    { X86::DIV32r,      X86::DIV32m, 1, 0 },
    { X86::DIV64r,      X86::DIV64m, 1, 0 },
    { X86::DIV8r,       X86::DIV8m, 1, 0 },
    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
    { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
    { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
    { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
    { X86::IDIV8r,      X86::IDIV8m, 1, 0 },
    { X86::IMUL16r,     X86::IMUL16m, 1, 0 },
    { X86::IMUL32r,     X86::IMUL32m, 1, 0 },
    { X86::IMUL64r,     X86::IMUL64m, 1, 0 },
    { X86::IMUL8r,      X86::IMUL8m, 1, 0 },
    { X86::JMP32r,      X86::JMP32m, 1, 0 },
    { X86::JMP64r,      X86::JMP64m, 1, 0 },
    { X86::MOV16ri,     X86::MOV16mi, 0, 0 },
    { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
    { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
    { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
    { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
    { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
    { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
    { X86::MOV8rr,      X86::MOV8mr, 0, 0 },
    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
    { X86::MOVAPDrr,    X86::MOVAPDmr, 0, 16 },
    { X86::MOVAPSrr,    X86::MOVAPSmr, 0, 16 },
    { X86::MOVDQArr,    X86::MOVDQAmr, 0, 16 },
    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
    { X86::MOVPQIto64rr, X86::MOVPQI2QImr, 0, 0 },
    { X86::MOVPS2SSrr,  X86::MOVPS2SSmr, 0, 0 },
    { X86::MOVSDrr,     X86::MOVSDmr, 0, 0 },
    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
    { X86::MOVSS2DIrr,  X86::MOVSS2DImr, 0, 0 },
    { X86::MOVSSrr,     X86::MOVSSmr, 0, 0 },
    { X86::MOVUPDrr,    X86::MOVUPDmr, 0, 0 },
    { X86::MOVUPSrr,    X86::MOVUPSmr, 0, 0 },
    { X86::MUL16r,      X86::MUL16m, 1, 0 },
    { X86::MUL32r,      X86::MUL32m, 1, 0 },
    { X86::MUL64r,      X86::MUL64m, 1, 0 },
    { X86::MUL8r,       X86::MUL8m, 1, 0 },
    { X86::SETAEr,      X86::SETAEm, 0, 0 },
    { X86::SETAr,       X86::SETAm, 0, 0 },
    { X86::SETBEr,      X86::SETBEm, 0, 0 },
    { X86::SETBr,       X86::SETBm, 0, 0 },
    { X86::SETEr,       X86::SETEm, 0, 0 },
    { X86::SETGEr,      X86::SETGEm, 0, 0 },
    { X86::SETGr,       X86::SETGm, 0, 0 },
    { X86::SETLEr,      X86::SETLEm, 0, 0 },
    { X86::SETLr,       X86::SETLm, 0, 0 },
    { X86::SETNEr,      X86::SETNEm, 0, 0 },
    { X86::SETNOr,      X86::SETNOm, 0, 0 },
    { X86::SETNPr,      X86::SETNPm, 0, 0 },
    { X86::SETNSr,      X86::SETNSm, 0, 0 },
    { X86::SETOr,       X86::SETOm, 0, 0 },
    { X86::SETPr,       X86::SETPm, 0, 0 },
    { X86::SETSr,       X86::SETSm, 0, 0 },
    { X86::TAILJMPr,    X86::TAILJMPm, 1, 0 },
    { X86::TEST16ri,    X86::TEST16mi, 1, 0 },
    { X86::TEST32ri,    X86::TEST32mi, 1, 0 },
    { X86::TEST64ri32,  X86::TEST64mi32, 1, 0 },
    { X86::TEST8ri,     X86::TEST8mi, 1, 0 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
    unsigned RegOp = OpTbl0[i][0];
    unsigned MemOp = OpTbl0[i][1];
    unsigned Align = OpTbl0[i][3];
    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
                                          std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    unsigned FoldedLoad = OpTbl0[i][2];
    // Index 0, folded load or store.
    unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                     std::make_pair(RegOp, AuxInfo))).second)
        AmbEntries.push_back(MemOp);
  }

  static const unsigned OpTbl1[][3] = {
    { X86::CMP16rr,         X86::CMP16rm, 0 },
    { X86::CMP32rr,         X86::CMP32rm, 0 },
    { X86::CMP64rr,         X86::CMP64rm, 0 },
    { X86::CMP8rr,          X86::CMP8rm, 0 },
    { X86::CVTSD2SSrr,      X86::CVTSD2SSrm, 0 },
    { X86::CVTSI2SD64rr,    X86::CVTSI2SD64rm, 0 },
    { X86::CVTSI2SDrr,      X86::CVTSI2SDrm, 0 },
    { X86::CVTSI2SS64rr,    X86::CVTSI2SS64rm, 0 },
    { X86::CVTSI2SSrr,      X86::CVTSI2SSrm, 0 },
    { X86::CVTSS2SDrr,      X86::CVTSS2SDrm, 0 },
    { X86::CVTTSD2SI64rr,   X86::CVTTSD2SI64rm, 0 },
    { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
    { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
    { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
    { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
    { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
    { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
    { X86::IMUL32rri8,      X86::IMUL32rmi8, 0 },
    { X86::IMUL64rri32,     X86::IMUL64rmi32, 0 },
    { X86::IMUL64rri8,      X86::IMUL64rmi8, 0 },
    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm, 0 },
    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm, 0 },
    { X86::Int_COMISDrr,    X86::Int_COMISDrm, 0 },
    { X86::Int_COMISSrr,    X86::Int_COMISSrm, 0 },
    { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm, 16 },
    { X86::Int_CVTDQ2PSrr,  X86::Int_CVTDQ2PSrm, 16 },
    { X86::Int_CVTPD2DQrr,  X86::Int_CVTPD2DQrm, 16 },
    { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm, 16 },
    { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm, 16 },
    { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm, 0 },
    { X86::Int_CVTSD2SI64rr, X86::Int_CVTSD2SI64rm, 0 },
    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm, 0 },
    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm, 0 },
    { X86::Int_CVTSI2SD64rr, X86::Int_CVTSI2SD64rm, 0 },
    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm, 0 },
    { X86::Int_CVTSI2SS64rr, X86::Int_CVTSI2SS64rm, 0 },
    { X86::Int_CVTSI2SSrr,  X86::Int_CVTSI2SSrm, 0 },
    { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm, 0 },
    { X86::Int_CVTSS2SI64rr, X86::Int_CVTSS2SI64rm, 0 },
    { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm, 0 },
    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
    { X86::Int_CVTTSD2SI64rr, X86::Int_CVTTSD2SI64rm, 0 },
    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
    { X86::Int_CVTTSS2SI64rr, X86::Int_CVTTSS2SI64rm, 0 },
    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
    { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm, 0 },
    { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
    { X86::MOV16rr,         X86::MOV16rm, 0 },
    { X86::MOV32rr,         X86::MOV32rm, 0 },
    { X86::MOV64rr,         X86::MOV64rm, 0 },
    { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
    { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
    { X86::MOV8rr,          X86::MOV8rm, 0 },
    { X86::MOVAPDrr,        X86::MOVAPDrm, 16 },
    { X86::MOVAPSrr,        X86::MOVAPSrm, 16 },
    { X86::MOVDDUPrr,       X86::MOVDDUPrm, 0 },
    { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm, 0 },
    { X86::MOVDI2SSrr,      X86::MOVDI2SSrm, 0 },
    { X86::MOVDQArr,        X86::MOVDQArm, 16 },
    { X86::MOVSD2PDrr,      X86::MOVSD2PDrm, 0 },
    { X86::MOVSDrr,         X86::MOVSDrm, 0 },
    { X86::MOVSHDUPrr,      X86::MOVSHDUPrm, 16 },
    { X86::MOVSLDUPrr,      X86::MOVSLDUPrm, 16 },
    { X86::MOVSS2PSrr,      X86::MOVSS2PSrm, 0 },
    { X86::MOVSSrr,         X86::MOVSSrm, 0 },
    { X86::MOVSX16rr8,      X86::MOVSX16rm8, 0 },
    { X86::MOVSX32rr16,     X86::MOVSX32rm16, 0 },
    { X86::MOVSX32rr8,      X86::MOVSX32rm8, 0 },
    { X86::MOVSX64rr16,     X86::MOVSX64rm16, 0 },
    { X86::MOVSX64rr32,     X86::MOVSX64rm32, 0 },
    { X86::MOVSX64rr8,      X86::MOVSX64rm8, 0 },
    { X86::MOVUPDrr,        X86::MOVUPDrm, 16 },
    { X86::MOVUPSrr,        X86::MOVUPSrm, 16 },
    { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm, 0 },
    { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm, 0 },
    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
    { X86::MOVZX16rr8,      X86::MOVZX16rm8, 0 },
    { X86::MOVZX32rr16,     X86::MOVZX32rm16, 0 },
    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
    { X86::MOVZX32rr8,      X86::MOVZX32rm8, 0 },
    { X86::MOVZX64rr16,     X86::MOVZX64rm16, 0 },
    { X86::MOVZX64rr32,     X86::MOVZX64rm32, 0 },
    { X86::MOVZX64rr8,      X86::MOVZX64rm8, 0 },
    { X86::PSHUFDri,        X86::PSHUFDmi, 16 },
    { X86::PSHUFHWri,       X86::PSHUFHWmi, 16 },
    { X86::PSHUFLWri,       X86::PSHUFLWmi, 16 },
    { X86::RCPPSr,          X86::RCPPSm, 16 },
    { X86::RCPPSr_Int,      X86::RCPPSm_Int, 16 },
    { X86::RSQRTPSr,        X86::RSQRTPSm, 16 },
    { X86::RSQRTPSr_Int,    X86::RSQRTPSm_Int, 16 },
    { X86::RSQRTSSr,        X86::RSQRTSSm, 0 },
    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int, 0 },
    { X86::SQRTPDr,         X86::SQRTPDm, 16 },
    { X86::SQRTPDr_Int,     X86::SQRTPDm_Int, 16 },
    { X86::SQRTPSr,         X86::SQRTPSm, 16 },
    { X86::SQRTPSr_Int,     X86::SQRTPSm_Int, 16 },
    { X86::SQRTSDr,         X86::SQRTSDm, 0 },
    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int, 0 },
    { X86::SQRTSSr,         X86::SQRTSSm, 0 },
    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int, 0 },
    { X86::TEST16rr,        X86::TEST16rm, 0 },
    { X86::TEST32rr,        X86::TEST32rm, 0 },
    { X86::TEST64rr,        X86::TEST64rm, 0 },
    { X86::TEST8rr,         X86::TEST8rm, 0 },
    // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
    { X86::UCOMISDrr,       X86::UCOMISDrm, 0 },
    { X86::UCOMISSrr,       X86::UCOMISSrm, 0 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
    unsigned RegOp = OpTbl1[i][0];
    unsigned MemOp = OpTbl1[i][1];
    unsigned Align = OpTbl1[i][2];
    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
                                          std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    // Index 1, folded load
    unsigned AuxInfo = 1 | (1 << 4);
    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                     std::make_pair(RegOp, AuxInfo))).second)
        AmbEntries.push_back(MemOp);
  }

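  // OpTbl1 above folds a load into the single source operand (operand index
  // 1); OpTbl2 below folds a load into the second source operand of
  // two-address arithmetic, e.g. rewriting "addl %ebx, %eax" (ADD32rr) as
  // "addl 16(%esp), %eax" (ADD32rm) when %ebx lives in a spill slot at
  // 16(%esp) (the offset here is only illustrative). The third column again
  // gives the alignment the folded memory operand must satisfy.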
  static const unsigned OpTbl2[][3] = {
    { X86::ADC32rr,         X86::ADC32rm, 0 },
    { X86::ADC64rr,         X86::ADC64rm, 0 },
    { X86::ADD16rr,         X86::ADD16rm, 0 },
    { X86::ADD32rr,         X86::ADD32rm, 0 },
    { X86::ADD64rr,         X86::ADD64rm, 0 },
    { X86::ADD8rr,          X86::ADD8rm, 0 },
    { X86::ADDPDrr,         X86::ADDPDrm, 16 },
    { X86::ADDPSrr,         X86::ADDPSrm, 16 },
    { X86::ADDSDrr,         X86::ADDSDrm, 0 },
    { X86::ADDSSrr,         X86::ADDSSrm, 0 },
    { X86::ADDSUBPDrr,      X86::ADDSUBPDrm, 16 },
    { X86::ADDSUBPSrr,      X86::ADDSUBPSrm, 16 },
    { X86::AND16rr,         X86::AND16rm, 0 },
    { X86::AND32rr,         X86::AND32rm, 0 },
    { X86::AND64rr,         X86::AND64rm, 0 },
    { X86::AND8rr,          X86::AND8rm, 0 },
    { X86::ANDNPDrr,        X86::ANDNPDrm, 16 },
    { X86::ANDNPSrr,        X86::ANDNPSrm, 16 },
    { X86::ANDPDrr,         X86::ANDPDrm, 16 },
    { X86::ANDPSrr,         X86::ANDPSrm, 16 },
    { X86::CMOVA16rr,       X86::CMOVA16rm, 0 },
    { X86::CMOVA32rr,       X86::CMOVA32rm, 0 },
    { X86::CMOVA64rr,       X86::CMOVA64rm, 0 },
    { X86::CMOVAE16rr,      X86::CMOVAE16rm, 0 },
    { X86::CMOVAE32rr,      X86::CMOVAE32rm, 0 },
    { X86::CMOVAE64rr,      X86::CMOVAE64rm, 0 },
    { X86::CMOVB16rr,       X86::CMOVB16rm, 0 },
    { X86::CMOVB32rr,       X86::CMOVB32rm, 0 },
    { X86::CMOVB64rr,       X86::CMOVB64rm, 0 },
    { X86::CMOVBE16rr,      X86::CMOVBE16rm, 0 },
    { X86::CMOVBE32rr,      X86::CMOVBE32rm, 0 },
    { X86::CMOVBE64rr,      X86::CMOVBE64rm, 0 },
    { X86::CMOVE16rr,       X86::CMOVE16rm, 0 },
    { X86::CMOVE32rr,       X86::CMOVE32rm, 0 },
    { X86::CMOVE64rr,       X86::CMOVE64rm, 0 },
    { X86::CMOVG16rr,       X86::CMOVG16rm, 0 },
    { X86::CMOVG32rr,       X86::CMOVG32rm, 0 },
    { X86::CMOVG64rr,       X86::CMOVG64rm, 0 },
    { X86::CMOVGE16rr,      X86::CMOVGE16rm, 0 },
    { X86::CMOVGE32rr,      X86::CMOVGE32rm, 0 },
    { X86::CMOVGE64rr,      X86::CMOVGE64rm, 0 },
    { X86::CMOVL16rr,       X86::CMOVL16rm, 0 },
    { X86::CMOVL32rr,       X86::CMOVL32rm, 0 },
    { X86::CMOVL64rr,       X86::CMOVL64rm, 0 },
    { X86::CMOVLE16rr,      X86::CMOVLE16rm, 0 },
    { X86::CMOVLE32rr,      X86::CMOVLE32rm, 0 },
    { X86::CMOVLE64rr,      X86::CMOVLE64rm, 0 },
    { X86::CMOVNE16rr,      X86::CMOVNE16rm, 0 },
    { X86::CMOVNE32rr,      X86::CMOVNE32rm, 0 },
    { X86::CMOVNE64rr,      X86::CMOVNE64rm, 0 },
    { X86::CMOVNO16rr,      X86::CMOVNO16rm, 0 },
    { X86::CMOVNO32rr,      X86::CMOVNO32rm, 0 },
    { X86::CMOVNO64rr,      X86::CMOVNO64rm, 0 },
    { X86::CMOVNP16rr,      X86::CMOVNP16rm, 0 },
    { X86::CMOVNP32rr,      X86::CMOVNP32rm, 0 },
    { X86::CMOVNP64rr,      X86::CMOVNP64rm, 0 },
    { X86::CMOVNS16rr,      X86::CMOVNS16rm, 0 },
    { X86::CMOVNS32rr,      X86::CMOVNS32rm, 0 },
    { X86::CMOVNS64rr,      X86::CMOVNS64rm, 0 },
    { X86::CMOVO16rr,       X86::CMOVO16rm, 0 },
    { X86::CMOVO32rr,       X86::CMOVO32rm, 0 },
    { X86::CMOVO64rr,       X86::CMOVO64rm, 0 },
    { X86::CMOVP16rr,       X86::CMOVP16rm, 0 },
    { X86::CMOVP32rr,       X86::CMOVP32rm, 0 },
    { X86::CMOVP64rr,       X86::CMOVP64rm, 0 },
    { X86::CMOVS16rr,       X86::CMOVS16rm, 0 },
    { X86::CMOVS32rr,       X86::CMOVS32rm, 0 },
    { X86::CMOVS64rr,       X86::CMOVS64rm, 0 },
    { X86::CMPPDrri,        X86::CMPPDrmi, 16 },
    { X86::CMPPSrri,        X86::CMPPSrmi, 16 },
    { X86::CMPSDrr,         X86::CMPSDrm, 0 },
    { X86::CMPSSrr,         X86::CMPSSrm, 0 },
    { X86::DIVPDrr,         X86::DIVPDrm, 16 },
    { X86::DIVPSrr,         X86::DIVPSrm, 16 },
    { X86::DIVSDrr,         X86::DIVSDrm, 0 },
    { X86::DIVSSrr,         X86::DIVSSrm, 0 },
    { X86::FsANDNPDrr,      X86::FsANDNPDrm, 16 },
    { X86::FsANDNPSrr,      X86::FsANDNPSrm, 16 },
    { X86::FsANDPDrr,       X86::FsANDPDrm, 16 },
    { X86::FsANDPSrr,       X86::FsANDPSrm, 16 },
    { X86::FsORPDrr,        X86::FsORPDrm, 16 },
    { X86::FsORPSrr,        X86::FsORPSrm, 16 },
    { X86::FsXORPDrr,       X86::FsXORPDrm, 16 },
    { X86::FsXORPSrr,       X86::FsXORPSrm, 16 },
    { X86::HADDPDrr,        X86::HADDPDrm, 16 },
    { X86::HADDPSrr,        X86::HADDPSrm, 16 },
    { X86::HSUBPDrr,        X86::HSUBPDrm, 16 },
    { X86::HSUBPSrr,        X86::HSUBPSrm, 16 },
    { X86::IMUL16rr,        X86::IMUL16rm, 0 },
    { X86::IMUL32rr,        X86::IMUL32rm, 0 },
    { X86::IMUL64rr,        X86::IMUL64rm, 0 },
    { X86::MAXPDrr,         X86::MAXPDrm, 16 },
    { X86::MAXPDrr_Int,     X86::MAXPDrm_Int, 16 },
    { X86::MAXPSrr,         X86::MAXPSrm, 16 },
    { X86::MAXPSrr_Int,     X86::MAXPSrm_Int, 16 },
    { X86::MAXSDrr,         X86::MAXSDrm, 0 },
    { X86::MAXSDrr_Int,     X86::MAXSDrm_Int, 0 },
    { X86::MAXSSrr,         X86::MAXSSrm, 0 },
    { X86::MAXSSrr_Int,     X86::MAXSSrm_Int, 0 },
    { X86::MINPDrr,         X86::MINPDrm, 16 },
    { X86::MINPDrr_Int,     X86::MINPDrm_Int, 16 },
    { X86::MINPSrr,         X86::MINPSrm, 16 },
    { X86::MINPSrr_Int,     X86::MINPSrm_Int, 16 },
    { X86::MINSDrr,         X86::MINSDrm, 0 },
    { X86::MINSDrr_Int,     X86::MINSDrm_Int, 0 },
    { X86::MINSSrr,         X86::MINSSrm, 0 },
    { X86::MINSSrr_Int,     X86::MINSSrm_Int, 0 },
    { X86::MULPDrr,         X86::MULPDrm, 16 },
    { X86::MULPSrr,         X86::MULPSrm, 16 },
    { X86::MULSDrr,         X86::MULSDrm, 0 },
    { X86::MULSSrr,         X86::MULSSrm, 0 },
    { X86::OR16rr,          X86::OR16rm, 0 },
    { X86::OR32rr,          X86::OR32rm, 0 },
    { X86::OR64rr,          X86::OR64rm, 0 },
    { X86::OR8rr,           X86::OR8rm, 0 },
    { X86::ORPDrr,          X86::ORPDrm, 16 },
    { X86::ORPSrr,          X86::ORPSrm, 16 },
    { X86::PACKSSDWrr,      X86::PACKSSDWrm, 16 },
    { X86::PACKSSWBrr,      X86::PACKSSWBrm, 16 },
    { X86::PACKUSWBrr,      X86::PACKUSWBrm, 16 },
    { X86::PADDBrr,         X86::PADDBrm, 16 },
    { X86::PADDDrr,         X86::PADDDrm, 16 },
    { X86::PADDQrr,         X86::PADDQrm, 16 },
    { X86::PADDSBrr,        X86::PADDSBrm, 16 },
    { X86::PADDSWrr,        X86::PADDSWrm, 16 },
    { X86::PADDWrr,         X86::PADDWrm, 16 },
    { X86::PANDNrr,         X86::PANDNrm, 16 },
    { X86::PANDrr,          X86::PANDrm, 16 },
    { X86::PAVGBrr,         X86::PAVGBrm, 16 },
    { X86::PAVGWrr,         X86::PAVGWrm, 16 },
    { X86::PCMPEQBrr,       X86::PCMPEQBrm, 16 },
    { X86::PCMPEQDrr,       X86::PCMPEQDrm, 16 },
    { X86::PCMPEQWrr,       X86::PCMPEQWrm, 16 },
    { X86::PCMPGTBrr,       X86::PCMPGTBrm, 16 },
    { X86::PCMPGTDrr,       X86::PCMPGTDrm, 16 },
    { X86::PCMPGTWrr,       X86::PCMPGTWrm, 16 },
    { X86::PINSRWrri,       X86::PINSRWrmi, 16 },
    { X86::PMADDWDrr,       X86::PMADDWDrm, 16 },
    { X86::PMAXSWrr,        X86::PMAXSWrm, 16 },
    { X86::PMAXUBrr,        X86::PMAXUBrm, 16 },
    { X86::PMINSWrr,        X86::PMINSWrm, 16 },
    { X86::PMINUBrr,        X86::PMINUBrm, 16 },
    { X86::PMULDQrr,        X86::PMULDQrm, 16 },
    { X86::PMULHUWrr,       X86::PMULHUWrm, 16 },
    { X86::PMULHWrr,        X86::PMULHWrm, 16 },
    { X86::PMULLDrr,        X86::PMULLDrm, 16 },
    { X86::PMULLDrr_int,    X86::PMULLDrm_int, 16 },
    { X86::PMULLWrr,        X86::PMULLWrm, 16 },
    { X86::PMULUDQrr,       X86::PMULUDQrm, 16 },
    { X86::PORrr,           X86::PORrm, 16 },
    { X86::PSADBWrr,        X86::PSADBWrm, 16 },
    { X86::PSLLDrr,         X86::PSLLDrm, 16 },
    { X86::PSLLQrr,         X86::PSLLQrm, 16 },
    { X86::PSLLWrr,         X86::PSLLWrm, 16 },
    { X86::PSRADrr,         X86::PSRADrm, 16 },
    { X86::PSRAWrr,         X86::PSRAWrm, 16 },
    { X86::PSRLDrr,         X86::PSRLDrm, 16 },
    { X86::PSRLQrr,         X86::PSRLQrm, 16 },
    { X86::PSRLWrr,         X86::PSRLWrm, 16 },
    { X86::PSUBBrr,         X86::PSUBBrm, 16 },
    { X86::PSUBDrr,         X86::PSUBDrm, 16 },
    { X86::PSUBSBrr,        X86::PSUBSBrm, 16 },
    { X86::PSUBSWrr,        X86::PSUBSWrm, 16 },
    { X86::PSUBWrr,         X86::PSUBWrm, 16 },
    { X86::PUNPCKHBWrr,     X86::PUNPCKHBWrm, 16 },
    { X86::PUNPCKHDQrr,     X86::PUNPCKHDQrm, 16 },
    { X86::PUNPCKHQDQrr,    X86::PUNPCKHQDQrm, 16 },
    { X86::PUNPCKHWDrr,     X86::PUNPCKHWDrm, 16 },
    { X86::PUNPCKLBWrr,     X86::PUNPCKLBWrm, 16 },
    { X86::PUNPCKLDQrr,     X86::PUNPCKLDQrm, 16 },
    { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm, 16 },
    { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm, 16 },
    { X86::PXORrr,          X86::PXORrm, 16 },
    { X86::SBB32rr,         X86::SBB32rm, 0 },
    { X86::SBB64rr,         X86::SBB64rm, 0 },
    { X86::SHUFPDrri,       X86::SHUFPDrmi, 16 },
    { X86::SHUFPSrri,       X86::SHUFPSrmi, 16 },
    { X86::SUB16rr,         X86::SUB16rm, 0 },
    { X86::SUB32rr,         X86::SUB32rm, 0 },
    { X86::SUB64rr,         X86::SUB64rm, 0 },
    { X86::SUB8rr,          X86::SUB8rm, 0 },
    { X86::SUBPDrr,         X86::SUBPDrm, 16 },
    { X86::SUBPSrr,         X86::SUBPSrm, 16 },
    { X86::SUBSDrr,         X86::SUBSDrm, 0 },
    { X86::SUBSSrr,         X86::SUBSSrm, 0 },
    // FIXME: TEST*rr -> swapped operand of TEST*mr.
    { X86::UNPCKHPDrr,      X86::UNPCKHPDrm, 16 },
    { X86::UNPCKHPSrr,      X86::UNPCKHPSrm, 16 },
    { X86::UNPCKLPDrr,      X86::UNPCKLPDrm, 16 },
    { X86::UNPCKLPSrr,      X86::UNPCKLPSrm, 16 },
    { X86::XOR16rr,         X86::XOR16rm, 0 },
    { X86::XOR32rr,         X86::XOR32rm, 0 },
    { X86::XOR64rr,         X86::XOR64rm, 0 },
    { X86::XOR8rr,          X86::XOR8rm, 0 },
    { X86::XORPDrr,         X86::XORPDrm, 16 },
    { X86::XORPSrr,         X86::XORPSrm, 16 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
    unsigned RegOp = OpTbl2[i][0];
    unsigned MemOp = OpTbl2[i][1];
    unsigned Align = OpTbl2[i][2];
    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
                                          std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    // Index 2, folded load
    unsigned AuxInfo = 2 | (1 << 4);
    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                   std::make_pair(RegOp, AuxInfo))).second)
      AmbEntries.push_back(MemOp);
  }

  // Remove ambiguous entries.
  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}

bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case X86::MOV8rr:
  case X86::MOV8rr_NOREX:
  case X86::MOV16rr:
  case X86::MOV32rr:
  case X86::MOV64rr:
  case X86::MOVSSrr:
  case X86::MOVSDrr:

  // FP Stack register class copies
  case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
  case X86::MOV_Fp3264: case X86::MOV_Fp3280:
  case X86::MOV_Fp6432: case X86::MOV_Fp8032:

  case X86::FsMOVAPSrr:
  case X86::FsMOVAPDrr:
  case X86::MOVAPSrr:
  case X86::MOVAPDrr:
  case X86::MOVDQArr:
  case X86::MOVSS2PSrr:
  case X86::MOVSD2PDrr:
  case X86::MOVPS2SSrr:
  case X86::MOVPD2SDrr:
  case X86::MMX_MOVQ64rr:
    assert(MI.getNumOperands() >= 2 &&
           MI.getOperand(0).isReg() &&
           MI.getOperand(1).isReg() &&
           "invalid register-register move instruction");
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SrcSubIdx = MI.getOperand(1).getSubReg();
    DstSubIdx = MI.getOperand(0).getSubReg();
    return true;
  }
}

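// An X86 machine memory reference is a sequence of operands: base register,
// scale immediate, index register, and displacement (followed by a segment
// operand). A direct stack-slot access therefore appears as a frame-index
// base with scale 1, no index register, and displacement 0, which is exactly
// the pattern isFrameOperand matches below.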
/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
                                  int &FrameIndex) const {
  if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
      MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
      MI->getOperand(Op+1).getImm() == 1 &&
      MI->getOperand(Op+2).getReg() == 0 &&
      MI->getOperand(Op+3).getImm() == 0) {
    FrameIndex = MI->getOperand(Op).getIndex();
    return true;
  }
  return false;
}

static bool isFrameLoadOpcode(int Opcode) {
  switch (Opcode) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
    return true;
  }
  return false;
}

static bool isFrameStoreOpcode(int Opcode) {
  switch (Opcode) {
  default: break;
  case X86::MOV8mr:
  case X86::MOV16mr:
  case X86::MOV32mr:
  case X86::MOV64mr:
  case X86::ST_FpP64m:
  case X86::MOVSSmr:
  case X86::MOVSDmr:
  case X86::MOVAPSmr:
  case X86::MOVAPDmr:
  case X86::MOVDQAmr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
    return true;
  }
  return false;
}

unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
  if (isFrameLoadOpcode(MI->getOpcode()))
    if (isFrameOperand(MI, 1, FrameIndex))
      return MI->getOperand(0).getReg();
  return 0;
}

unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                 int &FrameIndex) const {
  if (isFrameLoadOpcode(MI->getOpcode())) {
    unsigned Reg;
    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    return hasLoadFromStackSlot(MI, FrameIndex);
  }
  return 0;
}

bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
                                        int &FrameIndex) const {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end();
       o != oe;
       ++o) {
    if ((*o)->isLoad() && (*o)->getValue())
      if (const FixedStackPseudoSourceValue *Value =
          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
        FrameIndex = Value->getFrameIndex();
        return true;
      }
  }
  return false;
}

unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                          int &FrameIndex) const {
  if (isFrameStoreOpcode(MI->getOpcode()))
    if (isFrameOperand(MI, 0, FrameIndex))
      return MI->getOperand(X86AddrNumOperands).getReg();
  return 0;
}

unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                int &FrameIndex) const {
  if (isFrameStoreOpcode(MI->getOpcode())) {
    unsigned Reg;
    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    return hasStoreToStackSlot(MI, FrameIndex);
  }
  return 0;
}

bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
                                       int &FrameIndex) const {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end();
       o != oe;
       ++o) {
    if ((*o)->isStore() && (*o)->getValue())
      if (const FixedStackPseudoSourceValue *Value =
          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
        FrameIndex = Value->getFrameIndex();
        return true;
      }
  }
  return false;
}
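
// Note: once frame indices have been rewritten to concrete stack addresses,
// the operand pattern above no longer identifies stack accesses. The *PostFE
// entry points therefore fall back to hasLoadFromStackSlot /
// hasStoreToStackSlot, which scan the instruction's memory operands for a
// FixedStackPseudoSourceValue that survives frame-index elimination.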

/// regIsPICBase - Return true if register is a PIC base (i.e. defined by
/// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
  bool isPICBase = false;
  for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
         E = MRI.def_end(); I != E; ++I) {
    MachineInstr *DefMI = I.getOperand().getParent();
    if (DefMI->getOpcode() != X86::MOVPC32r)
      return false;
    assert(!isPICBase && "More than one PIC base?");
    isPICBase = true;
  }
  return isPICBase;
}

bool
X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVUPSrm_Int:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::FsMOVAPSrm:
  case X86::FsMOVAPDrm: {
    // Loads from constant pools are trivially rematerializable.
    if (MI->getOperand(1).isReg() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        MI->isInvariantLoad(AA)) {
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0 || BaseReg == X86::RIP)
        return true;
      // Allow re-materialization of PIC load.
      if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
        return false;
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isPICBase = false;
      for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
             E = MRI.def_end(); I != E; ++I) {
        MachineInstr *DefMI = I.getOperand().getParent();
        if (DefMI->getOpcode() != X86::MOVPC32r)
          return false;
        assert(!isPICBase && "More than one PIC base?");
        isPICBase = true;
      }
      return isPICBase;
    }
    return false;
  }

  case X86::LEA32r:
  case X86::LEA64r: {
    if (MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        !MI->getOperand(4).isReg()) {
      // lea fi#, lea GV, etc. are all rematerializable.
      if (!MI->getOperand(1).isReg())
        return true;
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0)
        return true;
      // Allow re-materialization of lea PICBase + x.
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }
  }

  // All other instructions marked M_REMATERIALIZABLE are always trivially
  // rematerializable.
  return true;
}
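
// For example, a MOVSSrm that loads from a constant-pool entry (or any other
// invariant address) can simply be re-executed at the point of use, which is
// usually cheaper than spilling and reloading the value it defines.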

/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
/// that would clobber the EFLAGS condition register. Note the result may be
/// conservative. If it cannot definitely determine the safety after visiting
/// a few instructions in each direction it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I) {
  // It's always safe to clobber EFLAGS at the end of a block.
  if (I == MBB.end())
    return true;

  // For compile time consideration, if we are not able to determine the
  // safety after visiting 4 instructions in each direction, we will assume
  // it's not safe.
  MachineBasicBlock::iterator Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    bool SeenDef = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      if (!MO.isReg())
        continue;
      if (MO.getReg() == X86::EFLAGS) {
        if (MO.isUse())
          return false;
        SeenDef = true;
      }
    }

    if (SeenDef)
      // This instruction defines EFLAGS, no need to look any further.
      return true;
    ++Iter;

    // If we make it to the end of the block, it's safe to clobber EFLAGS.
    if (Iter == MBB.end())
      return true;
  }

  Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    // If we make it to the beginning of the block, it's safe to clobber
    // EFLAGS iff EFLAGS is not live-in.
    if (Iter == MBB.begin())
      return !MBB.isLiveIn(X86::EFLAGS);

    --Iter;
    bool SawKill = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
        if (MO.isDef()) return MO.isDead();
        if (MO.isKill()) SawKill = true;
      }
    }

    if (SawKill)
      // This instruction kills EFLAGS and doesn't redefine it, so
      // there's no need to look further.
      return true;
  }

  // Conservative answer.
  return false;
}

void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 unsigned DestReg, unsigned SubIdx,
                                 const MachineInstr *Orig,
                                 const TargetRegisterInfo *TRI) const {
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (I != MBB.end()) DL = I->getDebugLoc();

  if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
    DestReg = TRI->getSubReg(DestReg, SubIdx);
    SubIdx = 0;
  }

  // MOV32r0 etc. are implemented with xor which clobbers condition code.
  // Re-materialize them as movri instructions to avoid side effects.
  bool Clone = true;
  unsigned Opc = Orig->getOpcode();
  switch (Opc) {
  default: break;
  case X86::MOV8r0:
  case X86::MOV16r0:
  case X86::MOV32r0: {
    if (!isSafeToClobberEFLAGS(MBB, I)) {
      switch (Opc) {
      default: break;
      case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
      case X86::MOV16r0: Opc = X86::MOV16ri; break;
      case X86::MOV32r0: Opc = X86::MOV32ri; break;
      }
      Clone = false;
    }
    break;
  }
  }

  if (Clone) {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->getOperand(0).setReg(DestReg);
    MBB.insert(I, MI);
  } else {
    BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
  }

  MachineInstr *NewMI = prior(I);
  NewMI->getOperand(0).setSubReg(SubIdx);
}

/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
/// is not marked dead.
static bool hasLiveCondCodeDef(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand. This allows the X86 target (for
/// example) to convert ADD and SHL instructions into LEA instructions if they
/// would require register copies due to two-addressness.
///
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
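/// For instance, in 32-bit mode a two-address add whose first source is
/// still live afterwards needs a copy:
///   movl %esi, %eax
///   addl %edi, %eax         ; ADD32rr forces dest == src1
/// while LEA writes an independent destination in one instruction:
///   leal (%esi,%edi), %eax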
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                    MachineBasicBlock::iterator &MBBI,
                                    LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  // All instructions input are two-addr instructions. Get the known operands.
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  MachineInstr *NewMI = NULL;
  // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
  // we have better subtarget support, enable the 16-bit LEA generation here.
  bool DisableLEA16 = true;

  unsigned MIOpc = MI->getOpcode();
  switch (MIOpc) {
  case X86::SHUFPSrri: {
    assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
    if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;

    unsigned B = MI->getOperand(1).getReg();
    unsigned C = MI->getOperand(2).getReg();
    if (B != C) return 0;
    unsigned A = MI->getOperand(0).getReg();
    unsigned M = MI->getOperand(3).getImm();
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
      .addReg(A, RegState::Define | getDeadRegState(isDead))
      .addReg(B, getKillRegState(isKill)).addImm(M);
    break;
  }
  case X86::SHL64ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill))
      .addImm(0);
    break;
  }
  case X86::SHL32ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
      X86::LEA64_32r : X86::LEA32r;
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill)).addImm(0);
    break;
  }
  case X86::SHL16ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    if (DisableLEA16) {
      // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
      MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
      unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
        ? X86::LEA64_32r : X86::LEA32r;
      unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
      unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);

      // Build and insert into an implicit UNDEF value. This is OK because
      // we'll be shifting and then extracting the lower 16-bits.
      BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
      MachineInstr *InsMI =
        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
        .addReg(leaInReg)
        .addReg(Src, getKillRegState(isKill))
        .addImm(X86::SUBREG_16BIT);

      NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
        .addReg(0).addImm(1 << ShAmt)
        .addReg(leaInReg, RegState::Kill)
        .addImm(0);

      MachineInstr *ExtMI =
        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
        .addReg(leaOutReg, RegState::Kill)
        .addImm(X86::SUBREG_16BIT);

      if (LV) {
        // Update live variables
        LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
        LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
        if (isKill)
          LV->replaceKillInstruction(Src, MI, InsMI);
        if (isDead)
          LV->replaceKillInstruction(Dest, MI, ExtMI);
      }
      return ExtMI;
    } else {
      NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
        .addReg(0).addImm(1 << ShAmt)
        .addReg(Src, getKillRegState(isKill))
        .addImm(0);
    }
    break;
  }
  default: {
    // The following opcodes also set the condition code register(s). Only
    // convert them to equivalent lea if the condition code register defs
    // are dead!
    if (hasLiveCondCodeDef(MI))
      return 0;

    bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
    switch (MIOpc) {
    default: return 0;
    case X86::INC64r:
    case X86::INC32r:
    case X86::INC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, 1);
      break;
    }
    case X86::INC16r:
    case X86::INC64_16r:
      if (DisableLEA16) return 0;
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, 1);
      break;
    case X86::DEC64r:
    case X86::DEC32r:
    case X86::DEC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, -1);
      break;
    }
    case X86::DEC16r:
    case X86::DEC64_16r:
      if (DisableLEA16) return 0;
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, -1);
      break;
    case X86::ADD64rr:
    case X86::ADD32rr: {
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                        .addReg(Dest, RegState::Define |
                                getDeadRegState(isDead)),
                        Src, isKill, Src2, isKill2);
      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD16rr: {
      if (DisableLEA16) return 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                        .addReg(Dest, RegState::Define |
                                getDeadRegState(isDead)),
                        Src, isKill, Src2, isKill2);
      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD64ri32:
    case X86::ADD64ri8:
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      if (MI->getOperand(2).isImm())
        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(),
                                        get(X86::LEA64r))
                                .addReg(Dest, RegState::Define |
                                        getDeadRegState(isDead)),
                                Src, isKill, MI->getOperand(2).getImm());
      break;
    case X86::ADD32ri:
    case X86::ADD32ri8:
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      if (MI->getOperand(2).isImm()) {
        unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                                .addReg(Dest, RegState::Define |
                                        getDeadRegState(isDead)),
                                Src, isKill, MI->getOperand(2).getImm());
      }
      break;
    case X86::ADD16ri:
    case X86::ADD16ri8:
      if (DisableLEA16) return 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      if (MI->getOperand(2).isImm())
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                             .addReg(Dest, RegState::Define |
                                     getDeadRegState(isDead)),
                             Src, isKill, MI->getOperand(2).getImm());
      break;
    case X86::SHL16ri:
      if (DisableLEA16) return 0;
    case X86::SHL32ri:
    case X86::SHL64ri: {
      assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
             "Unknown shl instruction!");
      unsigned ShAmt = MI->getOperand(2).getImm();
      if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
        X86AddressMode AM;
        AM.Scale = 1 << ShAmt;
        AM.IndexReg = Src;
        unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
          : (MIOpc == X86::SHL32ri
             ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
        NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                               .addReg(Dest, RegState::Define |
                                       getDeadRegState(isDead)), AM);
        if (isKill)
          NewMI->getOperand(3).setIsKill(true);
      }
      break;
    }
    }
  }
  }

  if (!NewMI) return 0;

  if (LV) {  // Update live variables
    if (isKill)
      LV->replaceKillInstruction(Src, MI, NewMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, NewMI);
  }

  MFI->insert(MBBI, NewMI);          // Insert the new inst
  return NewMI;
}

/// commuteInstruction - We have a few instructions that must be hacked on to
/// commute them.
///
MachineInstr *
X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  switch (MI->getOpcode()) {
  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
    unsigned Opc;
    unsigned Size;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
    case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
    case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
    case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
    case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
    case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
    }
    unsigned Amt = MI->getOperand(3).getImm();
    if (NewMI) {
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    MI->getOperand(3).setImm(Size-Amt);
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
  case X86::CMOVB16rr:
  case X86::CMOVB32rr:
  case X86::CMOVB64rr:
  case X86::CMOVAE16rr:
  case X86::CMOVAE32rr:
  case X86::CMOVAE64rr:
  case X86::CMOVE16rr:
  case X86::CMOVE32rr:
  case X86::CMOVE64rr:
  case X86::CMOVNE16rr:
  case X86::CMOVNE32rr:
  case X86::CMOVNE64rr:
  case X86::CMOVBE16rr:
  case X86::CMOVBE32rr:
  case X86::CMOVBE64rr:
  case X86::CMOVA16rr:
  case X86::CMOVA32rr:
  case X86::CMOVA64rr:
  case X86::CMOVL16rr:
  case X86::CMOVL32rr:
  case X86::CMOVL64rr:
  case X86::CMOVGE16rr:
  case X86::CMOVGE32rr:
  case X86::CMOVGE64rr:
  case X86::CMOVLE16rr:
  case X86::CMOVLE32rr:
  case X86::CMOVLE64rr:
  case X86::CMOVG16rr:
  case X86::CMOVG32rr:
  case X86::CMOVG64rr:
  case X86::CMOVS16rr:
  case X86::CMOVS32rr:
  case X86::CMOVS64rr:
  case X86::CMOVNS16rr:
  case X86::CMOVNS32rr:
  case X86::CMOVNS64rr:
  case X86::CMOVP16rr:
  case X86::CMOVP32rr:
  case X86::CMOVP64rr:
  case X86::CMOVNP16rr:
  case X86::CMOVNP32rr:
  case X86::CMOVNP64rr:
  case X86::CMOVO16rr:
  case X86::CMOVO32rr:
  case X86::CMOVO64rr:
  case X86::CMOVNO16rr:
  case X86::CMOVNO32rr:
  case X86::CMOVNO64rr: {
    unsigned Opc = 0;
    switch (MI->getOpcode()) {
    default: break;
    case X86::CMOVB16rr:  Opc = X86::CMOVAE16rr; break;
    case X86::CMOVB32rr:  Opc = X86::CMOVAE32rr; break;
    case X86::CMOVB64rr:  Opc = X86::CMOVAE64rr; break;
    case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
    case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
    case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
    case X86::CMOVE16rr:  Opc = X86::CMOVNE16rr; break;
    case X86::CMOVE32rr:  Opc = X86::CMOVNE32rr; break;
    case X86::CMOVE64rr:  Opc = X86::CMOVNE64rr; break;
    case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
    case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
    case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
    case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
    case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
    case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
    case X86::CMOVA16rr:  Opc = X86::CMOVBE16rr; break;
    case X86::CMOVA32rr:  Opc = X86::CMOVBE32rr; break;
    case X86::CMOVA64rr:  Opc = X86::CMOVBE64rr; break;
    case X86::CMOVL16rr:  Opc = X86::CMOVGE16rr; break;
    case X86::CMOVL32rr:  Opc = X86::CMOVGE32rr; break;
    case X86::CMOVL64rr:  Opc = X86::CMOVGE64rr; break;
    case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
    case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
    case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
    case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
    case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
    case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
    case X86::CMOVG16rr:  Opc = X86::CMOVLE16rr; break;
    case X86::CMOVG32rr:  Opc = X86::CMOVLE32rr; break;
    case X86::CMOVG64rr:  Opc = X86::CMOVLE64rr; break;
    case X86::CMOVS16rr:  Opc = X86::CMOVNS16rr; break;
    case X86::CMOVS32rr:  Opc = X86::CMOVNS32rr; break;
    case X86::CMOVS64rr:  Opc = X86::CMOVNS64rr; break;
    case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
    case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
    case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
    case X86::CMOVP16rr:  Opc = X86::CMOVNP16rr; break;
    case X86::CMOVP32rr:  Opc = X86::CMOVNP32rr; break;
    case X86::CMOVP64rr:  Opc = X86::CMOVNP64rr; break;
    case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
    case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
    case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
    case X86::CMOVO16rr:  Opc = X86::CMOVNO16rr; break;
    case X86::CMOVO32rr:  Opc = X86::CMOVNO32rr; break;
    case X86::CMOVO64rr:  Opc = X86::CMOVNO64rr; break;
    case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
    case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
    case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
    }
    if (NewMI) {
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    // Fallthrough intended.
  }
  default:
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
}
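
// Note on the CMOV cases above: a conditional move is commuted by inverting
// its condition code and then letting the generic commuteInstruction swap
// the two register operands (the intended fallthrough), since
// select(cc, a, b) == select(!cc, b, a).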
  }
  default:
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
}

static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
  switch (BrOpc) {
  default: return X86::COND_INVALID;
  case X86::JE:  return X86::COND_E;
  case X86::JNE: return X86::COND_NE;
  case X86::JL:  return X86::COND_L;
  case X86::JLE: return X86::COND_LE;
  case X86::JG:  return X86::COND_G;
  case X86::JGE: return X86::COND_GE;
  case X86::JB:  return X86::COND_B;
  case X86::JBE: return X86::COND_BE;
  case X86::JA:  return X86::COND_A;
  case X86::JAE: return X86::COND_AE;
  case X86::JS:  return X86::COND_S;
  case X86::JNS: return X86::COND_NS;
  case X86::JP:  return X86::COND_P;
  case X86::JNP: return X86::COND_NP;
  case X86::JO:  return X86::COND_O;
  case X86::JNO: return X86::COND_NO;
  }
}

unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Illegal condition code!");
  case X86::COND_E:  return X86::JE;
  case X86::COND_NE: return X86::JNE;
  case X86::COND_L:  return X86::JL;
  case X86::COND_LE: return X86::JLE;
  case X86::COND_G:  return X86::JG;
  case X86::COND_GE: return X86::JGE;
  case X86::COND_B:  return X86::JB;
  case X86::COND_BE: return X86::JBE;
  case X86::COND_A:  return X86::JA;
  case X86::COND_AE: return X86::JAE;
  case X86::COND_S:  return X86::JS;
  case X86::COND_NS: return X86::JNS;
  case X86::COND_P:  return X86::JP;
  case X86::COND_NP: return X86::JNP;
  case X86::COND_O:  return X86::JO;
  case X86::COND_NO: return X86::JNO;
  }
}

/// GetOppositeBranchCondition - Return the inverse of the specified condition,
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Illegal condition code!");
  case X86::COND_E:  return X86::COND_NE;
  case X86::COND_NE: return X86::COND_E;
  case X86::COND_L:  return X86::COND_GE;
  case X86::COND_LE: return X86::COND_G;
  case X86::COND_G:  return X86::COND_LE;
  case X86::COND_GE: return X86::COND_L;
  case X86::COND_B:  return X86::COND_AE;
  case X86::COND_BE: return X86::COND_A;
  case X86::COND_A:  return X86::COND_BE;
  case X86::COND_AE: return X86::COND_B;
  case X86::COND_S:  return X86::COND_NS;
  case X86::COND_NS: return X86::COND_S;
  case X86::COND_P:  return X86::COND_NP;
  case X86::COND_NP: return X86::COND_P;
  case X86::COND_O:  return X86::COND_NO;
  case X86::COND_NO: return X86::COND_O;
  }
}

bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.isTerminator()) return false;

  // Conditional branch is a special case.
  if (TID.isBranch() && !TID.isBarrier())
    return true;
  if (!TID.isPredicable())
    return true;
  return !isPredicated(MI);
}
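// The branch analysis below follows the usual AnalyzeBranch contract: a
// false return means the terminators were understood and TBB/FBB/Cond
// describe them, so RemoveBranch/InsertBranch can later reconstruct an
// equivalent terminator sequence; a true return means "cannot analyze" and
// callers must leave the block untouched.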
// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
                                               const X86InstrInfo &TII) {
  if (MI->getOpcode() == X86::FP_REG_KILL)
    return false;
  return TII.isUnpredicatedTerminator(MI);
}

bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    // Working from the bottom, when we see a non-terminator
    // instruction, we're done.
    if (!isBrAnalysisUnpredicatedTerminator(I, *this))
      break;
    // A terminator that isn't a branch can't easily be handled
    // by this analysis.
    if (!I->getDesc().isBranch())
      return true;
    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP) {
      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (next(I) != MBB.end())
        next(I)->eraseFromParent();
      Cond.clear();
      FBB = 0;
      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        continue;
      }
      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }
    // Handle conditional branches.
    X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
    if (BranchCode == X86::COND_INVALID)
      return true;  // Can't handle indirect branch.
    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }
    // Handle subsequent conditional branches. Only handle the case
    // where all conditional branches branch to the same destination
    // and their condition opcodes fit one of the special
    // multi-branch idioms.
    assert(Cond.size() == 1);
    assert(TBB);
    // Only handle the case where all conditional branches branch to
    // the same destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    // If the conditions are the same, we can leave them alone.
    if (OldBranchCode == BranchCode)
      continue;
    // If they differ, see if they fit one of the known patterns.
    // Theoretically we could handle more patterns here, but
    // we shouldn't expect to see them if instruction selection
    // has done a reasonable job.  (JNP+JE / JNE+JP pairs come from
    // floating-point equality tests, where PF reports an unordered
    // comparison.)
    if ((OldBranchCode == X86::COND_NP &&
         BranchCode == X86::COND_E) ||
        (OldBranchCode == X86::COND_E &&
         BranchCode == X86::COND_NP))
      BranchCode = X86::COND_NP_OR_E;
    else if ((OldBranchCode == X86::COND_P &&
              BranchCode == X86::COND_NE) ||
             (OldBranchCode == X86::COND_NE &&
              BranchCode == X86::COND_P))
      BranchCode = X86::COND_NE_OR_P;
    else
      return true;
    // Update the MachineOperand.
    Cond[0].setImm(BranchCode);
  }

  return false;
}
unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  unsigned Count = 0;

  while (I != MBB.begin()) {
    --I;
    if (I->getOpcode() != X86::JMP &&
        GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
      break;
    // Remove the branch.
    I->eraseFromParent();
    I = MBB.end();
    ++Count;
  }

  return Count;
}

unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                           MachineBasicBlock *FBB,
                           const SmallVectorImpl<MachineOperand> &Cond) const {
  // FIXME this should probably have a DebugLoc operand
  DebugLoc dl = DebugLoc::getUnknownLoc();
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "X86 branch conditions have one component!");

  if (Cond.empty()) {
    // Unconditional branch?
    assert(!FBB && "Unconditional branch with multiple successors!");
    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB);
    return 1;
  }

  // Conditional branch.
  unsigned Count = 0;
  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
  switch (CC) {
  case X86::COND_NP_OR_E:
    // Synthesize NP_OR_E with two branches.
    BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB);
    ++Count;
    break;
  case X86::COND_NE_OR_P:
    // Synthesize NE_OR_P with two branches.
    BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
    ++Count;
    break;
  default: {
    unsigned Opc = GetCondBranchFromCond(CC);
    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
    ++Count;
  }
  }
  if (FBB) {
    // Two-way conditional branch. Insert the second branch.
    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
    ++Count;
  }
  return Count;
}

/// isHReg - Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
  return X86::GR8_ABCD_HRegClass.contains(Reg);
}
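// On x86-64, any instruction that carries a REX prefix cannot encode the
// AH/BH/CH/DH registers (their encodings are reused for SPL/BPL/SIL/DIL),
// so the copy logic below routes 8-bit copies involving an h register
// through the MOV8rr_NOREX opcode, which is guaranteed to be encodable
// without a REX prefix.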
bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                unsigned DestReg, unsigned SrcReg,
                                const TargetRegisterClass *DestRC,
                                const TargetRegisterClass *SrcRC) const {
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  // Determine if DestRC and SrcRC have a common superclass.
  const TargetRegisterClass *CommonRC = DestRC;
  if (DestRC == SrcRC)
    /* Source and destination have the same register class. */;
  else if (CommonRC->hasSuperClass(SrcRC))
    CommonRC = SrcRC;
  else if (!DestRC->hasSubClass(SrcRC)) {
    // Neither GR64_NOREX nor GR64_NOSP is a superclass of the other,
    // but we want to copy them as GR64. Similarly, for GR32_NOREX and
    // GR32_NOSP, copy as GR32.
    if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
        DestRC->hasSuperClass(&X86::GR64RegClass))
      CommonRC = &X86::GR64RegClass;
    else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
             DestRC->hasSuperClass(&X86::GR32RegClass))
      CommonRC = &X86::GR32RegClass;
    else
      CommonRC = 0;
  }

  if (CommonRC) {
    unsigned Opc;
    if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
      Opc = X86::MOV64rr;
    } else if (CommonRC == &X86::GR32RegClass ||
               CommonRC == &X86::GR32_NOSPRegClass) {
      Opc = X86::MOV32rr;
    } else if (CommonRC == &X86::GR16RegClass) {
      Opc = X86::MOV16rr;
    } else if (CommonRC == &X86::GR8RegClass) {
      // Copying to or from a physical H register on x86-64 requires a NOREX
      // move.  Otherwise use a normal move.
      if ((isHReg(DestReg) || isHReg(SrcReg)) &&
          TM.getSubtarget<X86Subtarget>().is64Bit())
        Opc = X86::MOV8rr_NOREX;
      else
        Opc = X86::MOV8rr;
    } else if (CommonRC == &X86::GR64_ABCDRegClass) {
      Opc = X86::MOV64rr;
    } else if (CommonRC == &X86::GR32_ABCDRegClass) {
      Opc = X86::MOV32rr;
    } else if (CommonRC == &X86::GR16_ABCDRegClass) {
      Opc = X86::MOV16rr;
    } else if (CommonRC == &X86::GR8_ABCD_LRegClass) {
      Opc = X86::MOV8rr;
    } else if (CommonRC == &X86::GR8_ABCD_HRegClass) {
      if (TM.getSubtarget<X86Subtarget>().is64Bit())
        Opc = X86::MOV8rr_NOREX;
      else
        Opc = X86::MOV8rr;
    } else if (CommonRC == &X86::GR64_NOREXRegClass ||
               CommonRC == &X86::GR64_NOREX_NOSPRegClass) {
      Opc = X86::MOV64rr;
    } else if (CommonRC == &X86::GR32_NOREXRegClass) {
      Opc = X86::MOV32rr;
    } else if (CommonRC == &X86::GR16_NOREXRegClass) {
      Opc = X86::MOV16rr;
    } else if (CommonRC == &X86::GR8_NOREXRegClass) {
      Opc = X86::MOV8rr;
    } else if (CommonRC == &X86::RFP32RegClass) {
      Opc = X86::MOV_Fp3232;
    } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
      Opc = X86::MOV_Fp6464;
    } else if (CommonRC == &X86::RFP80RegClass) {
      Opc = X86::MOV_Fp8080;
    } else if (CommonRC == &X86::FR32RegClass) {
      Opc = X86::FsMOVAPSrr;
    } else if (CommonRC == &X86::FR64RegClass) {
      Opc = X86::FsMOVAPDrr;
    } else if (CommonRC == &X86::VR128RegClass) {
      Opc = X86::MOVAPSrr;
    } else if (CommonRC == &X86::VR64RegClass) {
      Opc = X86::MMX_MOVQ64rr;
    } else {
      return false;
    }
    BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
    return true;
  }
  // Moving EFLAGS to / from another register requires a push and a pop.
  if (SrcRC == &X86::CCRRegClass) {
    if (SrcReg != X86::EFLAGS)
      return false;
    if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
      return true;
    } else if (DestRC == &X86::GR32RegClass ||
               DestRC == &X86::GR32_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSHFD));
      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
      return true;
    }
  } else if (DestRC == &X86::CCRRegClass) {
    if (DestReg != X86::EFLAGS)
      return false;
    if (SrcRC == &X86::GR64RegClass || SrcRC == &X86::GR64_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
      BuildMI(MBB, MI, DL, get(X86::POPFQ));
      return true;
    } else if (SrcRC == &X86::GR32RegClass ||
               SrcRC == &X86::GR32_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
      BuildMI(MBB, MI, DL, get(X86::POPFD));
      return true;
    }
  }

  // Moving from ST(0) turns into FpGET_ST0_32 etc.
  if (SrcRC == &X86::RSTRegClass) {
    // Copying from ST(0)/ST(1).
    if (SrcReg != X86::ST0 && SrcReg != X86::ST1)
      // Can only copy from ST(0)/ST(1) right now
      return false;
    bool isST0 = SrcReg == X86::ST0;
    unsigned Opc;
    if (DestRC == &X86::RFP32RegClass)
      Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
    else if (DestRC == &X86::RFP64RegClass)
      Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
    else {
      if (DestRC != &X86::RFP80RegClass)
        return false;
      Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
    }
    BuildMI(MBB, MI, DL, get(Opc), DestReg);
    return true;
  }

  // Moving to ST(0) turns into FpSET_ST0_32 etc.
  if (DestRC == &X86::RSTRegClass) {
    // Copying to ST(0) / ST(1).
    if (DestReg != X86::ST0 && DestReg != X86::ST1)
      // Can only copy to TOS right now
      return false;
    bool isST0 = DestReg == X86::ST0;
    unsigned Opc;
    if (SrcRC == &X86::RFP32RegClass)
      Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
    else if (SrcRC == &X86::RFP64RegClass)
      Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
    else {
      if (SrcRC != &X86::RFP80RegClass)
        return false;
      Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
    }
    BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
    return true;
  }

  // Not yet supported!
  return false;
}
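// For the spill/reload opcode selection below: MOVAPS may only be used when
// the slot is known to be 16-byte aligned, since a misaligned MOVAPS faults;
// when alignment cannot be guaranteed, the unaligned MOVUPS form is chosen
// instead.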
static unsigned getStoreRegOpcode(unsigned SrcReg,
                                  const TargetRegisterClass *RC,
                                  bool isStackAligned,
                                  TargetMachine &TM) {
  unsigned Opc = 0;
  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
    Opc = X86::MOV64mr;
  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
    Opc = X86::MOV32mr;
  } else if (RC == &X86::GR16RegClass) {
    Opc = X86::MOV16mr;
  } else if (RC == &X86::GR8RegClass) {
    // Copying to or from a physical H register on x86-64 requires a NOREX
    // move.  Otherwise use a normal move.
    if (isHReg(SrcReg) &&
        TM.getSubtarget<X86Subtarget>().is64Bit())
      Opc = X86::MOV8mr_NOREX;
    else
      Opc = X86::MOV8mr;
  } else if (RC == &X86::GR64_ABCDRegClass) {
    Opc = X86::MOV64mr;
  } else if (RC == &X86::GR32_ABCDRegClass) {
    Opc = X86::MOV32mr;
  } else if (RC == &X86::GR16_ABCDRegClass) {
    Opc = X86::MOV16mr;
  } else if (RC == &X86::GR8_ABCD_LRegClass) {
    Opc = X86::MOV8mr;
  } else if (RC == &X86::GR8_ABCD_HRegClass) {
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      Opc = X86::MOV8mr_NOREX;
    else
      Opc = X86::MOV8mr;
  } else if (RC == &X86::GR64_NOREXRegClass ||
             RC == &X86::GR64_NOREX_NOSPRegClass) {
    Opc = X86::MOV64mr;
  } else if (RC == &X86::GR32_NOREXRegClass) {
    Opc = X86::MOV32mr;
  } else if (RC == &X86::GR16_NOREXRegClass) {
    Opc = X86::MOV16mr;
  } else if (RC == &X86::GR8_NOREXRegClass) {
    Opc = X86::MOV8mr;
  } else if (RC == &X86::RFP80RegClass) {
    Opc = X86::ST_FpP80m;   // pops
  } else if (RC == &X86::RFP64RegClass) {
    Opc = X86::ST_Fp64m;
  } else if (RC == &X86::RFP32RegClass) {
    Opc = X86::ST_Fp32m;
  } else if (RC == &X86::FR32RegClass) {
    Opc = X86::MOVSSmr;
  } else if (RC == &X86::FR64RegClass) {
    Opc = X86::MOVSDmr;
  } else if (RC == &X86::VR128RegClass) {
    // If stack is realigned we can use aligned stores.
    Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr;
  } else if (RC == &X86::VR64RegClass) {
    Opc = X86::MMX_MOVQ64mr;
  } else {
    llvm_unreachable("Unknown regclass");
  }

  return Opc;
}

void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned SrcReg, bool isKill, int FrameIdx,
                                       const TargetRegisterClass *RC) const {
  const MachineFunction &MF = *MBB.getParent();
  bool isAligned = (RI.getStackAlignment() >= 16) ||
    RI.needsStackRealignment(MF);
  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MI != MBB.end()) DL = MI->getDebugLoc();
  addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
    .addReg(SrcReg, getKillRegState(isKill));
}

void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
                                  bool isKill,
                                  SmallVectorImpl<MachineOperand> &Addr,
                                  const TargetRegisterClass *RC,
                                  MachineInstr::mmo_iterator MMOBegin,
                                  MachineInstr::mmo_iterator MMOEnd,
                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
  DebugLoc DL = DebugLoc::getUnknownLoc();
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
    MIB.addOperand(Addr[i]);
  MIB.addReg(SrcReg, getKillRegState(isKill));
  (*MIB).setMemRefs(MMOBegin, MMOEnd);
  NewMIs.push_back(MIB);
}
static unsigned getLoadRegOpcode(unsigned DestReg,
                                 const TargetRegisterClass *RC,
                                 bool isStackAligned,
                                 const TargetMachine &TM) {
  unsigned Opc = 0;
  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
    Opc = X86::MOV64rm;
  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
    Opc = X86::MOV32rm;
  } else if (RC == &X86::GR16RegClass) {
    Opc = X86::MOV16rm;
  } else if (RC == &X86::GR8RegClass) {
    // Copying to or from a physical H register on x86-64 requires a NOREX
    // move.  Otherwise use a normal move.
    if (isHReg(DestReg) &&
        TM.getSubtarget<X86Subtarget>().is64Bit())
      Opc = X86::MOV8rm_NOREX;
    else
      Opc = X86::MOV8rm;
  } else if (RC == &X86::GR64_ABCDRegClass) {
    Opc = X86::MOV64rm;
  } else if (RC == &X86::GR32_ABCDRegClass) {
    Opc = X86::MOV32rm;
  } else if (RC == &X86::GR16_ABCDRegClass) {
    Opc = X86::MOV16rm;
  } else if (RC == &X86::GR8_ABCD_LRegClass) {
    Opc = X86::MOV8rm;
  } else if (RC == &X86::GR8_ABCD_HRegClass) {
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      Opc = X86::MOV8rm_NOREX;
    else
      Opc = X86::MOV8rm;
  } else if (RC == &X86::GR64_NOREXRegClass ||
             RC == &X86::GR64_NOREX_NOSPRegClass) {
    Opc = X86::MOV64rm;
  } else if (RC == &X86::GR32_NOREXRegClass) {
    Opc = X86::MOV32rm;
  } else if (RC == &X86::GR16_NOREXRegClass) {
    Opc = X86::MOV16rm;
  } else if (RC == &X86::GR8_NOREXRegClass) {
    Opc = X86::MOV8rm;
  } else if (RC == &X86::RFP80RegClass) {
    Opc = X86::LD_Fp80m;
  } else if (RC == &X86::RFP64RegClass) {
    Opc = X86::LD_Fp64m;
  } else if (RC == &X86::RFP32RegClass) {
    Opc = X86::LD_Fp32m;
  } else if (RC == &X86::FR32RegClass) {
    Opc = X86::MOVSSrm;
  } else if (RC == &X86::FR64RegClass) {
    Opc = X86::MOVSDrm;
  } else if (RC == &X86::VR128RegClass) {
    // If stack is realigned we can use aligned loads.
    Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
  } else if (RC == &X86::VR64RegClass) {
    Opc = X86::MMX_MOVQ64rm;
  } else {
    llvm_unreachable("Unknown regclass");
  }

  return Opc;
}

void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        unsigned DestReg, int FrameIdx,
                                        const TargetRegisterClass *RC) const {
  const MachineFunction &MF = *MBB.getParent();
  bool isAligned = (RI.getStackAlignment() >= 16) ||
    RI.needsStackRealignment(MF);
  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MI != MBB.end()) DL = MI->getDebugLoc();
  addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
}

void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                   SmallVectorImpl<MachineOperand> &Addr,
                                   const TargetRegisterClass *RC,
                                   MachineInstr::mmo_iterator MMOBegin,
                                   MachineInstr::mmo_iterator MMOEnd,
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
  DebugLoc DL = DebugLoc::getUnknownLoc();
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
    MIB.addOperand(Addr[i]);
  (*MIB).setMemRefs(MMOBegin, MMOEnd);
  NewMIs.push_back(MIB);
}
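// Callee-saved GPRs are saved with PUSH (compact, and the frame-size
// bookkeeping below records how many slots they consume); XMM registers,
// and all registers on Win64, go through ordinary spill slots instead.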
bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MI,
                                const std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
  unsigned SlotSize = is64Bit ? 8 : 4;

  MachineFunction &MF = *MBB.getParent();
  unsigned FPReg = RI.getFrameRegister(MF);
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CalleeFrameSize = 0;

  unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    if (Reg == FPReg)
      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
      continue;
    if (RegClass != &X86::VR128RegClass && !isWin64) {
      CalleeFrameSize += SlotSize;
      BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
    } else {
      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
    }
  }

  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
  return true;
}

bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                const std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  MachineFunction &MF = *MBB.getParent();
  unsigned FPReg = RI.getFrameRegister(MF);
  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
  unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (Reg == FPReg)
      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
      continue;
    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
    if (RegClass != &X86::VR128RegClass && !isWin64) {
      BuildMI(MBB, MI, DL, get(Opc), Reg);
    } else {
      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
    }
  }
  return true;
}
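// The three helpers below build the fused (memory-operand) form of an
// instruction: FuseTwoAddrInst rewrites a two-address instruction so the
// memory reference replaces both tied operands, FuseInst substitutes the
// memory reference for one ordinary register operand, and MakeM0Inst forms
// a "store immediate 0" used when folding the MOV*r0 pseudos.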
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
                                     const SmallVectorImpl<MachineOperand> &MOs,
                                     MachineInstr *MI,
                                     const TargetInstrInfo &TII) {
  // Create the base instruction with the memory operand as the first part.
  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                              MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(NewMI);
  unsigned NumAddrOps = MOs.size();
  for (unsigned i = 0; i != NumAddrOps; ++i)
    MIB.addOperand(MOs[i]);
  if (NumAddrOps < 4)  // FrameIndex only
    addOffset(MIB, 0);

  // Loop over the rest of the ri operands, converting them over.
  unsigned NumOps = MI->getDesc().getNumOperands()-2;
  for (unsigned i = 0; i != NumOps; ++i) {
    MachineOperand &MO = MI->getOperand(i+2);
    MIB.addOperand(MO);
  }
  for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    MIB.addOperand(MO);
  }
  return MIB;
}

static MachineInstr *FuseInst(MachineFunction &MF,
                              unsigned Opcode, unsigned OpNo,
                              const SmallVectorImpl<MachineOperand> &MOs,
                              MachineInstr *MI, const TargetInstrInfo &TII) {
  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                              MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(NewMI);

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (i == OpNo) {
      assert(MO.isReg() && "Expected to fold into reg operand!");
      unsigned NumAddrOps = MOs.size();
      for (unsigned i = 0; i != NumAddrOps; ++i)
        MIB.addOperand(MOs[i]);
      if (NumAddrOps < 4)  // FrameIndex only
        addOffset(MIB, 0);
    } else {
      MIB.addOperand(MO);
    }
  }
  return MIB;
}

static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
                                const SmallVectorImpl<MachineOperand> &MOs,
                                MachineInstr *MI) {
  MachineFunction &MF = *MI->getParent()->getParent();
  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));

  unsigned NumAddrOps = MOs.size();
  for (unsigned i = 0; i != NumAddrOps; ++i)
    MIB.addOperand(MOs[i]);
  if (NumAddrOps < 4)  // FrameIndex only
    addOffset(MIB, 0);
  return MIB.addImm(0);
}
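// foldMemoryOperandImpl is the core folding routine: operand index i selects
// which register operand is replaced by the memory reference in MOs, Size is
// the size in bytes of the memory being folded (0 if unknown), and Align its
// alignment; a fold is rejected when the operand's table entry demands a
// larger alignment than the memory provides.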
MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                    MachineInstr *MI, unsigned i,
                                    const SmallVectorImpl<MachineOperand> &MOs,
                                    unsigned Size, unsigned Align) const {
  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
  bool isTwoAddrFold = false;
  unsigned NumOps = MI->getDesc().getNumOperands();
  bool isTwoAddr = NumOps > 1 &&
    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;

  MachineInstr *NewMI = NULL;
  // Folding a memory location into the two-address part of a two-address
  // instruction is different from folding it elsewhere. It requires
  // replacing the *two* registers with the memory location.
  if (isTwoAddr && NumOps >= 2 && i < 2 &&
      MI->getOperand(0).isReg() &&
      MI->getOperand(1).isReg() &&
      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
    isTwoAddrFold = true;
  } else if (i == 0) { // If operand 0
    if (MI->getOpcode() == X86::MOV16r0)
      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV32r0)
      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV8r0)
      NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
    if (NewMI)
      return NewMI;

    OpcodeTablePtr = &RegOp2MemOpTable0;
  } else if (i == 1) {
    OpcodeTablePtr = &RegOp2MemOpTable1;
  } else if (i == 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2;
  }

  // If table selected...
  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
    if (I != OpcodeTablePtr->end()) {
      unsigned Opcode = I->second.first;
      unsigned MinAlign = I->second.second;
      if (Align < MinAlign)
        return NULL;
      bool NarrowToMOV32rm = false;
      if (Size) {
        unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
        if (Size < RCSize) {
          // Check if it's safe to fold the load. If the size of the object is
          // narrower than the load width, then it's not.
          if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
            return NULL;
          // If this is a 64-bit load, but the spill slot is 32, then we can do
          // a 32-bit load which is implicitly zero-extended. This likely is due
          // to liveintervalanalysis remat'ing a load from stack slot.
          if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
            return NULL;
          Opcode = X86::MOV32rm;
          NarrowToMOV32rm = true;
        }
      }

      if (isTwoAddrFold)
        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
      else
        NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);

      if (NarrowToMOV32rm) {
        // If this is the special case where we use a MOV32rm to load a 32-bit
        // value and zero-extend the top bits. Change the destination register
        // to a 32-bit one.
        unsigned DstReg = NewMI->getOperand(0).getReg();
        if (TargetRegisterInfo::isPhysicalRegister(DstReg))
          NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
                                                   4/*x86_subreg_32bit*/));
        else
          NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
      }
      return NewMI;
    }
  }

  // No fusion
  if (PrintFailedFusing)
    errs() << "We failed to fuse operand " << i << " in " << *MI;
  return NULL;
}


MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                           const SmallVectorImpl<unsigned> &Ops,
                                                  int FrameIndex) const {
  // Check switch flag
  if (NoFusing) return NULL;

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned Size = MFI->getObjectSize(FrameIndex);
  unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    unsigned RCSize = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri;    RCSize = 1; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri;   RCSize = 2; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri;   RCSize = 4; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
    }
    // Check if it's safe to fold the load. If the size of the object is
    // narrower than the load width, then it's not.
    if (Size < RCSize)
      return NULL;
    // Change to CMPXXri r, 0 first.  (TEST r,r and CMP r,0 produce the same
    // ZF/SF/PF and both clear CF/OF, and the CMP form leaves a register
    // operand free to fold.)
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  SmallVector<MachineOperand,4> MOs;
  MOs.push_back(MachineOperand::CreateFI(FrameIndex));
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
}
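// This variant folds the result of another load instruction rather than a
// stack slot.  V_SET0 / V_SETALLONES / FsFLD0SS / FsFLD0SD carry no memory
// operand of their own, so a constant-pool constant (zero or all-ones) is
// synthesized and the fold loads from that instead.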
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                           const SmallVectorImpl<unsigned> &Ops,
                                                  MachineInstr *LoadMI) const {
  // Check switch flag
  if (NoFusing) return NULL;

  // Determine the alignment of the load.
  unsigned Alignment = 0;
  if (LoadMI->hasOneMemOperand())
    Alignment = (*LoadMI->memoperands_begin())->getAlignment();
  else
    switch (LoadMI->getOpcode()) {
    case X86::V_SET0:
    case X86::V_SETALLONES:
      Alignment = 16;
      break;
    case X86::FsFLD0SD:
      Alignment = 8;
      break;
    case X86::FsFLD0SS:
      Alignment = 4;
      break;
    default:
      llvm_unreachable("Don't know how to fold this instruction!");
    }
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
    }
    // Change to CMPXXri r, 0 first.
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  SmallVector<MachineOperand,X86AddrNumOperands> MOs;
  switch (LoadMI->getOpcode()) {
  case X86::V_SET0:
  case X86::V_SETALLONES:
  case X86::FsFLD0SD:
  case X86::FsFLD0SS: {
    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
    // Create a constant-pool entry and operands to load from it.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (TM.getRelocationModel() == Reloc::PIC_) {
      if (TM.getSubtarget<X86Subtarget>().is64Bit())
        PICBase = X86::RIP;
      else
        // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
        // This doesn't work for several reasons.
        // 1. GlobalBaseReg may have been spilled.
        // 2. It may not be live at MI.
        return NULL;
    }

    // Create a constant-pool entry.
    MachineConstantPool &MCP = *MF.getConstantPool();
    const Type *Ty;
    if (LoadMI->getOpcode() == X86::FsFLD0SS)
      Ty = Type::getFloatTy(MF.getFunction()->getContext());
    else if (LoadMI->getOpcode() == X86::FsFLD0SD)
      Ty = Type::getDoubleTy(MF.getFunction()->getContext());
    else
      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
    Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
                    Constant::getAllOnesValue(Ty) :
                    Constant::getNullValue(Ty);
    unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);

    // Create operands to load from the constant pool entry
    // (base, scale, index, displacement, segment).
    MOs.push_back(MachineOperand::CreateReg(PICBase, false));
    MOs.push_back(MachineOperand::CreateImm(1));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    break;
  }
  default: {
    // Folding a normal load. Just copy the load's address operands.
    unsigned NumOps = LoadMI->getDesc().getNumOperands();
    for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
      MOs.push_back(LoadMI->getOperand(i));
    break;
  }
  }
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
}
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                  const SmallVectorImpl<unsigned> &Ops) const {
  // Check switch flag
  if (NoFusing) return false;

  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    switch (MI->getOpcode()) {
    default: return false;
    case X86::TEST8rr:
    case X86::TEST16rr:
    case X86::TEST32rr:
    case X86::TEST64rr:
      return true;
    }
  }

  if (Ops.size() != 1)
    return false;

  unsigned OpNum = Ops[0];
  unsigned Opc = MI->getOpcode();
  unsigned NumOps = MI->getDesc().getNumOperands();
  bool isTwoAddr = NumOps > 1 &&
    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;

  // Folding a memory location into the two-address part of a two-address
  // instruction is different from folding it elsewhere. It requires
  // replacing the *two* registers with the memory location.
  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
  } else if (OpNum == 0) { // If operand 0
    switch (Opc) {
    case X86::MOV8r0:
    case X86::MOV16r0:
    case X86::MOV32r0:
      return true;
    default: break;
    }
    OpcodeTablePtr = &RegOp2MemOpTable0;
  } else if (OpNum == 1) {
    OpcodeTablePtr = &RegOp2MemOpTable1;
  } else if (OpNum == 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2;
  }

  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      OpcodeTablePtr->find((unsigned*)Opc);
    if (I != OpcodeTablePtr->end())
      return true;
  }
  return false;
}
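// MemOp2RegOpTable values pack three facts about the memory form: bits 0-3
// hold the operand index of the memory reference, bit 4 records that the
// folded operand was a load, and bit 5 that it was a store.  The unfold
// routines below decode this to rebuild separate load / compute / store
// instructions.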
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const {
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
    MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
  if (I == MemOp2RegOpTable.end())
    return false;
  DebugLoc dl = MI->getDebugLoc();
  unsigned Opc = I->second.first;
  unsigned Index = I->second.second & 0xf;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  if (UnfoldLoad && !FoldedLoad)
    return false;
  UnfoldLoad &= FoldedLoad;
  if (UnfoldStore && !FoldedStore)
    return false;
  UnfoldStore &= FoldedStore;

  const TargetInstrDesc &TID = get(Opc);
  const TargetOperandInfo &TOI = TID.OpInfo[Index];
  const TargetRegisterClass *RC = TOI.getRegClass(&RI);
  SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
  SmallVector<MachineOperand,2> BeforeOps;
  SmallVector<MachineOperand,2> AfterOps;
  SmallVector<MachineOperand,4> ImpOps;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI->getOperand(i);
    if (i >= Index && i < Index + X86AddrNumOperands)
      AddrOps.push_back(Op);
    else if (Op.isReg() && Op.isImplicit())
      ImpOps.push_back(Op);
    else if (i < Index)
      BeforeOps.push_back(Op);
    else if (i > Index)
      AfterOps.push_back(Op);
  }

  // Emit the load instruction.
  if (UnfoldLoad) {
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractLoadMemRefs(MI->memoperands_begin(),
                            MI->memoperands_end());
    loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
    if (UnfoldStore) {
      // Address operands cannot be marked isKill.
      for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
        MachineOperand &MO = NewMIs[0]->getOperand(i);
        if (MO.isReg())
          MO.setIsKill(false);
      }
    }
  }

  // Emit the data processing instruction.
  MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(DataMI);

  if (FoldedStore)
    MIB.addReg(Reg, RegState::Define);
  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
    MIB.addOperand(BeforeOps[i]);
  if (FoldedLoad)
    MIB.addReg(Reg);
  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
    MIB.addOperand(AfterOps[i]);
  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
    MachineOperand &MO = ImpOps[i];
    MIB.addReg(MO.getReg(),
               getDefRegState(MO.isDef()) |
               RegState::Implicit |
               getKillRegState(MO.isKill()) |
               getDeadRegState(MO.isDead()) |
               getUndefRegState(MO.isUndef()));
  }
  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
  unsigned NewOpc = 0;
  switch (DataMI->getOpcode()) {
  default: break;
  case X86::CMP64ri32:
  case X86::CMP32ri:
  case X86::CMP16ri:
  case X86::CMP8ri: {
    MachineOperand &MO0 = DataMI->getOperand(0);
    MachineOperand &MO1 = DataMI->getOperand(1);
    if (MO1.getImm() == 0) {
      switch (DataMI->getOpcode()) {
      default: break;
      case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
      case X86::CMP32ri:   NewOpc = X86::TEST32rr; break;
      case X86::CMP16ri:   NewOpc = X86::TEST16rr; break;
      case X86::CMP8ri:    NewOpc = X86::TEST8rr; break;
      }
      DataMI->setDesc(get(NewOpc));
      MO1.ChangeToRegister(MO0.getReg(), false);
    }
  }
  }
  NewMIs.push_back(DataMI);

  // Emit the store instruction.
  if (UnfoldStore) {
    const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractStoreMemRefs(MI->memoperands_begin(),
                             MI->memoperands_end());
    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
  }

  return true;
}
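// The SelectionDAG twin of the routine above: it performs the same table
// decoding, but rebuilds machine nodes (load / compute / store) and
// re-attaches the extracted memory references so later passes keep the
// memory information.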
bool
X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                                  SmallVectorImpl<SDNode*> &NewNodes) const {
  if (!N->isMachineOpcode())
    return false;

  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
    MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
  if (I == MemOp2RegOpTable.end())
    return false;
  unsigned Opc = I->second.first;
  unsigned Index = I->second.second & 0xf;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  const TargetInstrDesc &TID = get(Opc);
  const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI);
  unsigned NumDefs = TID.NumDefs;
  std::vector<SDValue> AddrOps;
  std::vector<SDValue> BeforeOps;
  std::vector<SDValue> AfterOps;
  DebugLoc dl = N->getDebugLoc();
  unsigned NumOps = N->getNumOperands();
  for (unsigned i = 0; i != NumOps-1; ++i) {
    SDValue Op = N->getOperand(i);
    if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
      AddrOps.push_back(Op);
    else if (i < Index-NumDefs)
      BeforeOps.push_back(Op);
    else if (i > Index-NumDefs)
      AfterOps.push_back(Op);
  }
  SDValue Chain = N->getOperand(NumOps-1);
  AddrOps.push_back(Chain);

  // Emit the load instruction.
  SDNode *Load = 0;
  MachineFunction &MF = DAG.getMachineFunction();
  if (FoldedLoad) {
    EVT VT = *RC->vt_begin();
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                            cast<MachineSDNode>(N)->memoperands_end());
    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
    Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
                              VT, MVT::Other, &AddrOps[0], AddrOps.size());
    NewNodes.push_back(Load);

    // Preserve memory reference information.
    cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
  }

  // Emit the data processing instruction.
  std::vector<EVT> VTs;
  const TargetRegisterClass *DstRC = 0;
  if (TID.getNumDefs() > 0) {
    DstRC = TID.OpInfo[0].getRegClass(&RI);
    VTs.push_back(*DstRC->vt_begin());
  }
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
      VTs.push_back(VT);
  }
  if (Load)
    BeforeOps.push_back(SDValue(Load, 0));
  std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
  SDNode *NewNode = DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
                                       BeforeOps.size());
  NewNodes.push_back(NewNode);

  // Emit the store instruction.
  if (FoldedStore) {
    AddrOps.pop_back();
    AddrOps.push_back(SDValue(NewNode, 0));
    AddrOps.push_back(Chain);
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                             cast<MachineSDNode>(N)->memoperands_end());
    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
    SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
                                                         isAligned, TM),
                                       dl, MVT::Other,
                                       &AddrOps[0], AddrOps.size());
    NewNodes.push_back(Store);

    // Preserve memory reference information.
    cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
  }

  return true;
}
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                      bool UnfoldLoad, bool UnfoldStore,
                                      unsigned *LoadRegIndex) const {
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
    MemOp2RegOpTable.find((unsigned*)Opc);
  if (I == MemOp2RegOpTable.end())
    return 0;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  if (UnfoldLoad && !FoldedLoad)
    return 0;
  if (UnfoldStore && !FoldedStore)
    return 0;
  if (LoadRegIndex)
    *LoadRegIndex = I->second.second & 0xf;
  return I->second.first;
}

bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
  if (MBB.empty()) return false;

  switch (MBB.back().getOpcode()) {
  case X86::TCRETURNri:
  case X86::TCRETURNdi:
  case X86::RET:     // Return.
  case X86::RETI:
  case X86::TAILJMPd:
  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::JMP:     // Uncond branch.
  case X86::JMP32r:  // Indirect branch.
  case X86::JMP64r:  // Indirect branch (64-bit).
  case X86::JMP32m:  // Indirect branch through mem.
  case X86::JMP64m:  // Indirect branch through mem (64-bit).
    return true;
  default: return false;
  }
}

bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
  X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
  // The compound NE_OR_P / NP_OR_E conditions have no single-condition
  // inverse, so they cannot be reversed here.
  if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
    return true;
  Cond[0].setImm(GetOppositeBranchCondition(CC));
  return false;
}

bool X86InstrInfo::
isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  // FIXME: Return false for x87 stack register classes for now. We can't
  // allow any loads of these registers before FpGET_ST0_80.
  return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
           RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
}

unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
  switch (Desc->TSFlags & X86II::ImmMask) {
  case X86II::Imm8:   return 1;
  case X86II::Imm16:  return 2;
  case X86II::Imm32:  return 4;
  case X86II::Imm64:  return 8;
  default: llvm_unreachable("Immediate size not set!");
    return 0;
  }
}
/// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended register?
/// e.g. r8, xmm8, etc.
bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) {
  if (!MO.isReg()) return false;
  switch (MO.getReg()) {
  default: break;
  case X86::R8:    case X86::R9:    case X86::R10:   case X86::R11:
  case X86::R12:   case X86::R13:   case X86::R14:   case X86::R15:
  case X86::R8D:   case X86::R9D:   case X86::R10D:  case X86::R11D:
  case X86::R12D:  case X86::R13D:  case X86::R14D:  case X86::R15D:
  case X86::R8W:   case X86::R9W:   case X86::R10W:  case X86::R11W:
  case X86::R12W:  case X86::R13W:  case X86::R14W:  case X86::R15W:
  case X86::R8B:   case X86::R9B:   case X86::R10B:  case X86::R11B:
  case X86::R12B:  case X86::R13B:  case X86::R14B:  case X86::R15B:
  case X86::XMM8:  case X86::XMM9:  case X86::XMM10: case X86::XMM11:
  case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
    return true;
  }
  return false;
}
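// REX prefix layout: the byte is 0x40 | W<<3 | R<<2 | X<<1 | B, where W
// selects 64-bit operand size, R extends the ModRM reg field, X the SIB
// index field, and B the ModRM r/m, SIB base, or opcode register field.
// For example, "mov %r8, (%r9)" needs W, R (reg = r8), and B (base = r9),
// giving the prefix 0x4D.  A bare 0x40 is still significant: it is what
// makes SPL/BPL/SIL/DIL encodable as byte registers.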
/// determineREX - Determine if the MachineInstr has to be encoded with an
/// X86-64 REX prefix which specifies 1) 64-bit instructions, 2) non-default
/// operand size, and 3) use of X86-64 extended registers.
unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
  unsigned REX = 0;
  const TargetInstrDesc &Desc = MI.getDesc();

  // Pseudo instructions do not need REX prefix byte.
  if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
    return 0;
  if (Desc.TSFlags & X86II::REX_W)
    REX |= 1 << 3;

  unsigned NumOps = Desc.getNumOperands();
  if (NumOps) {
    bool isTwoAddr = NumOps > 1 &&
      Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;

    // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
    unsigned i = isTwoAddr ? 1 : 0;
    for (unsigned e = NumOps; i != e; ++i) {
      const MachineOperand& MO = MI.getOperand(i);
      if (MO.isReg()) {
        unsigned Reg = MO.getReg();
        if (isX86_64NonExtLowByteReg(Reg))
          REX |= 0x40;
      }
    }

    switch (Desc.TSFlags & X86II::FormMask) {
    case X86II::MRMInitReg:
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= (1 << 0) | (1 << 2);
      break;
    case X86II::MRMSrcReg: {
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 2;
      i = isTwoAddr ? 2 : 1;
      for (unsigned e = NumOps; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (isX86_64ExtendedReg(MO))
          REX |= 1 << 0;
      }
      break;
    }
    case X86II::MRMSrcMem: {
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 2;
      unsigned Bit = 0;
      i = isTwoAddr ? 2 : 1;
      for (; i != NumOps; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (MO.isReg()) {
          if (isX86_64ExtendedReg(MO))
            REX |= 1 << Bit;
          Bit++;
        }
      }
      break;
    }
    case X86II::MRM0m: case X86II::MRM1m:
    case X86II::MRM2m: case X86II::MRM3m:
    case X86II::MRM4m: case X86II::MRM5m:
    case X86II::MRM6m: case X86II::MRM7m:
    case X86II::MRMDestMem: {
      unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
      i = isTwoAddr ? 1 : 0;
      if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
        REX |= 1 << 2;
      unsigned Bit = 0;
      for (; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (MO.isReg()) {
          if (isX86_64ExtendedReg(MO))
            REX |= 1 << Bit;
          Bit++;
        }
      }
      break;
    }
    default: {
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 0;
      i = isTwoAddr ? 2 : 1;
      for (unsigned e = NumOps; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (isX86_64ExtendedReg(MO))
          REX |= 1 << 2;
      }
      break;
    }
    }
  }
  return REX;
}

/// sizePCRelativeBlockAddress - This method returns the size of a PC
/// relative block address instruction
///
static unsigned sizePCRelativeBlockAddress() {
  return 4;
}

/// sizeGlobalAddress - Give the size of the emission of this global address
///
static unsigned sizeGlobalAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeConstPoolAddress - Give the size of the emission of this constant
/// pool address
///
static unsigned sizeConstPoolAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeExternalSymbolAddress - Give the size of the emission of this external
/// symbol
///
static unsigned sizeExternalSymbolAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeJumpTableAddress - Give the size of the emission of this jump
/// table address
///
static unsigned sizeJumpTableAddress(bool dword) {
  return dword ? 8 : 4;
}

static unsigned sizeConstant(unsigned Size) {
  return Size;
}

static unsigned sizeRegModRMByte() {
  return 1;
}

static unsigned sizeSIBByte() {
  return 1;
}

static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
  unsigned FinalSize = 0;
  // If this is a simple integer displacement that doesn't require a relocation.
  if (!RelocOp) {
    FinalSize += sizeConstant(4);
    return FinalSize;
  }

  // Otherwise, this is something that requires a relocation.
  if (RelocOp->isGlobal()) {
    FinalSize += sizeGlobalAddress(false);
  } else if (RelocOp->isCPI()) {
    FinalSize += sizeConstPoolAddress(false);
  } else if (RelocOp->isJTI()) {
    FinalSize += sizeJumpTableAddress(false);
  } else {
    llvm_unreachable("Unknown value to relocate!");
  }
  return FinalSize;
}
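// Memory operands encode as a ModRM byte, an optional SIB byte, and an
// optional disp8/disp32 field.  The sizing below mirrors the encoder's
// choices but is deliberately pessimistic about displacements: where a
// disp8 would do, a disp32 is counted, so the result may overestimate the
// final encoding.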
static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
                                    bool IsPIC, bool Is64BitMode) {
  const MachineOperand &Op3 = MI.getOperand(Op+3);
  int DispVal = 0;
  const MachineOperand *DispForReloc = 0;
  unsigned FinalSize = 0;

  // Figure out what sort of displacement we have to handle here.
  if (Op3.isGlobal()) {
    DispForReloc = &Op3;
  } else if (Op3.isCPI()) {
    if (Is64BitMode || IsPIC) {
      DispForReloc = &Op3;
    } else {
      DispVal = 1;
    }
  } else if (Op3.isJTI()) {
    if (Is64BitMode || IsPIC) {
      DispForReloc = &Op3;
    } else {
      DispVal = 1;
    }
  } else {
    DispVal = 1;
  }

  const MachineOperand &Base = MI.getOperand(Op);
  const MachineOperand &IndexReg = MI.getOperand(Op+2);

  unsigned BaseReg = Base.getReg();

  // Is a SIB byte needed?
  if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
      IndexReg.getReg() == 0 &&
      (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {
    if (BaseReg == 0) {  // Just a displacement?
      // Emit special case [disp32] encoding
      ++FinalSize;
      FinalSize += getDisplacementFieldSize(DispForReloc);
    } else {
      unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
      if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
        // Emit simple indirect register encoding... [EAX] f.e.
        ++FinalSize;
        // Be pessimistic and assume it's a disp32, not a disp8
      } else {
        // Emit the most general non-SIB encoding: [REG+disp32]
        ++FinalSize;
        FinalSize += getDisplacementFieldSize(DispForReloc);
      }
    }

  } else {  // We need a SIB byte, so start by outputting the ModR/M byte first
    assert(IndexReg.getReg() != X86::ESP &&
           IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");

    bool ForceDisp32 = false;
    if (BaseReg == 0 || DispForReloc) {
      // Emit the normal disp32 encoding.
      ++FinalSize;
      ForceDisp32 = true;
    } else {
      ++FinalSize;
    }

    FinalSize += sizeSIBByte();

    // Do we need to output a displacement?
    if (DispVal != 0 || ForceDisp32) {
      FinalSize += getDisplacementFieldSize(DispForReloc);
    }
  }
  return FinalSize;
}
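// GetInstSizeWithDesc sizes an instruction without actually encoding it: it
// counts one byte per prefix the encoder would emit (lock, segment override,
// rep, operand/address size, REX, 0x0F escapes) and then dispatches on the
// instruction format to size the opcode, ModRM/SIB, and operand fields.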
static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
                                    const TargetInstrDesc *Desc,
                                    bool IsPIC, bool Is64BitMode) {

  unsigned Opcode = Desc->Opcode;
  unsigned FinalSize = 0;

  // Count the lock opcode prefix as needed.
  if (Desc->TSFlags & X86II::LOCK) ++FinalSize;

  // Count the segment override opcode prefix as needed.
  switch (Desc->TSFlags & X86II::SegOvrMask) {
  case X86II::FS:
  case X86II::GS:
    ++FinalSize;
    break;
  default: llvm_unreachable("Invalid segment!");
  case 0: break;  // No segment override!
  }

  // Count the repeat opcode prefix as needed.
  if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;

  // Count the operand size opcode prefix as needed.
  if (Desc->TSFlags & X86II::OpSize) ++FinalSize;

  // Count the address size opcode prefix as needed.
  if (Desc->TSFlags & X86II::AdSize) ++FinalSize;

  bool Need0FPrefix = false;
  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::TB:  // Two-byte opcode prefix
  case X86II::T8:  // 0F 38
  case X86II::TA:  // 0F 3A
    Need0FPrefix = true;
    break;
  case X86II::TF:  // F2 0F 38
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::REP: break;  // already handled.
  case X86II::XS:  // F3 0F
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::XD:  // F2 0F
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
  case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
    ++FinalSize;
    break;  // Two-byte opcode prefix
  default: llvm_unreachable("Invalid prefix!");
  case 0: break;  // No prefix!
  }

  if (Is64BitMode) {
    // REX prefix
    unsigned REX = X86InstrInfo::determineREX(MI);
    if (REX)
      ++FinalSize;
  }

  // 0x0F escape code must be emitted just before the opcode.
  if (Need0FPrefix)
    ++FinalSize;

  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::T8:  // 0F 38
    ++FinalSize;
    break;
  case X86II::TA:  // 0F 3A
    ++FinalSize;
    break;
  case X86II::TF:  // F2 0F 38
    ++FinalSize;
    break;
  }

  // If this is a two-address instruction, skip one of the register operands.
  unsigned NumOps = Desc->getNumOperands();
  unsigned CurOp = 0;
  if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
    CurOp++;
  else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO) == 0)
    // Skip the last source operand that is tied_to the dest reg, e.g. LXADD32.
    --NumOps;

  switch (Desc->TSFlags & X86II::FormMask) {
  default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
  case X86II::Pseudo:
    // Remember the current PC offset; this is the PIC relocation
    // base address.
    switch (Opcode) {
    default:
      break;
    case TargetInstrInfo::INLINEASM: {
      const MachineFunction *MF = MI.getParent()->getParent();
      const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
      FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                                          *MF->getTarget().getMCAsmInfo());
      break;
    }
    case TargetInstrInfo::DBG_LABEL:
    case TargetInstrInfo::EH_LABEL:
      break;
    case TargetInstrInfo::IMPLICIT_DEF:
    case TargetInstrInfo::KILL:
    case X86::DWARF_LOC:
    case X86::FP_REG_KILL:
      break;
    case X86::MOVPC32r: {
      // This counts the "call" portion of this pseudo instruction.
      ++FinalSize;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
      break;
    }
    }
    CurOp = NumOps;
    break;
  case X86II::RawFrm:
    ++FinalSize;

    if (CurOp != NumOps) {
      const MachineOperand &MO = MI.getOperand(CurOp++);
      if (MO.isMBB()) {
        FinalSize += sizePCRelativeBlockAddress();
      } else if (MO.isGlobal()) {
        FinalSize += sizeGlobalAddress(false);
      } else if (MO.isSymbol()) {
        FinalSize += sizeExternalSymbolAddress(false);
      } else if (MO.isImm()) {
        FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
      } else {
        llvm_unreachable("Unknown RawFrm operand!");
      }
    }
    break;

  case X86II::AddRegFrm:
    ++FinalSize;
    ++CurOp;

    if (CurOp != NumOps) {
      const MachineOperand &MO1 = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO1.isImm())
        FinalSize += sizeConstant(Size);
      else {
        bool dword = false;
        if (Opcode == X86::MOV64ri)
          dword = true;
        if (MO1.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO1.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO1.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO1.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;

  case X86II::MRMDestReg: {
    ++FinalSize;
    FinalSize += sizeRegModRMByte();
    CurOp += 2;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }
  case X86II::MRMDestMem: {
    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
    CurOp += X86AddrNumOperands + 1;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }

  case X86II::MRMSrcReg:
    ++FinalSize;
    FinalSize += sizeRegModRMByte();
    CurOp += 2;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
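  // Note on the MRMSrcMem case below (explanatory, not in the original
  // source): a load such as "movl (%eax,%ebx,4), %ecx" carries
  // X86AddrNumOperands address operands (base, scale, index, displacement,
  // segment). The LEA family is special-cased because its memory operand is
  // a pure address computation and has no segment operand.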
  case X86II::MRMSrcMem: {
    int AddrOperands;
    if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
        Opcode == X86::LEA16r || Opcode == X86::LEA32r)
      AddrOperands = X86AddrNumOperands - 1;  // No segment register
    else
      AddrOperands = X86AddrNumOperands;

    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
    CurOp += AddrOperands + 1;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }

  case X86II::MRM0r: case X86II::MRM1r:
  case X86II::MRM2r: case X86II::MRM3r:
  case X86II::MRM4r: case X86II::MRM5r:
  case X86II::MRM6r: case X86II::MRM7r:
    ++FinalSize;
    if (Desc->getOpcode() == X86::LFENCE ||
        Desc->getOpcode() == X86::MFENCE) {
      // Special handling of lfence and mfence.
      FinalSize += sizeRegModRMByte();
    } else if (Desc->getOpcode() == X86::MONITOR ||
               Desc->getOpcode() == X86::MWAIT) {
      // Special handling of monitor and mwait.
      FinalSize += sizeRegModRMByte() + 1;  // +1 for the opcode.
    } else {
      ++CurOp;
      FinalSize += sizeRegModRMByte();
    }

    if (CurOp != NumOps) {
      const MachineOperand &MO1 = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO1.isImm())
        FinalSize += sizeConstant(Size);
      else {
        bool dword = false;
        if (Opcode == X86::MOV64ri32)
          dword = true;
        if (MO1.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO1.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO1.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO1.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;

  case X86II::MRM0m: case X86II::MRM1m:
  case X86II::MRM2m: case X86II::MRM3m:
  case X86II::MRM4m: case X86II::MRM5m:
  case X86II::MRM6m: case X86II::MRM7m: {
    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
    CurOp += X86AddrNumOperands;

    if (CurOp != NumOps) {
      const MachineOperand &MO = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO.isImm())
        FinalSize += sizeConstant(Size);
      else {
        bool dword = false;
        if (Opcode == X86::MOV64mi32)
          dword = true;
        if (MO.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;
  }

  case X86II::MRMInitReg:
    ++FinalSize;
    // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
    FinalSize += sizeRegModRMByte();
    ++CurOp;
    break;
  }

  if (!Desc->isVariadic() && CurOp != NumOps) {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "Cannot determine size: " << MI;
    llvm_report_error(Msg.str());
  }

  return FinalSize;
}

unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const TargetInstrDesc &Desc = MI->getDesc();
  bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
  bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
  unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
  if (Desc.getOpcode() == X86::MOVPC32r)
    Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
  return Size;
}
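// MOVPC32r is the 32-bit PIC base pseudo: it expands to a call of the next
// instruction followed by a pop of the return address. GetInstSizeWithDesc
// only counts the "call" portion, so GetInstSizeInBytes above adds the size
// of a POP32r to cover the rest of the expansion.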
/// getGlobalBaseReg - Return a virtual register initialized with the
/// global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
  assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
         "X86-64 PIC uses RIP relative addressing");

  X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
  if (GlobalBaseReg != 0)
    return GlobalBaseReg;

  // Insert the set of GlobalBaseReg into the first MBB of the function.
  MachineBasicBlock &FirstMBB = MF->front();
  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc();
  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);

  const TargetInstrInfo *TII = TM.getInstrInfo();
  // The operand of MovePCtoStack is completely ignored by the asm printer.
  // It's only used in JIT code emission as a displacement to the pc.
  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);

  // If we're using vanilla 'GOT' PIC style, we should use relative addressing
  // not to pc, but to the _GLOBAL_OFFSET_TABLE_ external symbol.
  if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
    GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
    // Generate addl $_GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
    BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
                                    X86II::MO_GOT_ABSOLUTE_ADDRESS);
  } else {
    GlobalBaseReg = PC;
  }

  X86FI->setGlobalBaseReg(GlobalBaseReg);
  return GlobalBaseReg;
}
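// For illustration only (label and register names here are hypothetical, not
// from the source): in GOT-style PIC the entry block materializes, roughly,
//
//   piclabel:
//     call .Lnext                ; MOVPC32r pushes the PC...
//   .Lnext:
//     popl %pc_vreg              ; ...and pops it into a virtual register
//     addl $_GLOBAL_OFFSET_TABLE_+[.-piclabel], %base_vreg
//
// The PIC base is created once per function; subsequent calls to
// getGlobalBaseReg simply return the virtual register cached in
// X86MachineFunctionInfo.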