X86InstrInfo.cpp revision 201360
1193323Sed//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file contains the X86 implementation of the TargetInstrInfo class. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "X86InstrInfo.h" 15193323Sed#include "X86.h" 16193323Sed#include "X86GenInstrInfo.inc" 17193323Sed#include "X86InstrBuilder.h" 18193323Sed#include "X86MachineFunctionInfo.h" 19193323Sed#include "X86Subtarget.h" 20193323Sed#include "X86TargetMachine.h" 21193323Sed#include "llvm/DerivedTypes.h" 22198090Srdivacky#include "llvm/LLVMContext.h" 23193323Sed#include "llvm/ADT/STLExtras.h" 24193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 25193323Sed#include "llvm/CodeGen/MachineFrameInfo.h" 26193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 27193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 28193323Sed#include "llvm/CodeGen/LiveVariables.h" 29199481Srdivacky#include "llvm/CodeGen/PseudoSourceValue.h" 30193323Sed#include "llvm/Support/CommandLine.h" 31198090Srdivacky#include "llvm/Support/ErrorHandling.h" 32198090Srdivacky#include "llvm/Support/raw_ostream.h" 33193323Sed#include "llvm/Target/TargetOptions.h" 34198090Srdivacky#include "llvm/MC/MCAsmInfo.h" 35199481Srdivacky 36199481Srdivacky#include <limits> 37199481Srdivacky 38193323Sedusing namespace llvm; 39193323Sed 40198090Srdivackystatic cl::opt<bool> 41198090SrdivackyNoFusing("disable-spill-fusing", 42198090Srdivacky cl::desc("Disable fusing of spill code into instructions")); 43198090Srdivackystatic cl::opt<bool> 
44198090SrdivackyPrintFailedFusing("print-failed-fuse-candidates", 45198090Srdivacky cl::desc("Print instructions that the allocator wants to" 46198090Srdivacky " fuse, but the X86 backend currently can't"), 47198090Srdivacky cl::Hidden); 48198090Srdivackystatic cl::opt<bool> 49198090SrdivackyReMatPICStubLoad("remat-pic-stub-load", 50198090Srdivacky cl::desc("Re-materialize load from stub in PIC mode"), 51198090Srdivacky cl::init(false), cl::Hidden); 52193323Sed 53193323SedX86InstrInfo::X86InstrInfo(X86TargetMachine &tm) 54193323Sed : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), 55193323Sed TM(tm), RI(tm, *this) { 56193323Sed SmallVector<unsigned,16> AmbEntries; 57193323Sed static const unsigned OpTbl2Addr[][2] = { 58193323Sed { X86::ADC32ri, X86::ADC32mi }, 59193323Sed { X86::ADC32ri8, X86::ADC32mi8 }, 60193323Sed { X86::ADC32rr, X86::ADC32mr }, 61193323Sed { X86::ADC64ri32, X86::ADC64mi32 }, 62193323Sed { X86::ADC64ri8, X86::ADC64mi8 }, 63193323Sed { X86::ADC64rr, X86::ADC64mr }, 64193323Sed { X86::ADD16ri, X86::ADD16mi }, 65193323Sed { X86::ADD16ri8, X86::ADD16mi8 }, 66193323Sed { X86::ADD16rr, X86::ADD16mr }, 67193323Sed { X86::ADD32ri, X86::ADD32mi }, 68193323Sed { X86::ADD32ri8, X86::ADD32mi8 }, 69193323Sed { X86::ADD32rr, X86::ADD32mr }, 70193323Sed { X86::ADD64ri32, X86::ADD64mi32 }, 71193323Sed { X86::ADD64ri8, X86::ADD64mi8 }, 72193323Sed { X86::ADD64rr, X86::ADD64mr }, 73193323Sed { X86::ADD8ri, X86::ADD8mi }, 74193323Sed { X86::ADD8rr, X86::ADD8mr }, 75193323Sed { X86::AND16ri, X86::AND16mi }, 76193323Sed { X86::AND16ri8, X86::AND16mi8 }, 77193323Sed { X86::AND16rr, X86::AND16mr }, 78193323Sed { X86::AND32ri, X86::AND32mi }, 79193323Sed { X86::AND32ri8, X86::AND32mi8 }, 80193323Sed { X86::AND32rr, X86::AND32mr }, 81193323Sed { X86::AND64ri32, X86::AND64mi32 }, 82193323Sed { X86::AND64ri8, X86::AND64mi8 }, 83193323Sed { X86::AND64rr, X86::AND64mr }, 84193323Sed { X86::AND8ri, X86::AND8mi }, 85193323Sed { X86::AND8rr, X86::AND8mr }, 
86193323Sed { X86::DEC16r, X86::DEC16m }, 87193323Sed { X86::DEC32r, X86::DEC32m }, 88193323Sed { X86::DEC64_16r, X86::DEC64_16m }, 89193323Sed { X86::DEC64_32r, X86::DEC64_32m }, 90193323Sed { X86::DEC64r, X86::DEC64m }, 91193323Sed { X86::DEC8r, X86::DEC8m }, 92193323Sed { X86::INC16r, X86::INC16m }, 93193323Sed { X86::INC32r, X86::INC32m }, 94193323Sed { X86::INC64_16r, X86::INC64_16m }, 95193323Sed { X86::INC64_32r, X86::INC64_32m }, 96193323Sed { X86::INC64r, X86::INC64m }, 97193323Sed { X86::INC8r, X86::INC8m }, 98193323Sed { X86::NEG16r, X86::NEG16m }, 99193323Sed { X86::NEG32r, X86::NEG32m }, 100193323Sed { X86::NEG64r, X86::NEG64m }, 101193323Sed { X86::NEG8r, X86::NEG8m }, 102193323Sed { X86::NOT16r, X86::NOT16m }, 103193323Sed { X86::NOT32r, X86::NOT32m }, 104193323Sed { X86::NOT64r, X86::NOT64m }, 105193323Sed { X86::NOT8r, X86::NOT8m }, 106193323Sed { X86::OR16ri, X86::OR16mi }, 107193323Sed { X86::OR16ri8, X86::OR16mi8 }, 108193323Sed { X86::OR16rr, X86::OR16mr }, 109193323Sed { X86::OR32ri, X86::OR32mi }, 110193323Sed { X86::OR32ri8, X86::OR32mi8 }, 111193323Sed { X86::OR32rr, X86::OR32mr }, 112193323Sed { X86::OR64ri32, X86::OR64mi32 }, 113193323Sed { X86::OR64ri8, X86::OR64mi8 }, 114193323Sed { X86::OR64rr, X86::OR64mr }, 115193323Sed { X86::OR8ri, X86::OR8mi }, 116193323Sed { X86::OR8rr, X86::OR8mr }, 117193323Sed { X86::ROL16r1, X86::ROL16m1 }, 118193323Sed { X86::ROL16rCL, X86::ROL16mCL }, 119193323Sed { X86::ROL16ri, X86::ROL16mi }, 120193323Sed { X86::ROL32r1, X86::ROL32m1 }, 121193323Sed { X86::ROL32rCL, X86::ROL32mCL }, 122193323Sed { X86::ROL32ri, X86::ROL32mi }, 123193323Sed { X86::ROL64r1, X86::ROL64m1 }, 124193323Sed { X86::ROL64rCL, X86::ROL64mCL }, 125193323Sed { X86::ROL64ri, X86::ROL64mi }, 126193323Sed { X86::ROL8r1, X86::ROL8m1 }, 127193323Sed { X86::ROL8rCL, X86::ROL8mCL }, 128193323Sed { X86::ROL8ri, X86::ROL8mi }, 129193323Sed { X86::ROR16r1, X86::ROR16m1 }, 130193323Sed { X86::ROR16rCL, X86::ROR16mCL }, 131193323Sed { 
X86::ROR16ri, X86::ROR16mi }, 132193323Sed { X86::ROR32r1, X86::ROR32m1 }, 133193323Sed { X86::ROR32rCL, X86::ROR32mCL }, 134193323Sed { X86::ROR32ri, X86::ROR32mi }, 135193323Sed { X86::ROR64r1, X86::ROR64m1 }, 136193323Sed { X86::ROR64rCL, X86::ROR64mCL }, 137193323Sed { X86::ROR64ri, X86::ROR64mi }, 138193323Sed { X86::ROR8r1, X86::ROR8m1 }, 139193323Sed { X86::ROR8rCL, X86::ROR8mCL }, 140193323Sed { X86::ROR8ri, X86::ROR8mi }, 141193323Sed { X86::SAR16r1, X86::SAR16m1 }, 142193323Sed { X86::SAR16rCL, X86::SAR16mCL }, 143193323Sed { X86::SAR16ri, X86::SAR16mi }, 144193323Sed { X86::SAR32r1, X86::SAR32m1 }, 145193323Sed { X86::SAR32rCL, X86::SAR32mCL }, 146193323Sed { X86::SAR32ri, X86::SAR32mi }, 147193323Sed { X86::SAR64r1, X86::SAR64m1 }, 148193323Sed { X86::SAR64rCL, X86::SAR64mCL }, 149193323Sed { X86::SAR64ri, X86::SAR64mi }, 150193323Sed { X86::SAR8r1, X86::SAR8m1 }, 151193323Sed { X86::SAR8rCL, X86::SAR8mCL }, 152193323Sed { X86::SAR8ri, X86::SAR8mi }, 153193323Sed { X86::SBB32ri, X86::SBB32mi }, 154193323Sed { X86::SBB32ri8, X86::SBB32mi8 }, 155193323Sed { X86::SBB32rr, X86::SBB32mr }, 156193323Sed { X86::SBB64ri32, X86::SBB64mi32 }, 157193323Sed { X86::SBB64ri8, X86::SBB64mi8 }, 158193323Sed { X86::SBB64rr, X86::SBB64mr }, 159193323Sed { X86::SHL16rCL, X86::SHL16mCL }, 160193323Sed { X86::SHL16ri, X86::SHL16mi }, 161193323Sed { X86::SHL32rCL, X86::SHL32mCL }, 162193323Sed { X86::SHL32ri, X86::SHL32mi }, 163193323Sed { X86::SHL64rCL, X86::SHL64mCL }, 164193323Sed { X86::SHL64ri, X86::SHL64mi }, 165193323Sed { X86::SHL8rCL, X86::SHL8mCL }, 166193323Sed { X86::SHL8ri, X86::SHL8mi }, 167193323Sed { X86::SHLD16rrCL, X86::SHLD16mrCL }, 168193323Sed { X86::SHLD16rri8, X86::SHLD16mri8 }, 169193323Sed { X86::SHLD32rrCL, X86::SHLD32mrCL }, 170193323Sed { X86::SHLD32rri8, X86::SHLD32mri8 }, 171193323Sed { X86::SHLD64rrCL, X86::SHLD64mrCL }, 172193323Sed { X86::SHLD64rri8, X86::SHLD64mri8 }, 173193323Sed { X86::SHR16r1, X86::SHR16m1 }, 174193323Sed { X86::SHR16rCL, 
X86::SHR16mCL }, 175193323Sed { X86::SHR16ri, X86::SHR16mi }, 176193323Sed { X86::SHR32r1, X86::SHR32m1 }, 177193323Sed { X86::SHR32rCL, X86::SHR32mCL }, 178193323Sed { X86::SHR32ri, X86::SHR32mi }, 179193323Sed { X86::SHR64r1, X86::SHR64m1 }, 180193323Sed { X86::SHR64rCL, X86::SHR64mCL }, 181193323Sed { X86::SHR64ri, X86::SHR64mi }, 182193323Sed { X86::SHR8r1, X86::SHR8m1 }, 183193323Sed { X86::SHR8rCL, X86::SHR8mCL }, 184193323Sed { X86::SHR8ri, X86::SHR8mi }, 185193323Sed { X86::SHRD16rrCL, X86::SHRD16mrCL }, 186193323Sed { X86::SHRD16rri8, X86::SHRD16mri8 }, 187193323Sed { X86::SHRD32rrCL, X86::SHRD32mrCL }, 188193323Sed { X86::SHRD32rri8, X86::SHRD32mri8 }, 189193323Sed { X86::SHRD64rrCL, X86::SHRD64mrCL }, 190193323Sed { X86::SHRD64rri8, X86::SHRD64mri8 }, 191193323Sed { X86::SUB16ri, X86::SUB16mi }, 192193323Sed { X86::SUB16ri8, X86::SUB16mi8 }, 193193323Sed { X86::SUB16rr, X86::SUB16mr }, 194193323Sed { X86::SUB32ri, X86::SUB32mi }, 195193323Sed { X86::SUB32ri8, X86::SUB32mi8 }, 196193323Sed { X86::SUB32rr, X86::SUB32mr }, 197193323Sed { X86::SUB64ri32, X86::SUB64mi32 }, 198193323Sed { X86::SUB64ri8, X86::SUB64mi8 }, 199193323Sed { X86::SUB64rr, X86::SUB64mr }, 200193323Sed { X86::SUB8ri, X86::SUB8mi }, 201193323Sed { X86::SUB8rr, X86::SUB8mr }, 202193323Sed { X86::XOR16ri, X86::XOR16mi }, 203193323Sed { X86::XOR16ri8, X86::XOR16mi8 }, 204193323Sed { X86::XOR16rr, X86::XOR16mr }, 205193323Sed { X86::XOR32ri, X86::XOR32mi }, 206193323Sed { X86::XOR32ri8, X86::XOR32mi8 }, 207193323Sed { X86::XOR32rr, X86::XOR32mr }, 208193323Sed { X86::XOR64ri32, X86::XOR64mi32 }, 209193323Sed { X86::XOR64ri8, X86::XOR64mi8 }, 210193323Sed { X86::XOR64rr, X86::XOR64mr }, 211193323Sed { X86::XOR8ri, X86::XOR8mi }, 212193323Sed { X86::XOR8rr, X86::XOR8mr } 213193323Sed }; 214193323Sed 215193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { 216193323Sed unsigned RegOp = OpTbl2Addr[i][0]; 217193323Sed unsigned MemOp = OpTbl2Addr[i][1]; 218193323Sed if 
(!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, 219198090Srdivacky std::make_pair(MemOp,0))).second) 220193323Sed assert(false && "Duplicated entries?"); 221198090Srdivacky // Index 0, folded load and store, no alignment requirement. 222198090Srdivacky unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); 223193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 224193323Sed std::make_pair(RegOp, 225193323Sed AuxInfo))).second) 226193323Sed AmbEntries.push_back(MemOp); 227193323Sed } 228193323Sed 229193323Sed // If the third value is 1, then it's folding either a load or a store. 230198090Srdivacky static const unsigned OpTbl0[][4] = { 231198090Srdivacky { X86::BT16ri8, X86::BT16mi8, 1, 0 }, 232198090Srdivacky { X86::BT32ri8, X86::BT32mi8, 1, 0 }, 233198090Srdivacky { X86::BT64ri8, X86::BT64mi8, 1, 0 }, 234198090Srdivacky { X86::CALL32r, X86::CALL32m, 1, 0 }, 235198090Srdivacky { X86::CALL64r, X86::CALL64m, 1, 0 }, 236198090Srdivacky { X86::CMP16ri, X86::CMP16mi, 1, 0 }, 237198090Srdivacky { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, 238198090Srdivacky { X86::CMP16rr, X86::CMP16mr, 1, 0 }, 239198090Srdivacky { X86::CMP32ri, X86::CMP32mi, 1, 0 }, 240198090Srdivacky { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, 241198090Srdivacky { X86::CMP32rr, X86::CMP32mr, 1, 0 }, 242198090Srdivacky { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, 243198090Srdivacky { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, 244198090Srdivacky { X86::CMP64rr, X86::CMP64mr, 1, 0 }, 245198090Srdivacky { X86::CMP8ri, X86::CMP8mi, 1, 0 }, 246198090Srdivacky { X86::CMP8rr, X86::CMP8mr, 1, 0 }, 247198090Srdivacky { X86::DIV16r, X86::DIV16m, 1, 0 }, 248198090Srdivacky { X86::DIV32r, X86::DIV32m, 1, 0 }, 249198090Srdivacky { X86::DIV64r, X86::DIV64m, 1, 0 }, 250198090Srdivacky { X86::DIV8r, X86::DIV8m, 1, 0 }, 251198090Srdivacky { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, 252198090Srdivacky { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 }, 253198090Srdivacky { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, 
254198090Srdivacky { X86::IDIV16r, X86::IDIV16m, 1, 0 }, 255198090Srdivacky { X86::IDIV32r, X86::IDIV32m, 1, 0 }, 256198090Srdivacky { X86::IDIV64r, X86::IDIV64m, 1, 0 }, 257198090Srdivacky { X86::IDIV8r, X86::IDIV8m, 1, 0 }, 258198090Srdivacky { X86::IMUL16r, X86::IMUL16m, 1, 0 }, 259198090Srdivacky { X86::IMUL32r, X86::IMUL32m, 1, 0 }, 260198090Srdivacky { X86::IMUL64r, X86::IMUL64m, 1, 0 }, 261198090Srdivacky { X86::IMUL8r, X86::IMUL8m, 1, 0 }, 262198090Srdivacky { X86::JMP32r, X86::JMP32m, 1, 0 }, 263198090Srdivacky { X86::JMP64r, X86::JMP64m, 1, 0 }, 264198090Srdivacky { X86::MOV16ri, X86::MOV16mi, 0, 0 }, 265198090Srdivacky { X86::MOV16rr, X86::MOV16mr, 0, 0 }, 266198090Srdivacky { X86::MOV32ri, X86::MOV32mi, 0, 0 }, 267198090Srdivacky { X86::MOV32rr, X86::MOV32mr, 0, 0 }, 268198090Srdivacky { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, 269198090Srdivacky { X86::MOV64rr, X86::MOV64mr, 0, 0 }, 270198090Srdivacky { X86::MOV8ri, X86::MOV8mi, 0, 0 }, 271198090Srdivacky { X86::MOV8rr, X86::MOV8mr, 0, 0 }, 272198090Srdivacky { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, 273198090Srdivacky { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, 274198090Srdivacky { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, 275198090Srdivacky { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, 276198090Srdivacky { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, 277198090Srdivacky { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, 278198090Srdivacky { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, 279198090Srdivacky { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, 280198090Srdivacky { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, 281198090Srdivacky { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, 282198090Srdivacky { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, 283198090Srdivacky { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, 284198090Srdivacky { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, 285198090Srdivacky { X86::MUL16r, X86::MUL16m, 1, 0 }, 286198090Srdivacky { X86::MUL32r, X86::MUL32m, 1, 0 }, 287198090Srdivacky { X86::MUL64r, X86::MUL64m, 1, 0 }, 
288198090Srdivacky { X86::MUL8r, X86::MUL8m, 1, 0 }, 289198090Srdivacky { X86::SETAEr, X86::SETAEm, 0, 0 }, 290198090Srdivacky { X86::SETAr, X86::SETAm, 0, 0 }, 291198090Srdivacky { X86::SETBEr, X86::SETBEm, 0, 0 }, 292198090Srdivacky { X86::SETBr, X86::SETBm, 0, 0 }, 293198090Srdivacky { X86::SETEr, X86::SETEm, 0, 0 }, 294198090Srdivacky { X86::SETGEr, X86::SETGEm, 0, 0 }, 295198090Srdivacky { X86::SETGr, X86::SETGm, 0, 0 }, 296198090Srdivacky { X86::SETLEr, X86::SETLEm, 0, 0 }, 297198090Srdivacky { X86::SETLr, X86::SETLm, 0, 0 }, 298198090Srdivacky { X86::SETNEr, X86::SETNEm, 0, 0 }, 299198090Srdivacky { X86::SETNOr, X86::SETNOm, 0, 0 }, 300198090Srdivacky { X86::SETNPr, X86::SETNPm, 0, 0 }, 301198090Srdivacky { X86::SETNSr, X86::SETNSm, 0, 0 }, 302198090Srdivacky { X86::SETOr, X86::SETOm, 0, 0 }, 303198090Srdivacky { X86::SETPr, X86::SETPm, 0, 0 }, 304198090Srdivacky { X86::SETSr, X86::SETSm, 0, 0 }, 305198090Srdivacky { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, 306198090Srdivacky { X86::TEST16ri, X86::TEST16mi, 1, 0 }, 307198090Srdivacky { X86::TEST32ri, X86::TEST32mi, 1, 0 }, 308198090Srdivacky { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, 309198090Srdivacky { X86::TEST8ri, X86::TEST8mi, 1, 0 } 310193323Sed }; 311193323Sed 312193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { 313193323Sed unsigned RegOp = OpTbl0[i][0]; 314193323Sed unsigned MemOp = OpTbl0[i][1]; 315198090Srdivacky unsigned Align = OpTbl0[i][3]; 316193323Sed if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, 317198090Srdivacky std::make_pair(MemOp,Align))).second) 318193323Sed assert(false && "Duplicated entries?"); 319193323Sed unsigned FoldedLoad = OpTbl0[i][2]; 320193323Sed // Index 0, folded load or store. 
321193323Sed unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); 322193323Sed if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) 323193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 324193323Sed std::make_pair(RegOp, AuxInfo))).second) 325193323Sed AmbEntries.push_back(MemOp); 326193323Sed } 327193323Sed 328198090Srdivacky static const unsigned OpTbl1[][3] = { 329198090Srdivacky { X86::CMP16rr, X86::CMP16rm, 0 }, 330198090Srdivacky { X86::CMP32rr, X86::CMP32rm, 0 }, 331198090Srdivacky { X86::CMP64rr, X86::CMP64rm, 0 }, 332198090Srdivacky { X86::CMP8rr, X86::CMP8rm, 0 }, 333198090Srdivacky { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, 334198090Srdivacky { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, 335198090Srdivacky { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, 336198090Srdivacky { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, 337198090Srdivacky { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, 338198090Srdivacky { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, 339198090Srdivacky { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, 340198090Srdivacky { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, 341198090Srdivacky { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, 342198090Srdivacky { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, 343198090Srdivacky { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, 344198090Srdivacky { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, 345198090Srdivacky { X86::IMUL16rri, X86::IMUL16rmi, 0 }, 346198090Srdivacky { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, 347198090Srdivacky { X86::IMUL32rri, X86::IMUL32rmi, 0 }, 348198090Srdivacky { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, 349198090Srdivacky { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, 350198090Srdivacky { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, 351198090Srdivacky { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, 352198090Srdivacky { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, 353198090Srdivacky { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, 354198090Srdivacky { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, 355198090Srdivacky { 
X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, 356198090Srdivacky { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, 357198090Srdivacky { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, 358198090Srdivacky { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, 359198090Srdivacky { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, 360198090Srdivacky { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, 361198090Srdivacky { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, 362198090Srdivacky { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, 363198090Srdivacky { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, 364198090Srdivacky { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, 365198090Srdivacky { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, 366198090Srdivacky { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, 367198090Srdivacky { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, 368198090Srdivacky { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, 369198090Srdivacky { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, 370198090Srdivacky { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, 371198090Srdivacky { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, 372198090Srdivacky { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, 373198090Srdivacky { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, 374198090Srdivacky { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, 375198090Srdivacky { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, 376198090Srdivacky { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, 377198090Srdivacky { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, 378198090Srdivacky { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, 379198090Srdivacky { X86::MOV16rr, X86::MOV16rm, 0 }, 380198090Srdivacky { X86::MOV32rr, X86::MOV32rm, 0 }, 381198090Srdivacky { X86::MOV64rr, X86::MOV64rm, 0 }, 382198090Srdivacky { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, 383198090Srdivacky { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, 384198090Srdivacky { X86::MOV8rr, X86::MOV8rm, 0 }, 
385198090Srdivacky { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, 386198090Srdivacky { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, 387198090Srdivacky { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, 388198090Srdivacky { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, 389198090Srdivacky { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, 390198090Srdivacky { X86::MOVDQArr, X86::MOVDQArm, 16 }, 391198090Srdivacky { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, 392198090Srdivacky { X86::MOVSDrr, X86::MOVSDrm, 0 }, 393198090Srdivacky { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, 394198090Srdivacky { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, 395198090Srdivacky { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, 396198090Srdivacky { X86::MOVSSrr, X86::MOVSSrm, 0 }, 397198090Srdivacky { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, 398198090Srdivacky { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, 399198090Srdivacky { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, 400198090Srdivacky { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, 401198090Srdivacky { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, 402198090Srdivacky { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, 403198090Srdivacky { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, 404198090Srdivacky { X86::MOVUPSrr, X86::MOVUPSrm, 16 }, 405198090Srdivacky { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, 406198090Srdivacky { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, 407198090Srdivacky { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, 408198090Srdivacky { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, 409198090Srdivacky { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, 410198090Srdivacky { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, 411198090Srdivacky { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, 412198090Srdivacky { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, 413198090Srdivacky { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, 414198090Srdivacky { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, 415198090Srdivacky { X86::PSHUFDri, X86::PSHUFDmi, 16 }, 416198090Srdivacky { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, 417198090Srdivacky { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, 
418198090Srdivacky { X86::RCPPSr, X86::RCPPSm, 16 }, 419198090Srdivacky { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, 420198090Srdivacky { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, 421198090Srdivacky { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, 422198090Srdivacky { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, 423198090Srdivacky { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, 424198090Srdivacky { X86::SQRTPDr, X86::SQRTPDm, 16 }, 425198090Srdivacky { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, 426198090Srdivacky { X86::SQRTPSr, X86::SQRTPSm, 16 }, 427198090Srdivacky { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, 428198090Srdivacky { X86::SQRTSDr, X86::SQRTSDm, 0 }, 429198090Srdivacky { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, 430198090Srdivacky { X86::SQRTSSr, X86::SQRTSSm, 0 }, 431198090Srdivacky { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, 432198090Srdivacky { X86::TEST16rr, X86::TEST16rm, 0 }, 433198090Srdivacky { X86::TEST32rr, X86::TEST32rm, 0 }, 434198090Srdivacky { X86::TEST64rr, X86::TEST64rm, 0 }, 435198090Srdivacky { X86::TEST8rr, X86::TEST8rm, 0 }, 436193323Sed // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 437198090Srdivacky { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, 438198090Srdivacky { X86::UCOMISSrr, X86::UCOMISSrm, 0 } 439193323Sed }; 440193323Sed 441193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { 442193323Sed unsigned RegOp = OpTbl1[i][0]; 443193323Sed unsigned MemOp = OpTbl1[i][1]; 444198090Srdivacky unsigned Align = OpTbl1[i][2]; 445193323Sed if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, 446198090Srdivacky std::make_pair(MemOp,Align))).second) 447193323Sed assert(false && "Duplicated entries?"); 448198090Srdivacky // Index 1, folded load 449198090Srdivacky unsigned AuxInfo = 1 | (1 << 4); 450193323Sed if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) 451193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 452193323Sed std::make_pair(RegOp, AuxInfo))).second) 453193323Sed AmbEntries.push_back(MemOp); 454193323Sed } 
455193323Sed 456198090Srdivacky static const unsigned OpTbl2[][3] = { 457198090Srdivacky { X86::ADC32rr, X86::ADC32rm, 0 }, 458198090Srdivacky { X86::ADC64rr, X86::ADC64rm, 0 }, 459198090Srdivacky { X86::ADD16rr, X86::ADD16rm, 0 }, 460198090Srdivacky { X86::ADD32rr, X86::ADD32rm, 0 }, 461198090Srdivacky { X86::ADD64rr, X86::ADD64rm, 0 }, 462198090Srdivacky { X86::ADD8rr, X86::ADD8rm, 0 }, 463198090Srdivacky { X86::ADDPDrr, X86::ADDPDrm, 16 }, 464198090Srdivacky { X86::ADDPSrr, X86::ADDPSrm, 16 }, 465198090Srdivacky { X86::ADDSDrr, X86::ADDSDrm, 0 }, 466198090Srdivacky { X86::ADDSSrr, X86::ADDSSrm, 0 }, 467198090Srdivacky { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, 468198090Srdivacky { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, 469198090Srdivacky { X86::AND16rr, X86::AND16rm, 0 }, 470198090Srdivacky { X86::AND32rr, X86::AND32rm, 0 }, 471198090Srdivacky { X86::AND64rr, X86::AND64rm, 0 }, 472198090Srdivacky { X86::AND8rr, X86::AND8rm, 0 }, 473198090Srdivacky { X86::ANDNPDrr, X86::ANDNPDrm, 16 }, 474198090Srdivacky { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, 475198090Srdivacky { X86::ANDPDrr, X86::ANDPDrm, 16 }, 476198090Srdivacky { X86::ANDPSrr, X86::ANDPSrm, 16 }, 477198090Srdivacky { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, 478198090Srdivacky { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, 479198090Srdivacky { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, 480198090Srdivacky { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, 481198090Srdivacky { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, 482198090Srdivacky { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, 483198090Srdivacky { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, 484198090Srdivacky { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, 485198090Srdivacky { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, 486198090Srdivacky { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, 487198090Srdivacky { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, 488198090Srdivacky { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, 489198090Srdivacky { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, 490198090Srdivacky { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, 
491198090Srdivacky { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, 492198090Srdivacky { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, 493198090Srdivacky { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, 494198090Srdivacky { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, 495198090Srdivacky { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, 496198090Srdivacky { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, 497198090Srdivacky { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, 498198090Srdivacky { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, 499198090Srdivacky { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, 500198090Srdivacky { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, 501198090Srdivacky { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, 502198090Srdivacky { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, 503198090Srdivacky { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, 504198090Srdivacky { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, 505198090Srdivacky { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, 506198090Srdivacky { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, 507198090Srdivacky { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, 508198090Srdivacky { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, 509198090Srdivacky { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, 510198090Srdivacky { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, 511198090Srdivacky { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, 512198090Srdivacky { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, 513198090Srdivacky { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, 514198090Srdivacky { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, 515198090Srdivacky { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, 516198090Srdivacky { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, 517198090Srdivacky { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, 518198090Srdivacky { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, 519198090Srdivacky { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, 520198090Srdivacky { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, 521198090Srdivacky { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, 522198090Srdivacky { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, 523198090Srdivacky { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, 524198090Srdivacky { X86::CMOVS64rr, 
X86::CMOVS64rm, 0 }, 525198090Srdivacky { X86::CMPPDrri, X86::CMPPDrmi, 16 }, 526198090Srdivacky { X86::CMPPSrri, X86::CMPPSrmi, 16 }, 527198090Srdivacky { X86::CMPSDrr, X86::CMPSDrm, 0 }, 528198090Srdivacky { X86::CMPSSrr, X86::CMPSSrm, 0 }, 529198090Srdivacky { X86::DIVPDrr, X86::DIVPDrm, 16 }, 530198090Srdivacky { X86::DIVPSrr, X86::DIVPSrm, 16 }, 531198090Srdivacky { X86::DIVSDrr, X86::DIVSDrm, 0 }, 532198090Srdivacky { X86::DIVSSrr, X86::DIVSSrm, 0 }, 533198090Srdivacky { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, 534198090Srdivacky { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, 535198090Srdivacky { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, 536198090Srdivacky { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, 537198090Srdivacky { X86::FsORPDrr, X86::FsORPDrm, 16 }, 538198090Srdivacky { X86::FsORPSrr, X86::FsORPSrm, 16 }, 539198090Srdivacky { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, 540198090Srdivacky { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, 541198090Srdivacky { X86::HADDPDrr, X86::HADDPDrm, 16 }, 542198090Srdivacky { X86::HADDPSrr, X86::HADDPSrm, 16 }, 543198090Srdivacky { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, 544198090Srdivacky { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, 545198090Srdivacky { X86::IMUL16rr, X86::IMUL16rm, 0 }, 546198090Srdivacky { X86::IMUL32rr, X86::IMUL32rm, 0 }, 547198090Srdivacky { X86::IMUL64rr, X86::IMUL64rm, 0 }, 548198090Srdivacky { X86::MAXPDrr, X86::MAXPDrm, 16 }, 549198090Srdivacky { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, 550198090Srdivacky { X86::MAXPSrr, X86::MAXPSrm, 16 }, 551198090Srdivacky { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, 552198090Srdivacky { X86::MAXSDrr, X86::MAXSDrm, 0 }, 553198090Srdivacky { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, 554198090Srdivacky { X86::MAXSSrr, X86::MAXSSrm, 0 }, 555198090Srdivacky { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, 556198090Srdivacky { X86::MINPDrr, X86::MINPDrm, 16 }, 557198090Srdivacky { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, 558198090Srdivacky { X86::MINPSrr, X86::MINPSrm, 16 }, 559198090Srdivacky { 
X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, 560198090Srdivacky { X86::MINSDrr, X86::MINSDrm, 0 }, 561198090Srdivacky { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, 562198090Srdivacky { X86::MINSSrr, X86::MINSSrm, 0 }, 563198090Srdivacky { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, 564198090Srdivacky { X86::MULPDrr, X86::MULPDrm, 16 }, 565198090Srdivacky { X86::MULPSrr, X86::MULPSrm, 16 }, 566198090Srdivacky { X86::MULSDrr, X86::MULSDrm, 0 }, 567198090Srdivacky { X86::MULSSrr, X86::MULSSrm, 0 }, 568198090Srdivacky { X86::OR16rr, X86::OR16rm, 0 }, 569198090Srdivacky { X86::OR32rr, X86::OR32rm, 0 }, 570198090Srdivacky { X86::OR64rr, X86::OR64rm, 0 }, 571198090Srdivacky { X86::OR8rr, X86::OR8rm, 0 }, 572198090Srdivacky { X86::ORPDrr, X86::ORPDrm, 16 }, 573198090Srdivacky { X86::ORPSrr, X86::ORPSrm, 16 }, 574198090Srdivacky { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, 575198090Srdivacky { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, 576198090Srdivacky { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, 577198090Srdivacky { X86::PADDBrr, X86::PADDBrm, 16 }, 578198090Srdivacky { X86::PADDDrr, X86::PADDDrm, 16 }, 579198090Srdivacky { X86::PADDQrr, X86::PADDQrm, 16 }, 580198090Srdivacky { X86::PADDSBrr, X86::PADDSBrm, 16 }, 581198090Srdivacky { X86::PADDSWrr, X86::PADDSWrm, 16 }, 582198090Srdivacky { X86::PADDWrr, X86::PADDWrm, 16 }, 583198090Srdivacky { X86::PANDNrr, X86::PANDNrm, 16 }, 584198090Srdivacky { X86::PANDrr, X86::PANDrm, 16 }, 585198090Srdivacky { X86::PAVGBrr, X86::PAVGBrm, 16 }, 586198090Srdivacky { X86::PAVGWrr, X86::PAVGWrm, 16 }, 587198090Srdivacky { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, 588198090Srdivacky { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, 589198090Srdivacky { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, 590198090Srdivacky { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, 591198090Srdivacky { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, 592198090Srdivacky { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, 593198090Srdivacky { X86::PINSRWrri, X86::PINSRWrmi, 16 }, 594198090Srdivacky { X86::PMADDWDrr, 
X86::PMADDWDrm, 16 }, 595198090Srdivacky { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, 596198090Srdivacky { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, 597198090Srdivacky { X86::PMINSWrr, X86::PMINSWrm, 16 }, 598198090Srdivacky { X86::PMINUBrr, X86::PMINUBrm, 16 }, 599198090Srdivacky { X86::PMULDQrr, X86::PMULDQrm, 16 }, 600198090Srdivacky { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, 601198090Srdivacky { X86::PMULHWrr, X86::PMULHWrm, 16 }, 602198090Srdivacky { X86::PMULLDrr, X86::PMULLDrm, 16 }, 603198090Srdivacky { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 }, 604198090Srdivacky { X86::PMULLWrr, X86::PMULLWrm, 16 }, 605198090Srdivacky { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, 606198090Srdivacky { X86::PORrr, X86::PORrm, 16 }, 607198090Srdivacky { X86::PSADBWrr, X86::PSADBWrm, 16 }, 608198090Srdivacky { X86::PSLLDrr, X86::PSLLDrm, 16 }, 609198090Srdivacky { X86::PSLLQrr, X86::PSLLQrm, 16 }, 610198090Srdivacky { X86::PSLLWrr, X86::PSLLWrm, 16 }, 611198090Srdivacky { X86::PSRADrr, X86::PSRADrm, 16 }, 612198090Srdivacky { X86::PSRAWrr, X86::PSRAWrm, 16 }, 613198090Srdivacky { X86::PSRLDrr, X86::PSRLDrm, 16 }, 614198090Srdivacky { X86::PSRLQrr, X86::PSRLQrm, 16 }, 615198090Srdivacky { X86::PSRLWrr, X86::PSRLWrm, 16 }, 616198090Srdivacky { X86::PSUBBrr, X86::PSUBBrm, 16 }, 617198090Srdivacky { X86::PSUBDrr, X86::PSUBDrm, 16 }, 618198090Srdivacky { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, 619198090Srdivacky { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, 620198090Srdivacky { X86::PSUBWrr, X86::PSUBWrm, 16 }, 621198090Srdivacky { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, 622198090Srdivacky { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, 623198090Srdivacky { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, 624198090Srdivacky { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, 625198090Srdivacky { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, 626198090Srdivacky { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, 627198090Srdivacky { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, 628198090Srdivacky { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, 
629198090Srdivacky { X86::PXORrr, X86::PXORrm, 16 }, 630198090Srdivacky { X86::SBB32rr, X86::SBB32rm, 0 }, 631198090Srdivacky { X86::SBB64rr, X86::SBB64rm, 0 }, 632198090Srdivacky { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, 633198090Srdivacky { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, 634198090Srdivacky { X86::SUB16rr, X86::SUB16rm, 0 }, 635198090Srdivacky { X86::SUB32rr, X86::SUB32rm, 0 }, 636198090Srdivacky { X86::SUB64rr, X86::SUB64rm, 0 }, 637198090Srdivacky { X86::SUB8rr, X86::SUB8rm, 0 }, 638198090Srdivacky { X86::SUBPDrr, X86::SUBPDrm, 16 }, 639198090Srdivacky { X86::SUBPSrr, X86::SUBPSrm, 16 }, 640198090Srdivacky { X86::SUBSDrr, X86::SUBSDrm, 0 }, 641198090Srdivacky { X86::SUBSSrr, X86::SUBSSrm, 0 }, 642193323Sed // FIXME: TEST*rr -> swapped operand of TEST*mr. 643198090Srdivacky { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, 644198090Srdivacky { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, 645198090Srdivacky { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, 646198090Srdivacky { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, 647198090Srdivacky { X86::XOR16rr, X86::XOR16rm, 0 }, 648198090Srdivacky { X86::XOR32rr, X86::XOR32rm, 0 }, 649198090Srdivacky { X86::XOR64rr, X86::XOR64rm, 0 }, 650198090Srdivacky { X86::XOR8rr, X86::XOR8rm, 0 }, 651198090Srdivacky { X86::XORPDrr, X86::XORPDrm, 16 }, 652198090Srdivacky { X86::XORPSrr, X86::XORPSrm, 16 } 653193323Sed }; 654193323Sed 655193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { 656193323Sed unsigned RegOp = OpTbl2[i][0]; 657193323Sed unsigned MemOp = OpTbl2[i][1]; 658198090Srdivacky unsigned Align = OpTbl2[i][2]; 659193323Sed if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, 660198090Srdivacky std::make_pair(MemOp,Align))).second) 661193323Sed assert(false && "Duplicated entries?"); 662198090Srdivacky // Index 2, folded load 663198090Srdivacky unsigned AuxInfo = 2 | (1 << 4); 664193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 665193323Sed std::make_pair(RegOp, AuxInfo))).second) 
666193323Sed AmbEntries.push_back(MemOp); 667193323Sed } 668193323Sed 669193323Sed // Remove ambiguous entries. 670193323Sed assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?"); 671193323Sed} 672193323Sed 673193323Sedbool X86InstrInfo::isMoveInstr(const MachineInstr& MI, 674193323Sed unsigned &SrcReg, unsigned &DstReg, 675193323Sed unsigned &SrcSubIdx, unsigned &DstSubIdx) const { 676193323Sed switch (MI.getOpcode()) { 677193323Sed default: 678193323Sed return false; 679193323Sed case X86::MOV8rr: 680193323Sed case X86::MOV8rr_NOREX: 681193323Sed case X86::MOV16rr: 682193323Sed case X86::MOV32rr: 683193323Sed case X86::MOV64rr: 684193323Sed case X86::MOVSSrr: 685193323Sed case X86::MOVSDrr: 686193323Sed 687193323Sed // FP Stack register class copies 688193323Sed case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: 689193323Sed case X86::MOV_Fp3264: case X86::MOV_Fp3280: 690193323Sed case X86::MOV_Fp6432: case X86::MOV_Fp8032: 691193323Sed 692193323Sed case X86::FsMOVAPSrr: 693193323Sed case X86::FsMOVAPDrr: 694193323Sed case X86::MOVAPSrr: 695193323Sed case X86::MOVAPDrr: 696193323Sed case X86::MOVDQArr: 697193323Sed case X86::MOVSS2PSrr: 698193323Sed case X86::MOVSD2PDrr: 699193323Sed case X86::MOVPS2SSrr: 700193323Sed case X86::MOVPD2SDrr: 701193323Sed case X86::MMX_MOVQ64rr: 702193323Sed assert(MI.getNumOperands() >= 2 && 703193323Sed MI.getOperand(0).isReg() && 704193323Sed MI.getOperand(1).isReg() && 705193323Sed "invalid register-register move instruction"); 706193323Sed SrcReg = MI.getOperand(1).getReg(); 707193323Sed DstReg = MI.getOperand(0).getReg(); 708193323Sed SrcSubIdx = MI.getOperand(1).getSubReg(); 709193323Sed DstSubIdx = MI.getOperand(0).getSubReg(); 710193323Sed return true; 711193323Sed } 712193323Sed} 713193323Sed 714199481Srdivacky/// isFrameOperand - Return true and the FrameIndex if the specified 715199481Srdivacky/// operand and follow operands form a reference to the stack frame. 
716199481Srdivackybool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, 717199481Srdivacky int &FrameIndex) const { 718199481Srdivacky if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && 719199481Srdivacky MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && 720199481Srdivacky MI->getOperand(Op+1).getImm() == 1 && 721199481Srdivacky MI->getOperand(Op+2).getReg() == 0 && 722199481Srdivacky MI->getOperand(Op+3).getImm() == 0) { 723199481Srdivacky FrameIndex = MI->getOperand(Op).getIndex(); 724199481Srdivacky return true; 725199481Srdivacky } 726199481Srdivacky return false; 727199481Srdivacky} 728199481Srdivacky 729199481Srdivackystatic bool isFrameLoadOpcode(int Opcode) { 730199481Srdivacky switch (Opcode) { 731193323Sed default: break; 732193323Sed case X86::MOV8rm: 733193323Sed case X86::MOV16rm: 734193323Sed case X86::MOV32rm: 735193323Sed case X86::MOV64rm: 736193323Sed case X86::LD_Fp64m: 737193323Sed case X86::MOVSSrm: 738193323Sed case X86::MOVSDrm: 739193323Sed case X86::MOVAPSrm: 740193323Sed case X86::MOVAPDrm: 741193323Sed case X86::MOVDQArm: 742193323Sed case X86::MMX_MOVD64rm: 743193323Sed case X86::MMX_MOVQ64rm: 744199481Srdivacky return true; 745193323Sed break; 746193323Sed } 747199481Srdivacky return false; 748193323Sed} 749193323Sed 750199481Srdivackystatic bool isFrameStoreOpcode(int Opcode) { 751199481Srdivacky switch (Opcode) { 752193323Sed default: break; 753193323Sed case X86::MOV8mr: 754193323Sed case X86::MOV16mr: 755193323Sed case X86::MOV32mr: 756193323Sed case X86::MOV64mr: 757193323Sed case X86::ST_FpP64m: 758193323Sed case X86::MOVSSmr: 759193323Sed case X86::MOVSDmr: 760193323Sed case X86::MOVAPSmr: 761193323Sed case X86::MOVAPDmr: 762193323Sed case X86::MOVDQAmr: 763193323Sed case X86::MMX_MOVD64mr: 764193323Sed case X86::MMX_MOVQ64mr: 765193323Sed case X86::MMX_MOVNTQmr: 766199481Srdivacky return true; 767199481Srdivacky } 768199481Srdivacky return false; 769199481Srdivacky} 
770199481Srdivacky 771199481Srdivackyunsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 772199481Srdivacky int &FrameIndex) const { 773199481Srdivacky if (isFrameLoadOpcode(MI->getOpcode())) 774199481Srdivacky if (isFrameOperand(MI, 1, FrameIndex)) 775199481Srdivacky return MI->getOperand(0).getReg(); 776199481Srdivacky return 0; 777199481Srdivacky} 778199481Srdivacky 779199481Srdivackyunsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 780199481Srdivacky int &FrameIndex) const { 781199481Srdivacky if (isFrameLoadOpcode(MI->getOpcode())) { 782199481Srdivacky unsigned Reg; 783199481Srdivacky if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) 784199481Srdivacky return Reg; 785199481Srdivacky // Check for post-frame index elimination operations 786200581Srdivacky const MachineMemOperand *Dummy; 787200581Srdivacky return hasLoadFromStackSlot(MI, Dummy, FrameIndex); 788199481Srdivacky } 789199481Srdivacky return 0; 790199481Srdivacky} 791199481Srdivacky 792199481Srdivackybool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, 793200581Srdivacky const MachineMemOperand *&MMO, 794199481Srdivacky int &FrameIndex) const { 795199481Srdivacky for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 796199481Srdivacky oe = MI->memoperands_end(); 797199481Srdivacky o != oe; 798199481Srdivacky ++o) { 799199481Srdivacky if ((*o)->isLoad() && (*o)->getValue()) 800199481Srdivacky if (const FixedStackPseudoSourceValue *Value = 801199481Srdivacky dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 802199481Srdivacky FrameIndex = Value->getFrameIndex(); 803200581Srdivacky MMO = *o; 804199481Srdivacky return true; 805199481Srdivacky } 806199481Srdivacky } 807199481Srdivacky return false; 808199481Srdivacky} 809199481Srdivacky 810199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, 811199481Srdivacky int &FrameIndex) const { 812199481Srdivacky if (isFrameStoreOpcode(MI->getOpcode())) 
813199481Srdivacky if (isFrameOperand(MI, 0, FrameIndex)) 814193323Sed return MI->getOperand(X86AddrNumOperands).getReg(); 815199481Srdivacky return 0; 816199481Srdivacky} 817199481Srdivacky 818199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, 819199481Srdivacky int &FrameIndex) const { 820199481Srdivacky if (isFrameStoreOpcode(MI->getOpcode())) { 821199481Srdivacky unsigned Reg; 822199481Srdivacky if ((Reg = isStoreToStackSlot(MI, FrameIndex))) 823199481Srdivacky return Reg; 824199481Srdivacky // Check for post-frame index elimination operations 825200581Srdivacky const MachineMemOperand *Dummy; 826200581Srdivacky return hasStoreToStackSlot(MI, Dummy, FrameIndex); 827193323Sed } 828193323Sed return 0; 829193323Sed} 830193323Sed 831199481Srdivackybool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, 832200581Srdivacky const MachineMemOperand *&MMO, 833199481Srdivacky int &FrameIndex) const { 834199481Srdivacky for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 835199481Srdivacky oe = MI->memoperands_end(); 836199481Srdivacky o != oe; 837199481Srdivacky ++o) { 838199481Srdivacky if ((*o)->isStore() && (*o)->getValue()) 839199481Srdivacky if (const FixedStackPseudoSourceValue *Value = 840199481Srdivacky dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 841199481Srdivacky FrameIndex = Value->getFrameIndex(); 842200581Srdivacky MMO = *o; 843199481Srdivacky return true; 844199481Srdivacky } 845199481Srdivacky } 846199481Srdivacky return false; 847199481Srdivacky} 848199481Srdivacky 849193323Sed/// regIsPICBase - Return true if register is PIC base (i.e.g defined by 850193323Sed/// X86::MOVPC32r. 
851193323Sedstatic bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { 852193323Sed bool isPICBase = false; 853193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 854193323Sed E = MRI.def_end(); I != E; ++I) { 855193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 856193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 857193323Sed return false; 858193323Sed assert(!isPICBase && "More than one PIC base?"); 859193323Sed isPICBase = true; 860193323Sed } 861193323Sed return isPICBase; 862193323Sed} 863193323Sed 864193323Sedbool 865198090SrdivackyX86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, 866198090Srdivacky AliasAnalysis *AA) const { 867193323Sed switch (MI->getOpcode()) { 868193323Sed default: break; 869193323Sed case X86::MOV8rm: 870193323Sed case X86::MOV16rm: 871193323Sed case X86::MOV32rm: 872193323Sed case X86::MOV64rm: 873193323Sed case X86::LD_Fp64m: 874193323Sed case X86::MOVSSrm: 875193323Sed case X86::MOVSDrm: 876193323Sed case X86::MOVAPSrm: 877199481Srdivacky case X86::MOVUPSrm: 878199481Srdivacky case X86::MOVUPSrm_Int: 879193323Sed case X86::MOVAPDrm: 880193323Sed case X86::MOVDQArm: 881193323Sed case X86::MMX_MOVD64rm: 882199481Srdivacky case X86::MMX_MOVQ64rm: 883199481Srdivacky case X86::FsMOVAPSrm: 884199481Srdivacky case X86::FsMOVAPDrm: { 885193323Sed // Loads from constant pools are trivially rematerializable. 886193323Sed if (MI->getOperand(1).isReg() && 887193323Sed MI->getOperand(2).isImm() && 888193323Sed MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 889198090Srdivacky MI->isInvariantLoad(AA)) { 890193323Sed unsigned BaseReg = MI->getOperand(1).getReg(); 891195098Sed if (BaseReg == 0 || BaseReg == X86::RIP) 892193323Sed return true; 893193323Sed // Allow re-materialization of PIC load. 
894193323Sed if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) 895193323Sed return false; 896193323Sed const MachineFunction &MF = *MI->getParent()->getParent(); 897193323Sed const MachineRegisterInfo &MRI = MF.getRegInfo(); 898193323Sed bool isPICBase = false; 899193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 900193323Sed E = MRI.def_end(); I != E; ++I) { 901193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 902193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 903193323Sed return false; 904193323Sed assert(!isPICBase && "More than one PIC base?"); 905193323Sed isPICBase = true; 906193323Sed } 907193323Sed return isPICBase; 908193323Sed } 909193323Sed return false; 910193323Sed } 911193323Sed 912193323Sed case X86::LEA32r: 913193323Sed case X86::LEA64r: { 914193323Sed if (MI->getOperand(2).isImm() && 915193323Sed MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 916193323Sed !MI->getOperand(4).isReg()) { 917193323Sed // lea fi#, lea GV, etc. are all rematerializable. 918193323Sed if (!MI->getOperand(1).isReg()) 919193323Sed return true; 920193323Sed unsigned BaseReg = MI->getOperand(1).getReg(); 921193323Sed if (BaseReg == 0) 922193323Sed return true; 923193323Sed // Allow re-materialization of lea PICBase + x. 924193323Sed const MachineFunction &MF = *MI->getParent()->getParent(); 925193323Sed const MachineRegisterInfo &MRI = MF.getRegInfo(); 926193323Sed return regIsPICBase(BaseReg, MRI); 927193323Sed } 928193323Sed return false; 929193323Sed } 930193323Sed } 931193323Sed 932193323Sed // All other instructions marked M_REMATERIALIZABLE are always trivially 933193323Sed // rematerializable. 934193323Sed return true; 935193323Sed} 936193323Sed 937193323Sed/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that 938193323Sed/// would clobber the EFLAGS condition register. Note the result may be 939193323Sed/// conservative. 
If it cannot definitely determine the safety after visiting 940198090Srdivacky/// a few instructions in each direction it assumes it's not safe. 941193323Sedstatic bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, 942193323Sed MachineBasicBlock::iterator I) { 943193323Sed // It's always safe to clobber EFLAGS at the end of a block. 944193323Sed if (I == MBB.end()) 945193323Sed return true; 946193323Sed 947193323Sed // For compile time consideration, if we are not able to determine the 948198090Srdivacky // safety after visiting 4 instructions in each direction, we will assume 949198090Srdivacky // it's not safe. 950198090Srdivacky MachineBasicBlock::iterator Iter = I; 951198090Srdivacky for (unsigned i = 0; i < 4; ++i) { 952193323Sed bool SeenDef = false; 953198090Srdivacky for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { 954198090Srdivacky MachineOperand &MO = Iter->getOperand(j); 955193323Sed if (!MO.isReg()) 956193323Sed continue; 957193323Sed if (MO.getReg() == X86::EFLAGS) { 958193323Sed if (MO.isUse()) 959193323Sed return false; 960193323Sed SeenDef = true; 961193323Sed } 962193323Sed } 963193323Sed 964193323Sed if (SeenDef) 965193323Sed // This instruction defines EFLAGS, no need to look any further. 966193323Sed return true; 967198090Srdivacky ++Iter; 968193323Sed 969193323Sed // If we make it to the end of the block, it's safe to clobber EFLAGS. 970198090Srdivacky if (Iter == MBB.end()) 971193323Sed return true; 972193323Sed } 973193323Sed 974198090Srdivacky Iter = I; 975198090Srdivacky for (unsigned i = 0; i < 4; ++i) { 976198090Srdivacky // If we make it to the beginning of the block, it's safe to clobber 977198090Srdivacky // EFLAGS iff EFLAGS is not live-in. 
978198090Srdivacky if (Iter == MBB.begin()) 979198090Srdivacky return !MBB.isLiveIn(X86::EFLAGS); 980198090Srdivacky 981198090Srdivacky --Iter; 982198090Srdivacky bool SawKill = false; 983198090Srdivacky for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { 984198090Srdivacky MachineOperand &MO = Iter->getOperand(j); 985198090Srdivacky if (MO.isReg() && MO.getReg() == X86::EFLAGS) { 986198090Srdivacky if (MO.isDef()) return MO.isDead(); 987198090Srdivacky if (MO.isKill()) SawKill = true; 988198090Srdivacky } 989198090Srdivacky } 990198090Srdivacky 991198090Srdivacky if (SawKill) 992198090Srdivacky // This instruction kills EFLAGS and doesn't redefine it, so 993198090Srdivacky // there's no need to look further. 994198090Srdivacky return true; 995198090Srdivacky } 996198090Srdivacky 997193323Sed // Conservative answer. 998193323Sed return false; 999193323Sed} 1000193323Sed 1001193323Sedvoid X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, 1002193323Sed MachineBasicBlock::iterator I, 1003198090Srdivacky unsigned DestReg, unsigned SubIdx, 1004199481Srdivacky const MachineInstr *Orig, 1005199481Srdivacky const TargetRegisterInfo *TRI) const { 1006193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 1007193323Sed if (I != MBB.end()) DL = I->getDebugLoc(); 1008193323Sed 1009193323Sed if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { 1010199481Srdivacky DestReg = TRI->getSubReg(DestReg, SubIdx); 1011193323Sed SubIdx = 0; 1012193323Sed } 1013193323Sed 1014193323Sed // MOV32r0 etc. are implemented with xor which clobbers condition code. 1015193323Sed // Re-materialize them as movri instructions to avoid side effects. 
1016198090Srdivacky bool Clone = true; 1017198090Srdivacky unsigned Opc = Orig->getOpcode(); 1018198090Srdivacky switch (Opc) { 1019193323Sed default: break; 1020193323Sed case X86::MOV8r0: 1021198090Srdivacky case X86::MOV32r0: { 1022193323Sed if (!isSafeToClobberEFLAGS(MBB, I)) { 1023198090Srdivacky switch (Opc) { 1024193323Sed default: break; 1025193323Sed case X86::MOV8r0: Opc = X86::MOV8ri; break; 1026193323Sed case X86::MOV32r0: Opc = X86::MOV32ri; break; 1027193323Sed } 1028198090Srdivacky Clone = false; 1029193323Sed } 1030193323Sed break; 1031193323Sed } 1032193323Sed } 1033193323Sed 1034198090Srdivacky if (Clone) { 1035193323Sed MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); 1036193323Sed MI->getOperand(0).setReg(DestReg); 1037193323Sed MBB.insert(I, MI); 1038198090Srdivacky } else { 1039198090Srdivacky BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); 1040193323Sed } 1041193323Sed 1042198090Srdivacky MachineInstr *NewMI = prior(I); 1043198090Srdivacky NewMI->getOperand(0).setSubReg(SubIdx); 1044193323Sed} 1045193323Sed 1046193323Sed/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that 1047193323Sed/// is not marked dead. 1048193323Sedstatic bool hasLiveCondCodeDef(MachineInstr *MI) { 1049193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1050193323Sed MachineOperand &MO = MI->getOperand(i); 1051193323Sed if (MO.isReg() && MO.isDef() && 1052193323Sed MO.getReg() == X86::EFLAGS && !MO.isDead()) { 1053193323Sed return true; 1054193323Sed } 1055193323Sed } 1056193323Sed return false; 1057193323Sed} 1058193323Sed 1059200581Srdivacky/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when 1060200581Srdivacky/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting 1061200581Srdivacky/// to a 32-bit superregister and then truncating back down to a 16-bit 1062200581Srdivacky/// subregister. 
1063200581SrdivackyMachineInstr * 1064200581SrdivackyX86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, 1065200581Srdivacky MachineFunction::iterator &MFI, 1066200581Srdivacky MachineBasicBlock::iterator &MBBI, 1067200581Srdivacky LiveVariables *LV) const { 1068200581Srdivacky MachineInstr *MI = MBBI; 1069200581Srdivacky unsigned Dest = MI->getOperand(0).getReg(); 1070200581Srdivacky unsigned Src = MI->getOperand(1).getReg(); 1071200581Srdivacky bool isDead = MI->getOperand(0).isDead(); 1072200581Srdivacky bool isKill = MI->getOperand(1).isKill(); 1073200581Srdivacky 1074200581Srdivacky unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() 1075200581Srdivacky ? X86::LEA64_32r : X86::LEA32r; 1076200581Srdivacky MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); 1077200581Srdivacky unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); 1078200581Srdivacky unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); 1079200581Srdivacky 1080200581Srdivacky // Build and insert into an implicit UNDEF value. This is OK because 1081200581Srdivacky // well be shifting and then extracting the lower 16-bits. 1082200581Srdivacky // This has the potential to cause partial register stall. e.g. 1083200581Srdivacky // movw (%rbp,%rcx,2), %dx 1084200581Srdivacky // leal -65(%rdx), %esi 1085200581Srdivacky // But testing has shown this *does* help performance in 64-bit mode (at 1086200581Srdivacky // least on modern x86 machines). 
1087200581Srdivacky BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); 1088200581Srdivacky MachineInstr *InsMI = 1089200581Srdivacky BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) 1090200581Srdivacky .addReg(leaInReg) 1091200581Srdivacky .addReg(Src, getKillRegState(isKill)) 1092200581Srdivacky .addImm(X86::SUBREG_16BIT); 1093200581Srdivacky 1094200581Srdivacky MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), 1095200581Srdivacky get(Opc), leaOutReg); 1096200581Srdivacky switch (MIOpc) { 1097200581Srdivacky default: 1098200581Srdivacky llvm_unreachable(0); 1099200581Srdivacky break; 1100200581Srdivacky case X86::SHL16ri: { 1101200581Srdivacky unsigned ShAmt = MI->getOperand(2).getImm(); 1102200581Srdivacky MIB.addReg(0).addImm(1 << ShAmt) 1103200581Srdivacky .addReg(leaInReg, RegState::Kill).addImm(0); 1104200581Srdivacky break; 1105200581Srdivacky } 1106200581Srdivacky case X86::INC16r: 1107200581Srdivacky case X86::INC64_16r: 1108200581Srdivacky addLeaRegOffset(MIB, leaInReg, true, 1); 1109200581Srdivacky break; 1110200581Srdivacky case X86::DEC16r: 1111200581Srdivacky case X86::DEC64_16r: 1112200581Srdivacky addLeaRegOffset(MIB, leaInReg, true, -1); 1113200581Srdivacky break; 1114200581Srdivacky case X86::ADD16ri: 1115200581Srdivacky case X86::ADD16ri8: 1116200581Srdivacky addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); 1117200581Srdivacky break; 1118200581Srdivacky case X86::ADD16rr: { 1119200581Srdivacky unsigned Src2 = MI->getOperand(2).getReg(); 1120200581Srdivacky bool isKill2 = MI->getOperand(2).isKill(); 1121200581Srdivacky unsigned leaInReg2 = 0; 1122200581Srdivacky MachineInstr *InsMI2 = 0; 1123200581Srdivacky if (Src == Src2) { 1124200581Srdivacky // ADD16rr %reg1028<kill>, %reg1028 1125200581Srdivacky // just a single insert_subreg. 
1126200581Srdivacky addRegReg(MIB, leaInReg, true, leaInReg, false); 1127200581Srdivacky } else { 1128200581Srdivacky leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass); 1129200581Srdivacky // Build and insert into an implicit UNDEF value. This is OK because 1130200581Srdivacky // well be shifting and then extracting the lower 16-bits. 1131200581Srdivacky BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); 1132200581Srdivacky InsMI2 = 1133200581Srdivacky BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) 1134200581Srdivacky .addReg(leaInReg2) 1135200581Srdivacky .addReg(Src2, getKillRegState(isKill2)) 1136200581Srdivacky .addImm(X86::SUBREG_16BIT); 1137200581Srdivacky addRegReg(MIB, leaInReg, true, leaInReg2, true); 1138200581Srdivacky } 1139200581Srdivacky if (LV && isKill2 && InsMI2) 1140200581Srdivacky LV->replaceKillInstruction(Src2, MI, InsMI2); 1141200581Srdivacky break; 1142200581Srdivacky } 1143200581Srdivacky } 1144200581Srdivacky 1145200581Srdivacky MachineInstr *NewMI = MIB; 1146200581Srdivacky MachineInstr *ExtMI = 1147200581Srdivacky BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) 1148200581Srdivacky .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1149200581Srdivacky .addReg(leaOutReg, RegState::Kill) 1150200581Srdivacky .addImm(X86::SUBREG_16BIT); 1151200581Srdivacky 1152200581Srdivacky if (LV) { 1153200581Srdivacky // Update live variables 1154200581Srdivacky LV->getVarInfo(leaInReg).Kills.push_back(NewMI); 1155200581Srdivacky LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); 1156200581Srdivacky if (isKill) 1157200581Srdivacky LV->replaceKillInstruction(Src, MI, InsMI); 1158200581Srdivacky if (isDead) 1159200581Srdivacky LV->replaceKillInstruction(Dest, MI, ExtMI); 1160200581Srdivacky } 1161200581Srdivacky 1162200581Srdivacky return ExtMI; 1163200581Srdivacky} 1164200581Srdivacky 1165193323Sed/// convertToThreeAddress - This method must be implemented by targets that 
1166193323Sed/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target 1167193323Sed/// may be able to convert a two-address instruction into a true 1168193323Sed/// three-address instruction on demand. This allows the X86 target (for 1169193323Sed/// example) to convert ADD and SHL instructions into LEA instructions if they 1170193323Sed/// would require register copies due to two-addressness. 1171193323Sed/// 1172193323Sed/// This method returns a null pointer if the transformation cannot be 1173193323Sed/// performed, otherwise it returns the new instruction. 1174193323Sed/// 1175193323SedMachineInstr * 1176193323SedX86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 1177193323Sed MachineBasicBlock::iterator &MBBI, 1178193323Sed LiveVariables *LV) const { 1179193323Sed MachineInstr *MI = MBBI; 1180193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 1181193323Sed // All instructions input are two-addr instructions. Get the known operands. 1182193323Sed unsigned Dest = MI->getOperand(0).getReg(); 1183193323Sed unsigned Src = MI->getOperand(1).getReg(); 1184193323Sed bool isDead = MI->getOperand(0).isDead(); 1185193323Sed bool isKill = MI->getOperand(1).isKill(); 1186193323Sed 1187193323Sed MachineInstr *NewMI = NULL; 1188193323Sed // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When 1189193323Sed // we have better subtarget support, enable the 16-bit LEA generation here. 1190200581Srdivacky // 16-bit LEA is also slow on Core2. 
1191193323Sed bool DisableLEA16 = true; 1192200581Srdivacky bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 1193193323Sed 1194193323Sed unsigned MIOpc = MI->getOpcode(); 1195193323Sed switch (MIOpc) { 1196193323Sed case X86::SHUFPSrri: { 1197193323Sed assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); 1198193323Sed if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; 1199193323Sed 1200193323Sed unsigned B = MI->getOperand(1).getReg(); 1201193323Sed unsigned C = MI->getOperand(2).getReg(); 1202193323Sed if (B != C) return 0; 1203193323Sed unsigned A = MI->getOperand(0).getReg(); 1204193323Sed unsigned M = MI->getOperand(3).getImm(); 1205193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) 1206193323Sed .addReg(A, RegState::Define | getDeadRegState(isDead)) 1207193323Sed .addReg(B, getKillRegState(isKill)).addImm(M); 1208193323Sed break; 1209193323Sed } 1210193323Sed case X86::SHL64ri: { 1211193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1212193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1213193323Sed // the flags produced by a shift yet, so this is safe. 1214193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1215193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1216193323Sed 1217193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 1218193323Sed .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1219193323Sed .addReg(0).addImm(1 << ShAmt) 1220193323Sed .addReg(Src, getKillRegState(isKill)) 1221193323Sed .addImm(0); 1222193323Sed break; 1223193323Sed } 1224193323Sed case X86::SHL32ri: { 1225193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1226193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1227193323Sed // the flags produced by a shift yet, so this is safe. 
1228193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1229193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1230193323Sed 1231200581Srdivacky unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 1232193323Sed NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1233193323Sed .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1234193323Sed .addReg(0).addImm(1 << ShAmt) 1235193323Sed .addReg(Src, getKillRegState(isKill)).addImm(0); 1236193323Sed break; 1237193323Sed } 1238193323Sed case X86::SHL16ri: { 1239193323Sed assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); 1240193323Sed // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses 1241193323Sed // the flags produced by a shift yet, so this is safe. 1242193323Sed unsigned ShAmt = MI->getOperand(2).getImm(); 1243193323Sed if (ShAmt == 0 || ShAmt >= 4) return 0; 1244193323Sed 1245200581Srdivacky if (DisableLEA16) 1246200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1247200581Srdivacky NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1248200581Srdivacky .addReg(Dest, RegState::Define | getDeadRegState(isDead)) 1249200581Srdivacky .addReg(0).addImm(1 << ShAmt) 1250200581Srdivacky .addReg(Src, getKillRegState(isKill)) 1251200581Srdivacky .addImm(0); 1252193323Sed break; 1253193323Sed } 1254193323Sed default: { 1255193323Sed // The following opcodes also sets the condition code register(s). Only 1256193323Sed // convert them to equivalent lea if the condition code register def's 1257193323Sed // are dead! 1258193323Sed if (hasLiveCondCodeDef(MI)) 1259193323Sed return 0; 1260193323Sed 1261193323Sed switch (MIOpc) { 1262193323Sed default: return 0; 1263193323Sed case X86::INC64r: 1264193323Sed case X86::INC32r: 1265193323Sed case X86::INC64_32r: { 1266193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 1267193323Sed unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r 1268193323Sed : (is64Bit ? 
X86::LEA64_32r : X86::LEA32r); 1269193323Sed NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1270193323Sed .addReg(Dest, RegState::Define | 1271193323Sed getDeadRegState(isDead)), 1272193323Sed Src, isKill, 1); 1273193323Sed break; 1274193323Sed } 1275193323Sed case X86::INC16r: 1276193323Sed case X86::INC64_16r: 1277200581Srdivacky if (DisableLEA16) 1278200581Srdivacky return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1279193323Sed assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); 1280193323Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1281193323Sed .addReg(Dest, RegState::Define | 1282193323Sed getDeadRegState(isDead)), 1283193323Sed Src, isKill, 1); 1284193323Sed break; 1285193323Sed case X86::DEC64r: 1286193323Sed case X86::DEC32r: 1287193323Sed case X86::DEC64_32r: { 1288193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 1289193323Sed unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r 1290193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 1291193323Sed NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1292193323Sed .addReg(Dest, RegState::Define | 1293193323Sed getDeadRegState(isDead)), 1294193323Sed Src, isKill, -1); 1295193323Sed break; 1296193323Sed } 1297193323Sed case X86::DEC16r: 1298193323Sed case X86::DEC64_16r: 1299200581Srdivacky if (DisableLEA16) 1300200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1301193323Sed assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); 1302193323Sed NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1303193323Sed .addReg(Dest, RegState::Define | 1304193323Sed getDeadRegState(isDead)), 1305193323Sed Src, isKill, -1); 1306193323Sed break; 1307193323Sed case X86::ADD64rr: 1308193323Sed case X86::ADD32rr: { 1309193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1310193323Sed unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r 1311193323Sed : (is64Bit ? X86::LEA64_32r : X86::LEA32r); 1312193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 1313193323Sed bool isKill2 = MI->getOperand(2).isKill(); 1314193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1315193323Sed .addReg(Dest, RegState::Define | 1316193323Sed getDeadRegState(isDead)), 1317193323Sed Src, isKill, Src2, isKill2); 1318193323Sed if (LV && isKill2) 1319193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 1320193323Sed break; 1321193323Sed } 1322193323Sed case X86::ADD16rr: { 1323200581Srdivacky if (DisableLEA16) 1324200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1325193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1326193323Sed unsigned Src2 = MI->getOperand(2).getReg(); 1327193323Sed bool isKill2 = MI->getOperand(2).isKill(); 1328193323Sed NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1329193323Sed .addReg(Dest, RegState::Define | 1330193323Sed getDeadRegState(isDead)), 1331193323Sed Src, isKill, Src2, isKill2); 1332193323Sed if (LV && isKill2) 1333193323Sed LV->replaceKillInstruction(Src2, MI, NewMI); 1334193323Sed break; 1335193323Sed } 1336193323Sed case X86::ADD64ri32: 1337193323Sed case X86::ADD64ri8: 1338193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1339200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) 1340200581Srdivacky .addReg(Dest, RegState::Define | 1341200581Srdivacky getDeadRegState(isDead)), 1342200581Srdivacky Src, isKill, MI->getOperand(2).getImm()); 1343193323Sed break; 1344193323Sed case X86::ADD32ri: 1345200581Srdivacky case X86::ADD32ri8: { 1346193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1347200581Srdivacky unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; 1348200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) 1349200581Srdivacky .addReg(Dest, RegState::Define | 1350200581Srdivacky getDeadRegState(isDead)), 1351193323Sed Src, isKill, MI->getOperand(2).getImm()); 1352193323Sed break; 1353200581Srdivacky } 1354193323Sed case X86::ADD16ri: 1355193323Sed case X86::ADD16ri8: 1356200581Srdivacky if (DisableLEA16) 1357200581Srdivacky return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; 1358193323Sed assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); 1359200581Srdivacky NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) 1360200581Srdivacky .addReg(Dest, RegState::Define | 1361200581Srdivacky getDeadRegState(isDead)), 1362200581Srdivacky Src, isKill, MI->getOperand(2).getImm()); 1363193323Sed break; 1364193323Sed } 1365193323Sed } 1366193323Sed } 1367193323Sed 1368193323Sed if (!NewMI) return 0; 1369193323Sed 1370193323Sed if (LV) { // Update live variables 1371193323Sed if (isKill) 1372193323Sed LV->replaceKillInstruction(Src, MI, NewMI); 1373193323Sed if (isDead) 1374193323Sed LV->replaceKillInstruction(Dest, MI, NewMI); 1375193323Sed } 1376193323Sed 1377193323Sed MFI->insert(MBBI, NewMI); // Insert the new inst 1378193323Sed return NewMI; 1379193323Sed} 1380193323Sed 1381193323Sed/// commuteInstruction - We have a few instructions that must be hacked on to 1382193323Sed/// commute them. 
///
MachineInstr *
X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  switch (MI->getOpcode()) {
  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
    // Double-shift instructions commute by flipping SHLD<->SHRD and replacing
    // the shift amount I with (Size - I); the register swap itself is then
    // performed by the target-independent commute below.
    unsigned Opc;
    unsigned Size;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
    case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
    case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
    case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
    case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
    case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
    }
    unsigned Amt = MI->getOperand(3).getImm();
    if (NewMI) {
      // Caller asked for a fresh instruction: clone before mutating so the
      // original MI is left untouched.
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    MI->getOperand(3).setImm(Size-Amt);
    // Let the generic implementation swap the two register operands.
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
  case X86::CMOVB16rr:
  case X86::CMOVB32rr:
  case X86::CMOVB64rr:
  case X86::CMOVAE16rr:
  case X86::CMOVAE32rr:
  case X86::CMOVAE64rr:
  case X86::CMOVE16rr:
  case X86::CMOVE32rr:
  case X86::CMOVE64rr:
  case X86::CMOVNE16rr:
  case X86::CMOVNE32rr:
  case X86::CMOVNE64rr:
  case X86::CMOVBE16rr:
  case X86::CMOVBE32rr:
  case X86::CMOVBE64rr:
  case X86::CMOVA16rr:
  case X86::CMOVA32rr:
  case X86::CMOVA64rr:
  case X86::CMOVL16rr:
  case X86::CMOVL32rr:
  case X86::CMOVL64rr:
  case X86::CMOVGE16rr:
  case X86::CMOVGE32rr:
  case X86::CMOVGE64rr:
  case X86::CMOVLE16rr:
  case X86::CMOVLE32rr:
  case X86::CMOVLE64rr:
  case X86::CMOVG16rr:
  case X86::CMOVG32rr:
  case X86::CMOVG64rr:
  case X86::CMOVS16rr:
  case X86::CMOVS32rr:
  case X86::CMOVS64rr:
  case X86::CMOVNS16rr:
  case X86::CMOVNS32rr:
  case X86::CMOVNS64rr:
  case X86::CMOVP16rr:
  case X86::CMOVP32rr:
  case X86::CMOVP64rr:
  case X86::CMOVNP16rr:
  case X86::CMOVNP32rr:
  case X86::CMOVNP64rr:
  case X86::CMOVO16rr:
  case X86::CMOVO32rr:
  case X86::CMOVO64rr:
  case X86::CMOVNO16rr:
  case X86::CMOVNO32rr:
  case X86::CMOVNO64rr: {
    // CMOVcc commutes by inverting the condition code (cc -> !cc) and then
    // letting the generic commute swap the two source registers.
    unsigned Opc = 0;
    switch (MI->getOpcode()) {
    default: break;
    case X86::CMOVB16rr:  Opc = X86::CMOVAE16rr; break;
    case X86::CMOVB32rr:  Opc = X86::CMOVAE32rr; break;
    case X86::CMOVB64rr:  Opc = X86::CMOVAE64rr; break;
    case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
    case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
    case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
    case X86::CMOVE16rr:  Opc = X86::CMOVNE16rr; break;
    case X86::CMOVE32rr:  Opc = X86::CMOVNE32rr; break;
    case X86::CMOVE64rr:  Opc = X86::CMOVNE64rr; break;
    case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
    case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
    case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
    case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
    case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
    case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
    case X86::CMOVA16rr:  Opc = X86::CMOVBE16rr; break;
    case X86::CMOVA32rr:  Opc = X86::CMOVBE32rr; break;
    case X86::CMOVA64rr:  Opc = X86::CMOVBE64rr; break;
    case X86::CMOVL16rr:  Opc = X86::CMOVGE16rr; break;
    case X86::CMOVL32rr:  Opc = X86::CMOVGE32rr; break;
    case X86::CMOVL64rr:  Opc = X86::CMOVGE64rr; break;
    case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
    case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
    case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
    case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
    case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
    case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
    case X86::CMOVG16rr:  Opc = X86::CMOVLE16rr; break;
    case X86::CMOVG32rr:  Opc = X86::CMOVLE32rr; break;
    case X86::CMOVG64rr:  Opc = X86::CMOVLE64rr; break;
    case X86::CMOVS16rr:  Opc = X86::CMOVNS16rr; break;
    case X86::CMOVS32rr:  Opc = X86::CMOVNS32rr; break;
    case X86::CMOVS64rr:  Opc = X86::CMOVNS64rr; break;
    case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
    case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
    case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
    case X86::CMOVP16rr:  Opc = X86::CMOVNP16rr; break;
    case X86::CMOVP32rr:  Opc = X86::CMOVNP32rr; break;
    case X86::CMOVP64rr:  Opc = X86::CMOVNP64rr; break;
    case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
    case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
    case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
    case X86::CMOVO16rr:  Opc = X86::CMOVNO16rr; break;
    case X86::CMOVO32rr:  Opc = X86::CMOVNO32rr; break;
    case X86::CMOVO64rr:  Opc = X86::CMOVNO64rr; break;
    case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
    case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
    case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
    }
    if (NewMI) {
      // Clone before mutating when the caller wants a new instruction.
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    // Fallthrough intended.
  }
  default:
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
}

/// GetCondFromBranchOpc - Map a conditional-branch opcode (Jcc) to the
/// corresponding X86::CondCode.  Returns COND_INVALID for any opcode that is
/// not one of the known conditional branches.
static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
  switch (BrOpc) {
  default: return X86::COND_INVALID;
  case X86::JE:  return X86::COND_E;
  case X86::JNE: return X86::COND_NE;
  case X86::JL:  return X86::COND_L;
  case X86::JLE: return X86::COND_LE;
  case X86::JG:  return X86::COND_G;
  case X86::JGE: return X86::COND_GE;
  case X86::JB:  return X86::COND_B;
  case X86::JBE: return X86::COND_BE;
  case X86::JA:  return X86::COND_A;
  case X86::JAE: return X86::COND_AE;
  case X86::JS:  return X86::COND_S;
  case X86::JNS: return X86::COND_NS;
  case X86::JP:  return X86::COND_P;
  case X86::JNP: return X86::COND_NP;
  case X86::JO:  return X86::COND_O;
  case X86::JNO: return X86::COND_NO;
  }
}

/// GetCondBranchFromCond - Inverse of GetCondFromBranchOpc: map an
/// X86::CondCode back to the matching Jcc branch opcode.  Asserts on
/// pseudo-conditions (e.g. COND_NP_OR_E) that have no single branch opcode.
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Illegal condition code!");
  case X86::COND_E:  return X86::JE;
  case X86::COND_NE: return X86::JNE;
  case X86::COND_L:  return X86::JL;
  case X86::COND_LE: return X86::JLE;
  case X86::COND_G:  return X86::JG;
  case X86::COND_GE: return X86::JGE;
  case X86::COND_B:  return X86::JB;
  case X86::COND_BE: return X86::JBE;
  case X86::COND_A:  return X86::JA;
  case X86::COND_AE: return X86::JAE;
  case X86::COND_S:  return X86::JS;
  case X86::COND_NS: return X86::JNS;
  case X86::COND_P:  return X86::JP;
  case X86::COND_NP: return X86::JNP;
  case X86::COND_O:  return X86::JO;
  case X86::COND_NO: return X86::JNO;
  }
}

/// GetOppositeBranchCondition - Return the inverse of the specified condition,
/// e.g. turning COND_E to COND_NE.
1573193323SedX86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { 1574193323Sed switch (CC) { 1575198090Srdivacky default: llvm_unreachable("Illegal condition code!"); 1576193323Sed case X86::COND_E: return X86::COND_NE; 1577193323Sed case X86::COND_NE: return X86::COND_E; 1578193323Sed case X86::COND_L: return X86::COND_GE; 1579193323Sed case X86::COND_LE: return X86::COND_G; 1580193323Sed case X86::COND_G: return X86::COND_LE; 1581193323Sed case X86::COND_GE: return X86::COND_L; 1582193323Sed case X86::COND_B: return X86::COND_AE; 1583193323Sed case X86::COND_BE: return X86::COND_A; 1584193323Sed case X86::COND_A: return X86::COND_BE; 1585193323Sed case X86::COND_AE: return X86::COND_B; 1586193323Sed case X86::COND_S: return X86::COND_NS; 1587193323Sed case X86::COND_NS: return X86::COND_S; 1588193323Sed case X86::COND_P: return X86::COND_NP; 1589193323Sed case X86::COND_NP: return X86::COND_P; 1590193323Sed case X86::COND_O: return X86::COND_NO; 1591193323Sed case X86::COND_NO: return X86::COND_O; 1592193323Sed } 1593193323Sed} 1594193323Sed 1595193323Sedbool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { 1596193323Sed const TargetInstrDesc &TID = MI->getDesc(); 1597193323Sed if (!TID.isTerminator()) return false; 1598193323Sed 1599193323Sed // Conditional branch is a special case. 1600193323Sed if (TID.isBranch() && !TID.isBarrier()) 1601193323Sed return true; 1602193323Sed if (!TID.isPredicable()) 1603193323Sed return true; 1604193323Sed return !isPredicated(MI); 1605193323Sed} 1606193323Sed 1607193323Sed// For purposes of branch analysis do not count FP_REG_KILL as a terminator. 
/// isBrAnalysisUnpredicatedTerminator - Like isUnpredicatedTerminator, but
/// ignores FP_REG_KILL, which branch analysis should not treat as a
/// terminator.
static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
                                               const X86InstrInfo &TII) {
  if (MI->getOpcode() == X86::FP_REG_KILL)
    return false;
  return TII.isUnpredicatedTerminator(MI);
}

/// AnalyzeBranch - Walk the terminators of MBB from the bottom up and try to
/// describe its control flow via TBB/FBB/Cond.  Returns true when the block
/// cannot be analyzed (indirect branch, unknown terminator, or a
/// multi-branch pattern this code doesn't recognize), false on success.
/// When AllowModify is set, dead code after an unconditional JMP and
/// fall-through JMPs are deleted as a side effect.
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;

    // Working from the bottom, when we see a non-terminator instruction, we're
    // done.
    if (!isBrAnalysisUnpredicatedTerminator(I, *this))
      break;

    // A terminator that isn't a branch can't easily be handled by this
    // analysis.
    if (!I->getDesc().isBranch())
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP) {
      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (llvm::next(I) != MBB.end())
        llvm::next(I)->eraseFromParent();

      Cond.clear();
      FBB = 0;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
    if (BranchCode == X86::COND_INVALID)
      return true;  // Can't handle indirect branch.

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination and their condition
    // opcodes fit one of the special multi-branch idioms.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to the same
    // destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;

    // If the conditions are the same, we can leave them alone.
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    if (OldBranchCode == BranchCode)
      continue;

    // If they differ, see if they fit one of the known patterns. Theoretically,
    // we could handle more patterns here, but we shouldn't expect to see them
    // if instruction selection has done a reasonable job.
    if ((OldBranchCode == X86::COND_NP &&
         BranchCode == X86::COND_E) ||
        (OldBranchCode == X86::COND_E &&
         BranchCode == X86::COND_NP))
      BranchCode = X86::COND_NP_OR_E;
    else if ((OldBranchCode == X86::COND_P &&
              BranchCode == X86::COND_NE) ||
             (OldBranchCode == X86::COND_NE &&
              BranchCode == X86::COND_P))
      BranchCode = X86::COND_NE_OR_P;
    else
      return true;

    // Update the MachineOperand.
    Cond[0].setImm(BranchCode);
  }

  return false;
}

/// RemoveBranch - Delete the branch instructions at the end of MBB (both the
/// unconditional JMP and any recognized conditional branches) and return how
/// many instructions were removed.
unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  unsigned Count = 0;

  while (I != MBB.begin()) {
    --I;
    // Stop at the first instruction that is neither an unconditional JMP nor
    // a recognized conditional branch.
    if (I->getOpcode() != X86::JMP &&
        GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
      break;
    // Remove the branch.
    I->eraseFromParent();
    I = MBB.end();
    ++Count;
  }

  return Count;
}

/// InsertBranch - Append branch instructions to MBB implementing the
/// (TBB, FBB, Cond) description produced by AnalyzeBranch, and return the
/// number of instructions inserted.  The pseudo-conditions COND_NP_OR_E and
/// COND_NE_OR_P each expand to two conditional branches.
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                           MachineBasicBlock *FBB,
                           const SmallVectorImpl<MachineOperand> &Cond) const {
  // FIXME this should probably have a DebugLoc operand
  DebugLoc dl = DebugLoc::getUnknownLoc();
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "X86 branch conditions have one component!");

  if (Cond.empty()) {
    // Unconditional branch?
    assert(!FBB && "Unconditional branch with multiple successors!");
    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB);
    return 1;
  }

  // Conditional branch.
  unsigned Count = 0;
  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
  switch (CC) {
  case X86::COND_NP_OR_E:
    // Synthesize NP_OR_E with two branches.
    BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB);
    ++Count;
    break;
  case X86::COND_NE_OR_P:
    // Synthesize NE_OR_P with two branches.
    BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
    ++Count;
    break;
  default: {
    unsigned Opc = GetCondBranchFromCond(CC);
    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
    ++Count;
  }
  }
  if (FBB) {
    // Two-way Conditional branch. Insert the second branch.
    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
    ++Count;
  }
  return Count;
}

/// isHReg - Test if the given register is a physical h register.
1784193323Sedstatic bool isHReg(unsigned Reg) { 1785193323Sed return X86::GR8_ABCD_HRegClass.contains(Reg); 1786193323Sed} 1787193323Sed 1788193323Sedbool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, 1789193323Sed MachineBasicBlock::iterator MI, 1790193323Sed unsigned DestReg, unsigned SrcReg, 1791193323Sed const TargetRegisterClass *DestRC, 1792193323Sed const TargetRegisterClass *SrcRC) const { 1793193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 1794193323Sed if (MI != MBB.end()) DL = MI->getDebugLoc(); 1795193323Sed 1796193323Sed // Determine if DstRC and SrcRC have a common superclass in common. 1797193323Sed const TargetRegisterClass *CommonRC = DestRC; 1798193323Sed if (DestRC == SrcRC) 1799193323Sed /* Source and destination have the same register class. */; 1800193323Sed else if (CommonRC->hasSuperClass(SrcRC)) 1801193323Sed CommonRC = SrcRC; 1802198090Srdivacky else if (!DestRC->hasSubClass(SrcRC)) { 1803198090Srdivacky // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, 1804198090Srdivacky // but we want to copy then as GR64. Similarly, for GR32_NOREX and 1805198090Srdivacky // GR32_NOSP, copy as GR32. 
1806198090Srdivacky if (SrcRC->hasSuperClass(&X86::GR64RegClass) && 1807198090Srdivacky DestRC->hasSuperClass(&X86::GR64RegClass)) 1808198090Srdivacky CommonRC = &X86::GR64RegClass; 1809198090Srdivacky else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && 1810198090Srdivacky DestRC->hasSuperClass(&X86::GR32RegClass)) 1811198090Srdivacky CommonRC = &X86::GR32RegClass; 1812198090Srdivacky else 1813198090Srdivacky CommonRC = 0; 1814198090Srdivacky } 1815193323Sed 1816193323Sed if (CommonRC) { 1817193323Sed unsigned Opc; 1818198090Srdivacky if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { 1819193323Sed Opc = X86::MOV64rr; 1820198090Srdivacky } else if (CommonRC == &X86::GR32RegClass || 1821198090Srdivacky CommonRC == &X86::GR32_NOSPRegClass) { 1822193323Sed Opc = X86::MOV32rr; 1823193323Sed } else if (CommonRC == &X86::GR16RegClass) { 1824193323Sed Opc = X86::MOV16rr; 1825193323Sed } else if (CommonRC == &X86::GR8RegClass) { 1826193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 1827193323Sed // move. Otherwise use a normal move. 
1828193323Sed if ((isHReg(DestReg) || isHReg(SrcReg)) && 1829193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 1830193323Sed Opc = X86::MOV8rr_NOREX; 1831193323Sed else 1832193323Sed Opc = X86::MOV8rr; 1833193323Sed } else if (CommonRC == &X86::GR64_ABCDRegClass) { 1834193323Sed Opc = X86::MOV64rr; 1835193323Sed } else if (CommonRC == &X86::GR32_ABCDRegClass) { 1836193323Sed Opc = X86::MOV32rr; 1837193323Sed } else if (CommonRC == &X86::GR16_ABCDRegClass) { 1838193323Sed Opc = X86::MOV16rr; 1839193323Sed } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { 1840193323Sed Opc = X86::MOV8rr; 1841193323Sed } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { 1842193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1843193323Sed Opc = X86::MOV8rr_NOREX; 1844193323Sed else 1845193323Sed Opc = X86::MOV8rr; 1846198090Srdivacky } else if (CommonRC == &X86::GR64_NOREXRegClass || 1847198090Srdivacky CommonRC == &X86::GR64_NOREX_NOSPRegClass) { 1848193323Sed Opc = X86::MOV64rr; 1849193323Sed } else if (CommonRC == &X86::GR32_NOREXRegClass) { 1850193323Sed Opc = X86::MOV32rr; 1851193323Sed } else if (CommonRC == &X86::GR16_NOREXRegClass) { 1852193323Sed Opc = X86::MOV16rr; 1853193323Sed } else if (CommonRC == &X86::GR8_NOREXRegClass) { 1854193323Sed Opc = X86::MOV8rr; 1855193323Sed } else if (CommonRC == &X86::RFP32RegClass) { 1856193323Sed Opc = X86::MOV_Fp3232; 1857193323Sed } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { 1858193323Sed Opc = X86::MOV_Fp6464; 1859193323Sed } else if (CommonRC == &X86::RFP80RegClass) { 1860193323Sed Opc = X86::MOV_Fp8080; 1861193323Sed } else if (CommonRC == &X86::FR32RegClass) { 1862193323Sed Opc = X86::FsMOVAPSrr; 1863193323Sed } else if (CommonRC == &X86::FR64RegClass) { 1864193323Sed Opc = X86::FsMOVAPDrr; 1865193323Sed } else if (CommonRC == &X86::VR128RegClass) { 1866193323Sed Opc = X86::MOVAPSrr; 1867193323Sed } else if (CommonRC == &X86::VR64RegClass) { 1868193323Sed Opc = X86::MMX_MOVQ64rr; 
1869193323Sed } else { 1870193323Sed return false; 1871193323Sed } 1872193323Sed BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); 1873193323Sed return true; 1874193323Sed } 1875198090Srdivacky 1876193323Sed // Moving EFLAGS to / from another register requires a push and a pop. 1877193323Sed if (SrcRC == &X86::CCRRegClass) { 1878193323Sed if (SrcReg != X86::EFLAGS) 1879193323Sed return false; 1880198090Srdivacky if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { 1881201360Srdivacky BuildMI(MBB, MI, DL, get(X86::PUSHFQ64)); 1882193323Sed BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); 1883193323Sed return true; 1884198090Srdivacky } else if (DestRC == &X86::GR32RegClass || 1885198090Srdivacky DestRC == &X86::GR32_NOSPRegClass) { 1886193323Sed BuildMI(MBB, MI, DL, get(X86::PUSHFD)); 1887193323Sed BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); 1888193323Sed return true; 1889193323Sed } 1890193323Sed } else if (DestRC == &X86::CCRRegClass) { 1891193323Sed if (DestReg != X86::EFLAGS) 1892193323Sed return false; 1893198090Srdivacky if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { 1894193323Sed BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); 1895193323Sed BuildMI(MBB, MI, DL, get(X86::POPFQ)); 1896193323Sed return true; 1897198090Srdivacky } else if (SrcRC == &X86::GR32RegClass || 1898198090Srdivacky DestRC == &X86::GR32_NOSPRegClass) { 1899193323Sed BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); 1900193323Sed BuildMI(MBB, MI, DL, get(X86::POPFD)); 1901193323Sed return true; 1902193323Sed } 1903193323Sed } 1904193323Sed 1905193323Sed // Moving from ST(0) turns into FpGET_ST0_32 etc. 1906193323Sed if (SrcRC == &X86::RSTRegClass) { 1907193323Sed // Copying from ST(0)/ST(1). 
1908193323Sed if (SrcReg != X86::ST0 && SrcReg != X86::ST1) 1909193323Sed // Can only copy from ST(0)/ST(1) right now 1910193323Sed return false; 1911193323Sed bool isST0 = SrcReg == X86::ST0; 1912193323Sed unsigned Opc; 1913193323Sed if (DestRC == &X86::RFP32RegClass) 1914193323Sed Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; 1915193323Sed else if (DestRC == &X86::RFP64RegClass) 1916193323Sed Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; 1917193323Sed else { 1918193323Sed if (DestRC != &X86::RFP80RegClass) 1919193323Sed return false; 1920193323Sed Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; 1921193323Sed } 1922193323Sed BuildMI(MBB, MI, DL, get(Opc), DestReg); 1923193323Sed return true; 1924193323Sed } 1925193323Sed 1926193323Sed // Moving to ST(0) turns into FpSET_ST0_32 etc. 1927193323Sed if (DestRC == &X86::RSTRegClass) { 1928193323Sed // Copying to ST(0) / ST(1). 1929193323Sed if (DestReg != X86::ST0 && DestReg != X86::ST1) 1930193323Sed // Can only copy to TOS right now 1931193323Sed return false; 1932193323Sed bool isST0 = DestReg == X86::ST0; 1933193323Sed unsigned Opc; 1934193323Sed if (SrcRC == &X86::RFP32RegClass) 1935193323Sed Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32; 1936193323Sed else if (SrcRC == &X86::RFP64RegClass) 1937193323Sed Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64; 1938193323Sed else { 1939193323Sed if (SrcRC != &X86::RFP80RegClass) 1940193323Sed return false; 1941193323Sed Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80; 1942193323Sed } 1943193323Sed BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg); 1944193323Sed return true; 1945193323Sed } 1946193323Sed 1947193323Sed // Not yet supported! 
1948193323Sed return false; 1949193323Sed} 1950193323Sed 1951193323Sedstatic unsigned getStoreRegOpcode(unsigned SrcReg, 1952193323Sed const TargetRegisterClass *RC, 1953193323Sed bool isStackAligned, 1954193323Sed TargetMachine &TM) { 1955193323Sed unsigned Opc = 0; 1956198090Srdivacky if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { 1957193323Sed Opc = X86::MOV64mr; 1958198090Srdivacky } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { 1959193323Sed Opc = X86::MOV32mr; 1960193323Sed } else if (RC == &X86::GR16RegClass) { 1961193323Sed Opc = X86::MOV16mr; 1962193323Sed } else if (RC == &X86::GR8RegClass) { 1963193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 1964193323Sed // move. Otherwise use a normal move. 1965193323Sed if (isHReg(SrcReg) && 1966193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 1967193323Sed Opc = X86::MOV8mr_NOREX; 1968193323Sed else 1969193323Sed Opc = X86::MOV8mr; 1970193323Sed } else if (RC == &X86::GR64_ABCDRegClass) { 1971193323Sed Opc = X86::MOV64mr; 1972193323Sed } else if (RC == &X86::GR32_ABCDRegClass) { 1973193323Sed Opc = X86::MOV32mr; 1974193323Sed } else if (RC == &X86::GR16_ABCDRegClass) { 1975193323Sed Opc = X86::MOV16mr; 1976193323Sed } else if (RC == &X86::GR8_ABCD_LRegClass) { 1977193323Sed Opc = X86::MOV8mr; 1978193323Sed } else if (RC == &X86::GR8_ABCD_HRegClass) { 1979193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1980193323Sed Opc = X86::MOV8mr_NOREX; 1981193323Sed else 1982193323Sed Opc = X86::MOV8mr; 1983198090Srdivacky } else if (RC == &X86::GR64_NOREXRegClass || 1984198090Srdivacky RC == &X86::GR64_NOREX_NOSPRegClass) { 1985193323Sed Opc = X86::MOV64mr; 1986193323Sed } else if (RC == &X86::GR32_NOREXRegClass) { 1987193323Sed Opc = X86::MOV32mr; 1988193323Sed } else if (RC == &X86::GR16_NOREXRegClass) { 1989193323Sed Opc = X86::MOV16mr; 1990193323Sed } else if (RC == &X86::GR8_NOREXRegClass) { 1991193323Sed Opc = X86::MOV8mr; 
1992193323Sed } else if (RC == &X86::RFP80RegClass) { 1993193323Sed Opc = X86::ST_FpP80m; // pops 1994193323Sed } else if (RC == &X86::RFP64RegClass) { 1995193323Sed Opc = X86::ST_Fp64m; 1996193323Sed } else if (RC == &X86::RFP32RegClass) { 1997193323Sed Opc = X86::ST_Fp32m; 1998193323Sed } else if (RC == &X86::FR32RegClass) { 1999193323Sed Opc = X86::MOVSSmr; 2000193323Sed } else if (RC == &X86::FR64RegClass) { 2001193323Sed Opc = X86::MOVSDmr; 2002193323Sed } else if (RC == &X86::VR128RegClass) { 2003193323Sed // If stack is realigned we can use aligned stores. 2004193323Sed Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr; 2005193323Sed } else if (RC == &X86::VR64RegClass) { 2006193323Sed Opc = X86::MMX_MOVQ64mr; 2007193323Sed } else { 2008198090Srdivacky llvm_unreachable("Unknown regclass"); 2009193323Sed } 2010193323Sed 2011193323Sed return Opc; 2012193323Sed} 2013193323Sed 2014193323Sedvoid X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 2015193323Sed MachineBasicBlock::iterator MI, 2016193323Sed unsigned SrcReg, bool isKill, int FrameIdx, 2017193323Sed const TargetRegisterClass *RC) const { 2018193323Sed const MachineFunction &MF = *MBB.getParent(); 2019193323Sed bool isAligned = (RI.getStackAlignment() >= 16) || 2020193323Sed RI.needsStackRealignment(MF); 2021193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2022193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2023193323Sed if (MI != MBB.end()) DL = MI->getDebugLoc(); 2024193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) 2025193323Sed .addReg(SrcReg, getKillRegState(isKill)); 2026193323Sed} 2027193323Sed 2028193323Sedvoid X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, 2029193323Sed bool isKill, 2030193323Sed SmallVectorImpl<MachineOperand> &Addr, 2031193323Sed const TargetRegisterClass *RC, 2032198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 2033198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 2034193323Sed 
SmallVectorImpl<MachineInstr*> &NewMIs) const { 2035199481Srdivacky bool isAligned = (*MMOBegin)->getAlignment() >= 16; 2036193323Sed unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2037193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2038193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 2039193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2040193323Sed MIB.addOperand(Addr[i]); 2041193323Sed MIB.addReg(SrcReg, getKillRegState(isKill)); 2042198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 2043193323Sed NewMIs.push_back(MIB); 2044193323Sed} 2045193323Sed 2046193323Sedstatic unsigned getLoadRegOpcode(unsigned DestReg, 2047193323Sed const TargetRegisterClass *RC, 2048193323Sed bool isStackAligned, 2049193323Sed const TargetMachine &TM) { 2050193323Sed unsigned Opc = 0; 2051198090Srdivacky if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { 2052193323Sed Opc = X86::MOV64rm; 2053198090Srdivacky } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { 2054193323Sed Opc = X86::MOV32rm; 2055193323Sed } else if (RC == &X86::GR16RegClass) { 2056193323Sed Opc = X86::MOV16rm; 2057193323Sed } else if (RC == &X86::GR8RegClass) { 2058193323Sed // Copying to or from a physical H register on x86-64 requires a NOREX 2059193323Sed // move. Otherwise use a normal move. 
2060193323Sed if (isHReg(DestReg) && 2061193323Sed TM.getSubtarget<X86Subtarget>().is64Bit()) 2062193323Sed Opc = X86::MOV8rm_NOREX; 2063193323Sed else 2064193323Sed Opc = X86::MOV8rm; 2065193323Sed } else if (RC == &X86::GR64_ABCDRegClass) { 2066193323Sed Opc = X86::MOV64rm; 2067193323Sed } else if (RC == &X86::GR32_ABCDRegClass) { 2068193323Sed Opc = X86::MOV32rm; 2069193323Sed } else if (RC == &X86::GR16_ABCDRegClass) { 2070193323Sed Opc = X86::MOV16rm; 2071193323Sed } else if (RC == &X86::GR8_ABCD_LRegClass) { 2072193323Sed Opc = X86::MOV8rm; 2073193323Sed } else if (RC == &X86::GR8_ABCD_HRegClass) { 2074193323Sed if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2075193323Sed Opc = X86::MOV8rm_NOREX; 2076193323Sed else 2077193323Sed Opc = X86::MOV8rm; 2078198090Srdivacky } else if (RC == &X86::GR64_NOREXRegClass || 2079198090Srdivacky RC == &X86::GR64_NOREX_NOSPRegClass) { 2080193323Sed Opc = X86::MOV64rm; 2081193323Sed } else if (RC == &X86::GR32_NOREXRegClass) { 2082193323Sed Opc = X86::MOV32rm; 2083193323Sed } else if (RC == &X86::GR16_NOREXRegClass) { 2084193323Sed Opc = X86::MOV16rm; 2085193323Sed } else if (RC == &X86::GR8_NOREXRegClass) { 2086193323Sed Opc = X86::MOV8rm; 2087193323Sed } else if (RC == &X86::RFP80RegClass) { 2088193323Sed Opc = X86::LD_Fp80m; 2089193323Sed } else if (RC == &X86::RFP64RegClass) { 2090193323Sed Opc = X86::LD_Fp64m; 2091193323Sed } else if (RC == &X86::RFP32RegClass) { 2092193323Sed Opc = X86::LD_Fp32m; 2093193323Sed } else if (RC == &X86::FR32RegClass) { 2094193323Sed Opc = X86::MOVSSrm; 2095193323Sed } else if (RC == &X86::FR64RegClass) { 2096193323Sed Opc = X86::MOVSDrm; 2097193323Sed } else if (RC == &X86::VR128RegClass) { 2098193323Sed // If stack is realigned we can use aligned loads. 2099193323Sed Opc = isStackAligned ? 
X86::MOVAPSrm : X86::MOVUPSrm; 2100193323Sed } else if (RC == &X86::VR64RegClass) { 2101193323Sed Opc = X86::MMX_MOVQ64rm; 2102193323Sed } else { 2103198090Srdivacky llvm_unreachable("Unknown regclass"); 2104193323Sed } 2105193323Sed 2106193323Sed return Opc; 2107193323Sed} 2108193323Sed 2109193323Sedvoid X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 2110193323Sed MachineBasicBlock::iterator MI, 2111193323Sed unsigned DestReg, int FrameIdx, 2112193323Sed const TargetRegisterClass *RC) const{ 2113193323Sed const MachineFunction &MF = *MBB.getParent(); 2114193323Sed bool isAligned = (RI.getStackAlignment() >= 16) || 2115193323Sed RI.needsStackRealignment(MF); 2116193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2117193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2118193323Sed if (MI != MBB.end()) DL = MI->getDebugLoc(); 2119193323Sed addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); 2120193323Sed} 2121193323Sed 2122193323Sedvoid X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, 2123193323Sed SmallVectorImpl<MachineOperand> &Addr, 2124193323Sed const TargetRegisterClass *RC, 2125198090Srdivacky MachineInstr::mmo_iterator MMOBegin, 2126198090Srdivacky MachineInstr::mmo_iterator MMOEnd, 2127193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2128199481Srdivacky bool isAligned = (*MMOBegin)->getAlignment() >= 16; 2129193323Sed unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2130193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2131193323Sed MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); 2132193323Sed for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2133193323Sed MIB.addOperand(Addr[i]); 2134198090Srdivacky (*MIB).setMemRefs(MMOBegin, MMOEnd); 2135193323Sed NewMIs.push_back(MIB); 2136193323Sed} 2137193323Sed 2138193323Sedbool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 2139193323Sed MachineBasicBlock::iterator MI, 2140193323Sed const 
std::vector<CalleeSavedInfo> &CSI) const { 2141193323Sed if (CSI.empty()) 2142193323Sed return false; 2143193323Sed 2144193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2145193323Sed if (MI != MBB.end()) DL = MI->getDebugLoc(); 2146193323Sed 2147193323Sed bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2148198090Srdivacky bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); 2149193323Sed unsigned SlotSize = is64Bit ? 8 : 4; 2150193323Sed 2151193323Sed MachineFunction &MF = *MBB.getParent(); 2152198090Srdivacky unsigned FPReg = RI.getFrameRegister(MF); 2153193323Sed X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2154193574Sed unsigned CalleeFrameSize = 0; 2155193323Sed 2156193323Sed unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; 2157193323Sed for (unsigned i = CSI.size(); i != 0; --i) { 2158193323Sed unsigned Reg = CSI[i-1].getReg(); 2159193574Sed const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); 2160193323Sed // Add the callee-saved register as live-in. It's killed at the spill. 2161193323Sed MBB.addLiveIn(Reg); 2162198090Srdivacky if (Reg == FPReg) 2163198090Srdivacky // X86RegisterInfo::emitPrologue will handle spilling of frame register. 
2164198090Srdivacky continue; 2165198090Srdivacky if (RegClass != &X86::VR128RegClass && !isWin64) { 2166193574Sed CalleeFrameSize += SlotSize; 2167198090Srdivacky BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); 2168193574Sed } else { 2169193574Sed storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); 2170193574Sed } 2171193323Sed } 2172193574Sed 2173193574Sed X86FI->setCalleeSavedFrameSize(CalleeFrameSize); 2174193323Sed return true; 2175193323Sed} 2176193323Sed 2177193323Sedbool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2178193323Sed MachineBasicBlock::iterator MI, 2179193323Sed const std::vector<CalleeSavedInfo> &CSI) const { 2180193323Sed if (CSI.empty()) 2181193323Sed return false; 2182193323Sed 2183193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 2184193323Sed if (MI != MBB.end()) DL = MI->getDebugLoc(); 2185193323Sed 2186198090Srdivacky MachineFunction &MF = *MBB.getParent(); 2187198090Srdivacky unsigned FPReg = RI.getFrameRegister(MF); 2188193323Sed bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2189198090Srdivacky bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); 2190193323Sed unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; 2191193323Sed for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2192193323Sed unsigned Reg = CSI[i].getReg(); 2193198090Srdivacky if (Reg == FPReg) 2194198090Srdivacky // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 
2195198090Srdivacky continue; 2196193574Sed const TargetRegisterClass *RegClass = CSI[i].getRegClass(); 2197198090Srdivacky if (RegClass != &X86::VR128RegClass && !isWin64) { 2198193574Sed BuildMI(MBB, MI, DL, get(Opc), Reg); 2199193574Sed } else { 2200193574Sed loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); 2201193574Sed } 2202193323Sed } 2203193323Sed return true; 2204193323Sed} 2205193323Sed 2206193323Sedstatic MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, 2207193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2208193323Sed MachineInstr *MI, 2209193323Sed const TargetInstrInfo &TII) { 2210193323Sed // Create the base instruction with the memory operand as the first part. 2211193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2212193323Sed MI->getDebugLoc(), true); 2213193323Sed MachineInstrBuilder MIB(NewMI); 2214193323Sed unsigned NumAddrOps = MOs.size(); 2215193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2216193323Sed MIB.addOperand(MOs[i]); 2217193323Sed if (NumAddrOps < 4) // FrameIndex only 2218193323Sed addOffset(MIB, 0); 2219193323Sed 2220193323Sed // Loop over the rest of the ri operands, converting them over. 
2221193323Sed unsigned NumOps = MI->getDesc().getNumOperands()-2; 2222193323Sed for (unsigned i = 0; i != NumOps; ++i) { 2223193323Sed MachineOperand &MO = MI->getOperand(i+2); 2224193323Sed MIB.addOperand(MO); 2225193323Sed } 2226193323Sed for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { 2227193323Sed MachineOperand &MO = MI->getOperand(i); 2228193323Sed MIB.addOperand(MO); 2229193323Sed } 2230193323Sed return MIB; 2231193323Sed} 2232193323Sed 2233193323Sedstatic MachineInstr *FuseInst(MachineFunction &MF, 2234193323Sed unsigned Opcode, unsigned OpNo, 2235193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2236193323Sed MachineInstr *MI, const TargetInstrInfo &TII) { 2237193323Sed MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2238193323Sed MI->getDebugLoc(), true); 2239193323Sed MachineInstrBuilder MIB(NewMI); 2240193323Sed 2241193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2242193323Sed MachineOperand &MO = MI->getOperand(i); 2243193323Sed if (i == OpNo) { 2244193323Sed assert(MO.isReg() && "Expected to fold into reg operand!"); 2245193323Sed unsigned NumAddrOps = MOs.size(); 2246193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2247193323Sed MIB.addOperand(MOs[i]); 2248193323Sed if (NumAddrOps < 4) // FrameIndex only 2249193323Sed addOffset(MIB, 0); 2250193323Sed } else { 2251193323Sed MIB.addOperand(MO); 2252193323Sed } 2253193323Sed } 2254193323Sed return MIB; 2255193323Sed} 2256193323Sed 2257193323Sedstatic MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 2258193323Sed const SmallVectorImpl<MachineOperand> &MOs, 2259193323Sed MachineInstr *MI) { 2260193323Sed MachineFunction &MF = *MI->getParent()->getParent(); 2261193323Sed MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); 2262193323Sed 2263193323Sed unsigned NumAddrOps = MOs.size(); 2264193323Sed for (unsigned i = 0; i != NumAddrOps; ++i) 2265193323Sed MIB.addOperand(MOs[i]); 2266193323Sed if 
(NumAddrOps < 4) // FrameIndex only 2267193323Sed addOffset(MIB, 0); 2268193323Sed return MIB.addImm(0); 2269193323Sed} 2270193323Sed 2271193323SedMachineInstr* 2272193323SedX86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2273193323Sed MachineInstr *MI, unsigned i, 2274198090Srdivacky const SmallVectorImpl<MachineOperand> &MOs, 2275198090Srdivacky unsigned Size, unsigned Align) const { 2276198090Srdivacky const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; 2277193323Sed bool isTwoAddrFold = false; 2278193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 2279193323Sed bool isTwoAddr = NumOps > 1 && 2280193323Sed MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2281193323Sed 2282193323Sed MachineInstr *NewMI = NULL; 2283193323Sed // Folding a memory location into the two-address part of a two-address 2284193323Sed // instruction is different than folding it other places. It requires 2285193323Sed // replacing the *two* registers with the memory location. 2286193323Sed if (isTwoAddr && NumOps >= 2 && i < 2 && 2287193323Sed MI->getOperand(0).isReg() && 2288193323Sed MI->getOperand(1).isReg() && 2289193323Sed MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 2290193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2291193323Sed isTwoAddrFold = true; 2292193323Sed } else if (i == 0) { // If operand 0 2293201360Srdivacky if (MI->getOpcode() == X86::MOV32r0) 2294193323Sed NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); 2295193323Sed else if (MI->getOpcode() == X86::MOV8r0) 2296193323Sed NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); 2297193323Sed if (NewMI) 2298193323Sed return NewMI; 2299193323Sed 2300193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 2301193323Sed } else if (i == 1) { 2302193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 2303193323Sed } else if (i == 2) { 2304193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 2305193323Sed } 2306193323Sed 2307193323Sed // If table selected... 
2308193323Sed if (OpcodeTablePtr) { 2309193323Sed // Find the Opcode to fuse 2310199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2311193323Sed OpcodeTablePtr->find((unsigned*)MI->getOpcode()); 2312193323Sed if (I != OpcodeTablePtr->end()) { 2313198090Srdivacky unsigned Opcode = I->second.first; 2314198090Srdivacky unsigned MinAlign = I->second.second; 2315198090Srdivacky if (Align < MinAlign) 2316198090Srdivacky return NULL; 2317198090Srdivacky bool NarrowToMOV32rm = false; 2318198090Srdivacky if (Size) { 2319198090Srdivacky unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); 2320198090Srdivacky if (Size < RCSize) { 2321198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 2322198090Srdivacky // narrower than the load width, then it's not. 2323198090Srdivacky if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) 2324198090Srdivacky return NULL; 2325198090Srdivacky // If this is a 64-bit load, but the spill slot is 32, then we can do 2326198090Srdivacky // a 32-bit load which is implicitly zero-extended. This likely is due 2327198090Srdivacky // to liveintervalanalysis remat'ing a load from stack slot. 2328198090Srdivacky if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) 2329198090Srdivacky return NULL; 2330198090Srdivacky Opcode = X86::MOV32rm; 2331198090Srdivacky NarrowToMOV32rm = true; 2332198090Srdivacky } 2333198090Srdivacky } 2334198090Srdivacky 2335193323Sed if (isTwoAddrFold) 2336198090Srdivacky NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); 2337193323Sed else 2338198090Srdivacky NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this); 2339198090Srdivacky 2340198090Srdivacky if (NarrowToMOV32rm) { 2341198090Srdivacky // If this is the special case where we use a MOV32rm to load a 32-bit 2342198090Srdivacky // value and zero-extend the top bits. Change the destination register 2343198090Srdivacky // to a 32-bit one. 
2344198090Srdivacky unsigned DstReg = NewMI->getOperand(0).getReg(); 2345198090Srdivacky if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 2346198090Srdivacky NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, 2347198090Srdivacky 4/*x86_subreg_32bit*/)); 2348198090Srdivacky else 2349198090Srdivacky NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/); 2350198090Srdivacky } 2351193323Sed return NewMI; 2352193323Sed } 2353193323Sed } 2354193323Sed 2355193323Sed // No fusion 2356193323Sed if (PrintFailedFusing) 2357198090Srdivacky errs() << "We failed to fuse operand " << i << " in " << *MI; 2358193323Sed return NULL; 2359193323Sed} 2360193323Sed 2361193323Sed 2362193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2363193323Sed MachineInstr *MI, 2364198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 2365193323Sed int FrameIndex) const { 2366193323Sed // Check switch flag 2367193323Sed if (NoFusing) return NULL; 2368193323Sed 2369201360Srdivacky if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2370201360Srdivacky switch (MI->getOpcode()) { 2371201360Srdivacky case X86::CVTSD2SSrr: 2372201360Srdivacky case X86::Int_CVTSD2SSrr: 2373201360Srdivacky case X86::CVTSS2SDrr: 2374201360Srdivacky case X86::Int_CVTSS2SDrr: 2375201360Srdivacky case X86::RCPSSr: 2376201360Srdivacky case X86::RCPSSr_Int: 2377201360Srdivacky case X86::ROUNDSDr_Int: 2378201360Srdivacky case X86::ROUNDSSr_Int: 2379201360Srdivacky case X86::RSQRTSSr: 2380201360Srdivacky case X86::RSQRTSSr_Int: 2381201360Srdivacky case X86::SQRTSSr: 2382201360Srdivacky case X86::SQRTSSr_Int: 2383201360Srdivacky return 0; 2384201360Srdivacky } 2385201360Srdivacky 2386193323Sed const MachineFrameInfo *MFI = MF.getFrameInfo(); 2387198090Srdivacky unsigned Size = MFI->getObjectSize(FrameIndex); 2388193323Sed unsigned Alignment = MFI->getObjectAlignment(FrameIndex); 2389193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2390193323Sed unsigned NewOpc = 0; 
2391198090Srdivacky unsigned RCSize = 0; 2392193323Sed switch (MI->getOpcode()) { 2393193323Sed default: return NULL; 2394198090Srdivacky case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; 2395198090Srdivacky case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; 2396198090Srdivacky case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; 2397198090Srdivacky case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; 2398193323Sed } 2399198090Srdivacky // Check if it's safe to fold the load. If the size of the object is 2400198090Srdivacky // narrower than the load width, then it's not. 2401198090Srdivacky if (Size < RCSize) 2402198090Srdivacky return NULL; 2403193323Sed // Change to CMPXXri r, 0 first. 2404193323Sed MI->setDesc(get(NewOpc)); 2405193323Sed MI->getOperand(1).ChangeToImmediate(0); 2406193323Sed } else if (Ops.size() != 1) 2407193323Sed return NULL; 2408193323Sed 2409193323Sed SmallVector<MachineOperand,4> MOs; 2410193323Sed MOs.push_back(MachineOperand::CreateFI(FrameIndex)); 2411198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); 2412193323Sed} 2413193323Sed 2414193323SedMachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2415193323Sed MachineInstr *MI, 2416198090Srdivacky const SmallVectorImpl<unsigned> &Ops, 2417193323Sed MachineInstr *LoadMI) const { 2418193323Sed // Check switch flag 2419193323Sed if (NoFusing) return NULL; 2420193323Sed 2421201360Srdivacky if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2422201360Srdivacky switch (MI->getOpcode()) { 2423201360Srdivacky case X86::CVTSD2SSrr: 2424201360Srdivacky case X86::Int_CVTSD2SSrr: 2425201360Srdivacky case X86::CVTSS2SDrr: 2426201360Srdivacky case X86::Int_CVTSS2SDrr: 2427201360Srdivacky case X86::RCPSSr: 2428201360Srdivacky case X86::RCPSSr_Int: 2429201360Srdivacky case X86::ROUNDSDr_Int: 2430201360Srdivacky case X86::ROUNDSSr_Int: 2431201360Srdivacky case X86::RSQRTSSr: 2432201360Srdivacky case 
X86::RSQRTSSr_Int: 2433201360Srdivacky case X86::SQRTSSr: 2434201360Srdivacky case X86::SQRTSSr_Int: 2435201360Srdivacky return 0; 2436201360Srdivacky } 2437201360Srdivacky 2438193323Sed // Determine the alignment of the load. 2439193323Sed unsigned Alignment = 0; 2440193323Sed if (LoadMI->hasOneMemOperand()) 2441198090Srdivacky Alignment = (*LoadMI->memoperands_begin())->getAlignment(); 2442198090Srdivacky else 2443198090Srdivacky switch (LoadMI->getOpcode()) { 2444198090Srdivacky case X86::V_SET0: 2445198090Srdivacky case X86::V_SETALLONES: 2446198090Srdivacky Alignment = 16; 2447198090Srdivacky break; 2448198090Srdivacky case X86::FsFLD0SD: 2449198090Srdivacky Alignment = 8; 2450198090Srdivacky break; 2451198090Srdivacky case X86::FsFLD0SS: 2452198090Srdivacky Alignment = 4; 2453198090Srdivacky break; 2454198090Srdivacky default: 2455198090Srdivacky llvm_unreachable("Don't know how to fold this instruction!"); 2456193323Sed } 2457193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2458193323Sed unsigned NewOpc = 0; 2459193323Sed switch (MI->getOpcode()) { 2460193323Sed default: return NULL; 2461193323Sed case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 2462193323Sed case X86::TEST16rr: NewOpc = X86::CMP16ri; break; 2463193323Sed case X86::TEST32rr: NewOpc = X86::CMP32ri; break; 2464193323Sed case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; 2465193323Sed } 2466193323Sed // Change to CMPXXri r, 0 first. 2467193323Sed MI->setDesc(get(NewOpc)); 2468193323Sed MI->getOperand(1).ChangeToImmediate(0); 2469193323Sed } else if (Ops.size() != 1) 2470193323Sed return NULL; 2471193323Sed 2472193323Sed SmallVector<MachineOperand,X86AddrNumOperands> MOs; 2473198090Srdivacky switch (LoadMI->getOpcode()) { 2474198090Srdivacky case X86::V_SET0: 2475198090Srdivacky case X86::V_SETALLONES: 2476198090Srdivacky case X86::FsFLD0SD: 2477198090Srdivacky case X86::FsFLD0SS: { 2478193323Sed // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. 
2479193323Sed // Create a constant-pool entry and operands to load from it. 2480193323Sed 2481193323Sed // x86-32 PIC requires a PIC base register for constant pools. 2482193323Sed unsigned PICBase = 0; 2483198090Srdivacky if (TM.getRelocationModel() == Reloc::PIC_) { 2484198090Srdivacky if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2485198090Srdivacky PICBase = X86::RIP; 2486198090Srdivacky else 2487198090Srdivacky // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); 2488198090Srdivacky // This doesn't work for several reasons. 2489198090Srdivacky // 1. GlobalBaseReg may have been spilled. 2490198090Srdivacky // 2. It may not be live at MI. 2491198090Srdivacky return NULL; 2492198090Srdivacky } 2493193323Sed 2494198090Srdivacky // Create a constant-pool entry. 2495193323Sed MachineConstantPool &MCP = *MF.getConstantPool(); 2496198090Srdivacky const Type *Ty; 2497198090Srdivacky if (LoadMI->getOpcode() == X86::FsFLD0SS) 2498198090Srdivacky Ty = Type::getFloatTy(MF.getFunction()->getContext()); 2499198090Srdivacky else if (LoadMI->getOpcode() == X86::FsFLD0SD) 2500198090Srdivacky Ty = Type::getDoubleTy(MF.getFunction()->getContext()); 2501198090Srdivacky else 2502198090Srdivacky Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); 2503198090Srdivacky Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? 2504198090Srdivacky Constant::getAllOnesValue(Ty) : 2505198090Srdivacky Constant::getNullValue(Ty); 2506198090Srdivacky unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); 2507193323Sed 2508193323Sed // Create operands to load from the constant pool entry. 
2509193323Sed MOs.push_back(MachineOperand::CreateReg(PICBase, false)); 2510193323Sed MOs.push_back(MachineOperand::CreateImm(1)); 2511193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 2512193323Sed MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); 2513193323Sed MOs.push_back(MachineOperand::CreateReg(0, false)); 2514198090Srdivacky break; 2515198090Srdivacky } 2516198090Srdivacky default: { 2517193323Sed // Folding a normal load. Just copy the load's address operands. 2518193323Sed unsigned NumOps = LoadMI->getDesc().getNumOperands(); 2519193323Sed for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) 2520193323Sed MOs.push_back(LoadMI->getOperand(i)); 2521198090Srdivacky break; 2522193323Sed } 2523198090Srdivacky } 2524198090Srdivacky return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); 2525193323Sed} 2526193323Sed 2527193323Sed 2528193323Sedbool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, 2529193323Sed const SmallVectorImpl<unsigned> &Ops) const { 2530193323Sed // Check switch flag 2531193323Sed if (NoFusing) return 0; 2532193323Sed 2533193323Sed if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2534193323Sed switch (MI->getOpcode()) { 2535193323Sed default: return false; 2536193323Sed case X86::TEST8rr: 2537193323Sed case X86::TEST16rr: 2538193323Sed case X86::TEST32rr: 2539193323Sed case X86::TEST64rr: 2540193323Sed return true; 2541193323Sed } 2542193323Sed } 2543193323Sed 2544193323Sed if (Ops.size() != 1) 2545193323Sed return false; 2546193323Sed 2547193323Sed unsigned OpNum = Ops[0]; 2548193323Sed unsigned Opc = MI->getOpcode(); 2549193323Sed unsigned NumOps = MI->getDesc().getNumOperands(); 2550193323Sed bool isTwoAddr = NumOps > 1 && 2551193323Sed MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2552193323Sed 2553193323Sed // Folding a memory location into the two-address part of a two-address 2554193323Sed // instruction is different than folding it other places. 
It requires 2555193323Sed // replacing the *two* registers with the memory location. 2556198090Srdivacky const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; 2557193323Sed if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 2558193323Sed OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2559193323Sed } else if (OpNum == 0) { // If operand 0 2560193323Sed switch (Opc) { 2561198090Srdivacky case X86::MOV8r0: 2562193323Sed case X86::MOV32r0: 2563193323Sed return true; 2564193323Sed default: break; 2565193323Sed } 2566193323Sed OpcodeTablePtr = &RegOp2MemOpTable0; 2567193323Sed } else if (OpNum == 1) { 2568193323Sed OpcodeTablePtr = &RegOp2MemOpTable1; 2569193323Sed } else if (OpNum == 2) { 2570193323Sed OpcodeTablePtr = &RegOp2MemOpTable2; 2571193323Sed } 2572193323Sed 2573193323Sed if (OpcodeTablePtr) { 2574193323Sed // Find the Opcode to fuse 2575199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2576193323Sed OpcodeTablePtr->find((unsigned*)Opc); 2577193323Sed if (I != OpcodeTablePtr->end()) 2578193323Sed return true; 2579193323Sed } 2580193323Sed return false; 2581193323Sed} 2582193323Sed 2583193323Sedbool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, 2584193323Sed unsigned Reg, bool UnfoldLoad, bool UnfoldStore, 2585193323Sed SmallVectorImpl<MachineInstr*> &NewMIs) const { 2586199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2587193323Sed MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); 2588193323Sed if (I == MemOp2RegOpTable.end()) 2589193323Sed return false; 2590193323Sed unsigned Opc = I->second.first; 2591193323Sed unsigned Index = I->second.second & 0xf; 2592193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2593193323Sed bool FoldedStore = I->second.second & (1 << 5); 2594193323Sed if (UnfoldLoad && !FoldedLoad) 2595193323Sed return false; 2596193323Sed UnfoldLoad &= FoldedLoad; 2597193323Sed if (UnfoldStore && !FoldedStore) 2598193323Sed 
return false; 2599193323Sed UnfoldStore &= FoldedStore; 2600193323Sed 2601193323Sed const TargetInstrDesc &TID = get(Opc); 2602193323Sed const TargetOperandInfo &TOI = TID.OpInfo[Index]; 2603198090Srdivacky const TargetRegisterClass *RC = TOI.getRegClass(&RI); 2604193323Sed SmallVector<MachineOperand, X86AddrNumOperands> AddrOps; 2605193323Sed SmallVector<MachineOperand,2> BeforeOps; 2606193323Sed SmallVector<MachineOperand,2> AfterOps; 2607193323Sed SmallVector<MachineOperand,4> ImpOps; 2608193323Sed for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2609193323Sed MachineOperand &Op = MI->getOperand(i); 2610193323Sed if (i >= Index && i < Index + X86AddrNumOperands) 2611193323Sed AddrOps.push_back(Op); 2612193323Sed else if (Op.isReg() && Op.isImplicit()) 2613193323Sed ImpOps.push_back(Op); 2614193323Sed else if (i < Index) 2615193323Sed BeforeOps.push_back(Op); 2616193323Sed else if (i > Index) 2617193323Sed AfterOps.push_back(Op); 2618193323Sed } 2619193323Sed 2620193323Sed // Emit the load instruction. 2621193323Sed if (UnfoldLoad) { 2622198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2623198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2624198090Srdivacky MF.extractLoadMemRefs(MI->memoperands_begin(), 2625198090Srdivacky MI->memoperands_end()); 2626198090Srdivacky loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); 2627193323Sed if (UnfoldStore) { 2628193323Sed // Address operands cannot be marked isKill. 2629193323Sed for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { 2630193323Sed MachineOperand &MO = NewMIs[0]->getOperand(i); 2631193323Sed if (MO.isReg()) 2632193323Sed MO.setIsKill(false); 2633193323Sed } 2634193323Sed } 2635193323Sed } 2636193323Sed 2637193323Sed // Emit the data processing instruction. 
2638193323Sed MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); 2639193323Sed MachineInstrBuilder MIB(DataMI); 2640193323Sed 2641193323Sed if (FoldedStore) 2642193323Sed MIB.addReg(Reg, RegState::Define); 2643193323Sed for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) 2644193323Sed MIB.addOperand(BeforeOps[i]); 2645193323Sed if (FoldedLoad) 2646193323Sed MIB.addReg(Reg); 2647193323Sed for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) 2648193323Sed MIB.addOperand(AfterOps[i]); 2649193323Sed for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { 2650193323Sed MachineOperand &MO = ImpOps[i]; 2651193323Sed MIB.addReg(MO.getReg(), 2652193323Sed getDefRegState(MO.isDef()) | 2653193323Sed RegState::Implicit | 2654193323Sed getKillRegState(MO.isKill()) | 2655195340Sed getDeadRegState(MO.isDead()) | 2656195340Sed getUndefRegState(MO.isUndef())); 2657193323Sed } 2658193323Sed // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 2659193323Sed unsigned NewOpc = 0; 2660193323Sed switch (DataMI->getOpcode()) { 2661193323Sed default: break; 2662193323Sed case X86::CMP64ri32: 2663193323Sed case X86::CMP32ri: 2664193323Sed case X86::CMP16ri: 2665193323Sed case X86::CMP8ri: { 2666193323Sed MachineOperand &MO0 = DataMI->getOperand(0); 2667193323Sed MachineOperand &MO1 = DataMI->getOperand(1); 2668193323Sed if (MO1.getImm() == 0) { 2669193323Sed switch (DataMI->getOpcode()) { 2670193323Sed default: break; 2671193323Sed case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; 2672193323Sed case X86::CMP32ri: NewOpc = X86::TEST32rr; break; 2673193323Sed case X86::CMP16ri: NewOpc = X86::TEST16rr; break; 2674193323Sed case X86::CMP8ri: NewOpc = X86::TEST8rr; break; 2675193323Sed } 2676193323Sed DataMI->setDesc(get(NewOpc)); 2677193323Sed MO1.ChangeToRegister(MO0.getReg(), false); 2678193323Sed } 2679193323Sed } 2680193323Sed } 2681193323Sed NewMIs.push_back(DataMI); 2682193323Sed 2683193323Sed // Emit the store instruction. 
2684193323Sed if (UnfoldStore) { 2685198090Srdivacky const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); 2686198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2687198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2688198090Srdivacky MF.extractStoreMemRefs(MI->memoperands_begin(), 2689198090Srdivacky MI->memoperands_end()); 2690198090Srdivacky storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); 2691193323Sed } 2692193323Sed 2693193323Sed return true; 2694193323Sed} 2695193323Sed 2696193323Sedbool 2697193323SedX86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 2698193323Sed SmallVectorImpl<SDNode*> &NewNodes) const { 2699193323Sed if (!N->isMachineOpcode()) 2700193323Sed return false; 2701193323Sed 2702199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2703193323Sed MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); 2704193323Sed if (I == MemOp2RegOpTable.end()) 2705193323Sed return false; 2706193323Sed unsigned Opc = I->second.first; 2707193323Sed unsigned Index = I->second.second & 0xf; 2708193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2709193323Sed bool FoldedStore = I->second.second & (1 << 5); 2710193323Sed const TargetInstrDesc &TID = get(Opc); 2711198090Srdivacky const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); 2712193323Sed unsigned NumDefs = TID.NumDefs; 2713193323Sed std::vector<SDValue> AddrOps; 2714193323Sed std::vector<SDValue> BeforeOps; 2715193323Sed std::vector<SDValue> AfterOps; 2716193323Sed DebugLoc dl = N->getDebugLoc(); 2717193323Sed unsigned NumOps = N->getNumOperands(); 2718193323Sed for (unsigned i = 0; i != NumOps-1; ++i) { 2719193323Sed SDValue Op = N->getOperand(i); 2720193323Sed if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) 2721193323Sed AddrOps.push_back(Op); 2722193323Sed else if (i < Index-NumDefs) 2723193323Sed BeforeOps.push_back(Op); 2724193323Sed else if (i > Index-NumDefs) 
2725193323Sed AfterOps.push_back(Op); 2726193323Sed } 2727193323Sed SDValue Chain = N->getOperand(NumOps-1); 2728193323Sed AddrOps.push_back(Chain); 2729193323Sed 2730193323Sed // Emit the load instruction. 2731193323Sed SDNode *Load = 0; 2732198090Srdivacky MachineFunction &MF = DAG.getMachineFunction(); 2733193323Sed if (FoldedLoad) { 2734198090Srdivacky EVT VT = *RC->vt_begin(); 2735199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2736199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2737199481Srdivacky MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2738199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2739199481Srdivacky bool isAligned = (*MMOs.first)->getAlignment() >= 16; 2740198090Srdivacky Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, 2741198090Srdivacky VT, MVT::Other, &AddrOps[0], AddrOps.size()); 2742193323Sed NewNodes.push_back(Load); 2743198090Srdivacky 2744198090Srdivacky // Preserve memory reference information. 2745198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2746193323Sed } 2747193323Sed 2748193323Sed // Emit the data processing instruction. 
2749198090Srdivacky std::vector<EVT> VTs; 2750193323Sed const TargetRegisterClass *DstRC = 0; 2751193323Sed if (TID.getNumDefs() > 0) { 2752198090Srdivacky DstRC = TID.OpInfo[0].getRegClass(&RI); 2753193323Sed VTs.push_back(*DstRC->vt_begin()); 2754193323Sed } 2755193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 2756198090Srdivacky EVT VT = N->getValueType(i); 2757193323Sed if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) 2758193323Sed VTs.push_back(VT); 2759193323Sed } 2760193323Sed if (Load) 2761193323Sed BeforeOps.push_back(SDValue(Load, 0)); 2762193323Sed std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); 2763198090Srdivacky SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], 2764198090Srdivacky BeforeOps.size()); 2765193323Sed NewNodes.push_back(NewNode); 2766193323Sed 2767193323Sed // Emit the store instruction. 2768193323Sed if (FoldedStore) { 2769193323Sed AddrOps.pop_back(); 2770193323Sed AddrOps.push_back(SDValue(NewNode, 0)); 2771193323Sed AddrOps.push_back(Chain); 2772199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2773199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2774199481Srdivacky MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2775199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2776199481Srdivacky bool isAligned = (*MMOs.first)->getAlignment() >= 16; 2777198090Srdivacky SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, 2778198090Srdivacky isAligned, TM), 2779198090Srdivacky dl, MVT::Other, 2780198090Srdivacky &AddrOps[0], AddrOps.size()); 2781193323Sed NewNodes.push_back(Store); 2782198090Srdivacky 2783198090Srdivacky // Preserve memory reference information. 
2784198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2785193323Sed } 2786193323Sed 2787193323Sed return true; 2788193323Sed} 2789193323Sed 2790193323Sedunsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, 2791198892Srdivacky bool UnfoldLoad, bool UnfoldStore, 2792198892Srdivacky unsigned *LoadRegIndex) const { 2793199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2794193323Sed MemOp2RegOpTable.find((unsigned*)Opc); 2795193323Sed if (I == MemOp2RegOpTable.end()) 2796193323Sed return 0; 2797193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2798193323Sed bool FoldedStore = I->second.second & (1 << 5); 2799193323Sed if (UnfoldLoad && !FoldedLoad) 2800193323Sed return 0; 2801193323Sed if (UnfoldStore && !FoldedStore) 2802193323Sed return 0; 2803198892Srdivacky if (LoadRegIndex) 2804198892Srdivacky *LoadRegIndex = I->second.second & 0xf; 2805193323Sed return I->second.first; 2806193323Sed} 2807193323Sed 2808193323Sedbool X86InstrInfo:: 2809193323SedReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 2810193323Sed assert(Cond.size() == 1 && "Invalid X86 branch condition!"); 2811193323Sed X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); 2812193323Sed if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) 2813193323Sed return true; 2814193323Sed Cond[0].setImm(GetOppositeBranchCondition(CC)); 2815193323Sed return false; 2816193323Sed} 2817193323Sed 2818193323Sedbool X86InstrInfo:: 2819193323SedisSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 2820193323Sed // FIXME: Return false for x87 stack register classes for now. We can't 2821193323Sed // allow any loads of these registers before FpGet_ST0_80. 
2822193323Sed return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || 2823193323Sed RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); 2824193323Sed} 2825193323Sed 2826193323Sedunsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { 2827193323Sed switch (Desc->TSFlags & X86II::ImmMask) { 2828193323Sed case X86II::Imm8: return 1; 2829193323Sed case X86II::Imm16: return 2; 2830193323Sed case X86II::Imm32: return 4; 2831193323Sed case X86II::Imm64: return 8; 2832198090Srdivacky default: llvm_unreachable("Immediate size not set!"); 2833193323Sed return 0; 2834193323Sed } 2835193323Sed} 2836193323Sed 2837193323Sed/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register? 2838193323Sed/// e.g. r8, xmm8, etc. 2839193323Sedbool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) { 2840193323Sed if (!MO.isReg()) return false; 2841193323Sed switch (MO.getReg()) { 2842193323Sed default: break; 2843193323Sed case X86::R8: case X86::R9: case X86::R10: case X86::R11: 2844193323Sed case X86::R12: case X86::R13: case X86::R14: case X86::R15: 2845193323Sed case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: 2846193323Sed case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: 2847193323Sed case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: 2848193323Sed case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: 2849193323Sed case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: 2850193323Sed case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: 2851193323Sed case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: 2852193323Sed case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: 2853193323Sed return true; 2854193323Sed } 2855193323Sed return false; 2856193323Sed} 2857193323Sed 2858193323Sed 2859193323Sed/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 2860193323Sed/// REX prefix which specifies 1) 64-bit instructions, 2) 
non-default operand 2861193323Sed/// size, and 3) use of X86-64 extended registers. 2862193323Sedunsigned X86InstrInfo::determineREX(const MachineInstr &MI) { 2863193323Sed unsigned REX = 0; 2864193323Sed const TargetInstrDesc &Desc = MI.getDesc(); 2865193323Sed 2866193323Sed // Pseudo instructions do not need REX prefix byte. 2867193323Sed if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) 2868193323Sed return 0; 2869193323Sed if (Desc.TSFlags & X86II::REX_W) 2870193323Sed REX |= 1 << 3; 2871193323Sed 2872193323Sed unsigned NumOps = Desc.getNumOperands(); 2873193323Sed if (NumOps) { 2874193323Sed bool isTwoAddr = NumOps > 1 && 2875193323Sed Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; 2876193323Sed 2877193323Sed // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. 2878193323Sed unsigned i = isTwoAddr ? 1 : 0; 2879193323Sed for (unsigned e = NumOps; i != e; ++i) { 2880193323Sed const MachineOperand& MO = MI.getOperand(i); 2881193323Sed if (MO.isReg()) { 2882193323Sed unsigned Reg = MO.getReg(); 2883193323Sed if (isX86_64NonExtLowByteReg(Reg)) 2884193323Sed REX |= 0x40; 2885193323Sed } 2886193323Sed } 2887193323Sed 2888193323Sed switch (Desc.TSFlags & X86II::FormMask) { 2889193323Sed case X86II::MRMInitReg: 2890193323Sed if (isX86_64ExtendedReg(MI.getOperand(0))) 2891193323Sed REX |= (1 << 0) | (1 << 2); 2892193323Sed break; 2893193323Sed case X86II::MRMSrcReg: { 2894193323Sed if (isX86_64ExtendedReg(MI.getOperand(0))) 2895193323Sed REX |= 1 << 2; 2896193323Sed i = isTwoAddr ? 2 : 1; 2897193323Sed for (unsigned e = NumOps; i != e; ++i) { 2898193323Sed const MachineOperand& MO = MI.getOperand(i); 2899193323Sed if (isX86_64ExtendedReg(MO)) 2900193323Sed REX |= 1 << 0; 2901193323Sed } 2902193323Sed break; 2903193323Sed } 2904193323Sed case X86II::MRMSrcMem: { 2905193323Sed if (isX86_64ExtendedReg(MI.getOperand(0))) 2906193323Sed REX |= 1 << 2; 2907193323Sed unsigned Bit = 0; 2908193323Sed i = isTwoAddr ? 
2 : 1; 2909193323Sed for (; i != NumOps; ++i) { 2910193323Sed const MachineOperand& MO = MI.getOperand(i); 2911193323Sed if (MO.isReg()) { 2912193323Sed if (isX86_64ExtendedReg(MO)) 2913193323Sed REX |= 1 << Bit; 2914193323Sed Bit++; 2915193323Sed } 2916193323Sed } 2917193323Sed break; 2918193323Sed } 2919193323Sed case X86II::MRM0m: case X86II::MRM1m: 2920193323Sed case X86II::MRM2m: case X86II::MRM3m: 2921193323Sed case X86II::MRM4m: case X86II::MRM5m: 2922193323Sed case X86II::MRM6m: case X86II::MRM7m: 2923193323Sed case X86II::MRMDestMem: { 2924193323Sed unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); 2925193323Sed i = isTwoAddr ? 1 : 0; 2926193323Sed if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) 2927193323Sed REX |= 1 << 2; 2928193323Sed unsigned Bit = 0; 2929193323Sed for (; i != e; ++i) { 2930193323Sed const MachineOperand& MO = MI.getOperand(i); 2931193323Sed if (MO.isReg()) { 2932193323Sed if (isX86_64ExtendedReg(MO)) 2933193323Sed REX |= 1 << Bit; 2934193323Sed Bit++; 2935193323Sed } 2936193323Sed } 2937193323Sed break; 2938193323Sed } 2939193323Sed default: { 2940193323Sed if (isX86_64ExtendedReg(MI.getOperand(0))) 2941193323Sed REX |= 1 << 0; 2942193323Sed i = isTwoAddr ? 
2 : 1; 2943193323Sed for (unsigned e = NumOps; i != e; ++i) { 2944193323Sed const MachineOperand& MO = MI.getOperand(i); 2945193323Sed if (isX86_64ExtendedReg(MO)) 2946193323Sed REX |= 1 << 2; 2947193323Sed } 2948193323Sed break; 2949193323Sed } 2950193323Sed } 2951193323Sed } 2952193323Sed return REX; 2953193323Sed} 2954193323Sed 2955193323Sed/// sizePCRelativeBlockAddress - This method returns the size of a PC 2956193323Sed/// relative block address instruction 2957193323Sed/// 2958193323Sedstatic unsigned sizePCRelativeBlockAddress() { 2959193323Sed return 4; 2960193323Sed} 2961193323Sed 2962193323Sed/// sizeGlobalAddress - Give the size of the emission of this global address 2963193323Sed/// 2964193323Sedstatic unsigned sizeGlobalAddress(bool dword) { 2965193323Sed return dword ? 8 : 4; 2966193323Sed} 2967193323Sed 2968193323Sed/// sizeConstPoolAddress - Give the size of the emission of this constant 2969193323Sed/// pool address 2970193323Sed/// 2971193323Sedstatic unsigned sizeConstPoolAddress(bool dword) { 2972193323Sed return dword ? 8 : 4; 2973193323Sed} 2974193323Sed 2975193323Sed/// sizeExternalSymbolAddress - Give the size of the emission of this external 2976193323Sed/// symbol 2977193323Sed/// 2978193323Sedstatic unsigned sizeExternalSymbolAddress(bool dword) { 2979193323Sed return dword ? 8 : 4; 2980193323Sed} 2981193323Sed 2982193323Sed/// sizeJumpTableAddress - Give the size of the emission of this jump 2983193323Sed/// table address 2984193323Sed/// 2985193323Sedstatic unsigned sizeJumpTableAddress(bool dword) { 2986193323Sed return dword ? 
8 : 4; 2987193323Sed} 2988193323Sed 2989193323Sedstatic unsigned sizeConstant(unsigned Size) { 2990193323Sed return Size; 2991193323Sed} 2992193323Sed 2993193323Sedstatic unsigned sizeRegModRMByte(){ 2994193323Sed return 1; 2995193323Sed} 2996193323Sed 2997193323Sedstatic unsigned sizeSIBByte(){ 2998193323Sed return 1; 2999193323Sed} 3000193323Sed 3001193323Sedstatic unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { 3002193323Sed unsigned FinalSize = 0; 3003193323Sed // If this is a simple integer displacement that doesn't require a relocation. 3004193323Sed if (!RelocOp) { 3005193323Sed FinalSize += sizeConstant(4); 3006193323Sed return FinalSize; 3007193323Sed } 3008193323Sed 3009193323Sed // Otherwise, this is something that requires a relocation. 3010193323Sed if (RelocOp->isGlobal()) { 3011193323Sed FinalSize += sizeGlobalAddress(false); 3012193323Sed } else if (RelocOp->isCPI()) { 3013193323Sed FinalSize += sizeConstPoolAddress(false); 3014193323Sed } else if (RelocOp->isJTI()) { 3015193323Sed FinalSize += sizeJumpTableAddress(false); 3016193323Sed } else { 3017198090Srdivacky llvm_unreachable("Unknown value to relocate!"); 3018193323Sed } 3019193323Sed return FinalSize; 3020193323Sed} 3021193323Sed 3022193323Sedstatic unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, 3023193323Sed bool IsPIC, bool Is64BitMode) { 3024193323Sed const MachineOperand &Op3 = MI.getOperand(Op+3); 3025193323Sed int DispVal = 0; 3026193323Sed const MachineOperand *DispForReloc = 0; 3027193323Sed unsigned FinalSize = 0; 3028193323Sed 3029193323Sed // Figure out what sort of displacement we have to handle here. 
3030193323Sed if (Op3.isGlobal()) { 3031193323Sed DispForReloc = &Op3; 3032193323Sed } else if (Op3.isCPI()) { 3033193323Sed if (Is64BitMode || IsPIC) { 3034193323Sed DispForReloc = &Op3; 3035193323Sed } else { 3036193323Sed DispVal = 1; 3037193323Sed } 3038193323Sed } else if (Op3.isJTI()) { 3039193323Sed if (Is64BitMode || IsPIC) { 3040193323Sed DispForReloc = &Op3; 3041193323Sed } else { 3042193323Sed DispVal = 1; 3043193323Sed } 3044193323Sed } else { 3045193323Sed DispVal = 1; 3046193323Sed } 3047193323Sed 3048193323Sed const MachineOperand &Base = MI.getOperand(Op); 3049193323Sed const MachineOperand &IndexReg = MI.getOperand(Op+2); 3050193323Sed 3051193323Sed unsigned BaseReg = Base.getReg(); 3052193323Sed 3053193323Sed // Is a SIB byte needed? 3054193323Sed if ((!Is64BitMode || DispForReloc || BaseReg != 0) && 3055193323Sed IndexReg.getReg() == 0 && 3056193323Sed (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { 3057193323Sed if (BaseReg == 0) { // Just a displacement? 3058193323Sed // Emit special case [disp32] encoding 3059193323Sed ++FinalSize; 3060193323Sed FinalSize += getDisplacementFieldSize(DispForReloc); 3061193323Sed } else { 3062193323Sed unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg); 3063193323Sed if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { 3064193323Sed // Emit simple indirect register encoding... [EAX] f.e. 
3065193323Sed ++FinalSize; 3066193323Sed // Be pessimistic and assume it's a disp32, not a disp8 3067193323Sed } else { 3068193323Sed // Emit the most general non-SIB encoding: [REG+disp32] 3069193323Sed ++FinalSize; 3070193323Sed FinalSize += getDisplacementFieldSize(DispForReloc); 3071193323Sed } 3072193323Sed } 3073193323Sed 3074193323Sed } else { // We need a SIB byte, so start by outputting the ModR/M byte first 3075193323Sed assert(IndexReg.getReg() != X86::ESP && 3076193323Sed IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); 3077193323Sed 3078193323Sed bool ForceDisp32 = false; 3079193323Sed if (BaseReg == 0 || DispForReloc) { 3080193323Sed // Emit the normal disp32 encoding. 3081193323Sed ++FinalSize; 3082193323Sed ForceDisp32 = true; 3083193323Sed } else { 3084193323Sed ++FinalSize; 3085193323Sed } 3086193323Sed 3087193323Sed FinalSize += sizeSIBByte(); 3088193323Sed 3089193323Sed // Do we need to output a displacement? 3090193323Sed if (DispVal != 0 || ForceDisp32) { 3091193323Sed FinalSize += getDisplacementFieldSize(DispForReloc); 3092193323Sed } 3093193323Sed } 3094193323Sed return FinalSize; 3095193323Sed} 3096193323Sed 3097193323Sed 3098193323Sedstatic unsigned GetInstSizeWithDesc(const MachineInstr &MI, 3099193323Sed const TargetInstrDesc *Desc, 3100193323Sed bool IsPIC, bool Is64BitMode) { 3101193323Sed 3102193323Sed unsigned Opcode = Desc->Opcode; 3103193323Sed unsigned FinalSize = 0; 3104193323Sed 3105193323Sed // Emit the lock opcode prefix as needed. 3106193323Sed if (Desc->TSFlags & X86II::LOCK) ++FinalSize; 3107193323Sed 3108193323Sed // Emit segment override opcode prefix as needed. 3109193323Sed switch (Desc->TSFlags & X86II::SegOvrMask) { 3110193323Sed case X86II::FS: 3111193323Sed case X86II::GS: 3112193323Sed ++FinalSize; 3113193323Sed break; 3114198090Srdivacky default: llvm_unreachable("Invalid segment!"); 3115193323Sed case 0: break; // No segment override! 
3116193323Sed } 3117193323Sed 3118193323Sed // Emit the repeat opcode prefix as needed. 3119193323Sed if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize; 3120193323Sed 3121193323Sed // Emit the operand size opcode prefix as needed. 3122193323Sed if (Desc->TSFlags & X86II::OpSize) ++FinalSize; 3123193323Sed 3124193323Sed // Emit the address size opcode prefix as needed. 3125193323Sed if (Desc->TSFlags & X86II::AdSize) ++FinalSize; 3126193323Sed 3127193323Sed bool Need0FPrefix = false; 3128193323Sed switch (Desc->TSFlags & X86II::Op0Mask) { 3129193323Sed case X86II::TB: // Two-byte opcode prefix 3130193323Sed case X86II::T8: // 0F 38 3131193323Sed case X86II::TA: // 0F 3A 3132193323Sed Need0FPrefix = true; 3133193323Sed break; 3134198090Srdivacky case X86II::TF: // F2 0F 38 3135198090Srdivacky ++FinalSize; 3136198090Srdivacky Need0FPrefix = true; 3137198090Srdivacky break; 3138193323Sed case X86II::REP: break; // already handled. 3139193323Sed case X86II::XS: // F3 0F 3140193323Sed ++FinalSize; 3141193323Sed Need0FPrefix = true; 3142193323Sed break; 3143193323Sed case X86II::XD: // F2 0F 3144193323Sed ++FinalSize; 3145193323Sed Need0FPrefix = true; 3146193323Sed break; 3147193323Sed case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: 3148193323Sed case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: 3149193323Sed ++FinalSize; 3150193323Sed break; // Two-byte opcode prefix 3151198090Srdivacky default: llvm_unreachable("Invalid prefix!"); 3152193323Sed case 0: break; // No prefix! 3153193323Sed } 3154193323Sed 3155193323Sed if (Is64BitMode) { 3156193323Sed // REX prefix 3157193323Sed unsigned REX = X86InstrInfo::determineREX(MI); 3158193323Sed if (REX) 3159193323Sed ++FinalSize; 3160193323Sed } 3161193323Sed 3162193323Sed // 0x0F escape code must be emitted just before the opcode. 
3163193323Sed if (Need0FPrefix) 3164193323Sed ++FinalSize; 3165193323Sed 3166193323Sed switch (Desc->TSFlags & X86II::Op0Mask) { 3167193323Sed case X86II::T8: // 0F 38 3168193323Sed ++FinalSize; 3169193323Sed break; 3170193323Sed case X86II::TA: // 0F 3A 3171193323Sed ++FinalSize; 3172193323Sed break; 3173198090Srdivacky case X86II::TF: // F2 0F 38 3174198090Srdivacky ++FinalSize; 3175198090Srdivacky break; 3176193323Sed } 3177193323Sed 3178193323Sed // If this is a two-address instruction, skip one of the register operands. 3179193323Sed unsigned NumOps = Desc->getNumOperands(); 3180193323Sed unsigned CurOp = 0; 3181193323Sed if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) 3182193323Sed CurOp++; 3183193323Sed else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) 3184193323Sed // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 3185193323Sed --NumOps; 3186193323Sed 3187193323Sed switch (Desc->TSFlags & X86II::FormMask) { 3188198090Srdivacky default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); 3189193323Sed case X86II::Pseudo: 3190193323Sed // Remember the current PC offset, this is the PIC relocation 3191193323Sed // base address. 
3192193323Sed switch (Opcode) { 3193193323Sed default: 3194193323Sed break; 3195193323Sed case TargetInstrInfo::INLINEASM: { 3196193323Sed const MachineFunction *MF = MI.getParent()->getParent(); 3197198090Srdivacky const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); 3198198090Srdivacky FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), 3199198090Srdivacky *MF->getTarget().getMCAsmInfo()); 3200193323Sed break; 3201193323Sed } 3202193323Sed case TargetInstrInfo::DBG_LABEL: 3203193323Sed case TargetInstrInfo::EH_LABEL: 3204193323Sed break; 3205193323Sed case TargetInstrInfo::IMPLICIT_DEF: 3206198090Srdivacky case TargetInstrInfo::KILL: 3207193323Sed case X86::FP_REG_KILL: 3208193323Sed break; 3209193323Sed case X86::MOVPC32r: { 3210193323Sed // This emits the "call" portion of this pseudo instruction. 3211193323Sed ++FinalSize; 3212193323Sed FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3213193323Sed break; 3214193323Sed } 3215193323Sed } 3216193323Sed CurOp = NumOps; 3217193323Sed break; 3218193323Sed case X86II::RawFrm: 3219193323Sed ++FinalSize; 3220193323Sed 3221193323Sed if (CurOp != NumOps) { 3222193323Sed const MachineOperand &MO = MI.getOperand(CurOp++); 3223193323Sed if (MO.isMBB()) { 3224193323Sed FinalSize += sizePCRelativeBlockAddress(); 3225193323Sed } else if (MO.isGlobal()) { 3226193323Sed FinalSize += sizeGlobalAddress(false); 3227193323Sed } else if (MO.isSymbol()) { 3228193323Sed FinalSize += sizeExternalSymbolAddress(false); 3229193323Sed } else if (MO.isImm()) { 3230193323Sed FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3231193323Sed } else { 3232198090Srdivacky llvm_unreachable("Unknown RawFrm operand!"); 3233193323Sed } 3234193323Sed } 3235193323Sed break; 3236193323Sed 3237193323Sed case X86II::AddRegFrm: 3238193323Sed ++FinalSize; 3239193323Sed ++CurOp; 3240193323Sed 3241193323Sed if (CurOp != NumOps) { 3242193323Sed const MachineOperand &MO1 = MI.getOperand(CurOp++); 3243193323Sed unsigned 
Size = X86InstrInfo::sizeOfImm(Desc); 3244193323Sed if (MO1.isImm()) 3245193323Sed FinalSize += sizeConstant(Size); 3246193323Sed else { 3247193323Sed bool dword = false; 3248193323Sed if (Opcode == X86::MOV64ri) 3249193323Sed dword = true; 3250193323Sed if (MO1.isGlobal()) { 3251193323Sed FinalSize += sizeGlobalAddress(dword); 3252193323Sed } else if (MO1.isSymbol()) 3253193323Sed FinalSize += sizeExternalSymbolAddress(dword); 3254193323Sed else if (MO1.isCPI()) 3255193323Sed FinalSize += sizeConstPoolAddress(dword); 3256193323Sed else if (MO1.isJTI()) 3257193323Sed FinalSize += sizeJumpTableAddress(dword); 3258193323Sed } 3259193323Sed } 3260193323Sed break; 3261193323Sed 3262193323Sed case X86II::MRMDestReg: { 3263193323Sed ++FinalSize; 3264193323Sed FinalSize += sizeRegModRMByte(); 3265193323Sed CurOp += 2; 3266193323Sed if (CurOp != NumOps) { 3267193323Sed ++CurOp; 3268193323Sed FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3269193323Sed } 3270193323Sed break; 3271193323Sed } 3272193323Sed case X86II::MRMDestMem: { 3273193323Sed ++FinalSize; 3274193323Sed FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); 3275193323Sed CurOp += X86AddrNumOperands + 1; 3276193323Sed if (CurOp != NumOps) { 3277193323Sed ++CurOp; 3278193323Sed FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3279193323Sed } 3280193323Sed break; 3281193323Sed } 3282193323Sed 3283193323Sed case X86II::MRMSrcReg: 3284193323Sed ++FinalSize; 3285193323Sed FinalSize += sizeRegModRMByte(); 3286193323Sed CurOp += 2; 3287193323Sed if (CurOp != NumOps) { 3288193323Sed ++CurOp; 3289193323Sed FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3290193323Sed } 3291193323Sed break; 3292193323Sed 3293193323Sed case X86II::MRMSrcMem: { 3294193323Sed int AddrOperands; 3295193323Sed if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || 3296193323Sed Opcode == X86::LEA16r || Opcode == X86::LEA32r) 3297193323Sed AddrOperands = X86AddrNumOperands - 1; // No segment register 
3298193323Sed else 3299193323Sed AddrOperands = X86AddrNumOperands; 3300193323Sed 3301193323Sed ++FinalSize; 3302193323Sed FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode); 3303193323Sed CurOp += AddrOperands + 1; 3304193323Sed if (CurOp != NumOps) { 3305193323Sed ++CurOp; 3306193323Sed FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3307193323Sed } 3308193323Sed break; 3309193323Sed } 3310193323Sed 3311193323Sed case X86II::MRM0r: case X86II::MRM1r: 3312193323Sed case X86II::MRM2r: case X86II::MRM3r: 3313193323Sed case X86II::MRM4r: case X86II::MRM5r: 3314193323Sed case X86II::MRM6r: case X86II::MRM7r: 3315193323Sed ++FinalSize; 3316193323Sed if (Desc->getOpcode() == X86::LFENCE || 3317193323Sed Desc->getOpcode() == X86::MFENCE) { 3318193323Sed // Special handling of lfence and mfence; 3319193323Sed FinalSize += sizeRegModRMByte(); 3320193323Sed } else if (Desc->getOpcode() == X86::MONITOR || 3321193323Sed Desc->getOpcode() == X86::MWAIT) { 3322193323Sed // Special handling of monitor and mwait. 3323193323Sed FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode. 
3324193323Sed } else { 3325193323Sed ++CurOp; 3326193323Sed FinalSize += sizeRegModRMByte(); 3327193323Sed } 3328193323Sed 3329193323Sed if (CurOp != NumOps) { 3330193323Sed const MachineOperand &MO1 = MI.getOperand(CurOp++); 3331193323Sed unsigned Size = X86InstrInfo::sizeOfImm(Desc); 3332193323Sed if (MO1.isImm()) 3333193323Sed FinalSize += sizeConstant(Size); 3334193323Sed else { 3335193323Sed bool dword = false; 3336193323Sed if (Opcode == X86::MOV64ri32) 3337193323Sed dword = true; 3338193323Sed if (MO1.isGlobal()) { 3339193323Sed FinalSize += sizeGlobalAddress(dword); 3340193323Sed } else if (MO1.isSymbol()) 3341193323Sed FinalSize += sizeExternalSymbolAddress(dword); 3342193323Sed else if (MO1.isCPI()) 3343193323Sed FinalSize += sizeConstPoolAddress(dword); 3344193323Sed else if (MO1.isJTI()) 3345193323Sed FinalSize += sizeJumpTableAddress(dword); 3346193323Sed } 3347193323Sed } 3348193323Sed break; 3349193323Sed 3350193323Sed case X86II::MRM0m: case X86II::MRM1m: 3351193323Sed case X86II::MRM2m: case X86II::MRM3m: 3352193323Sed case X86II::MRM4m: case X86II::MRM5m: 3353193323Sed case X86II::MRM6m: case X86II::MRM7m: { 3354193323Sed 3355193323Sed ++FinalSize; 3356193323Sed FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); 3357193323Sed CurOp += X86AddrNumOperands; 3358193323Sed 3359193323Sed if (CurOp != NumOps) { 3360193323Sed const MachineOperand &MO = MI.getOperand(CurOp++); 3361193323Sed unsigned Size = X86InstrInfo::sizeOfImm(Desc); 3362193323Sed if (MO.isImm()) 3363193323Sed FinalSize += sizeConstant(Size); 3364193323Sed else { 3365193323Sed bool dword = false; 3366193323Sed if (Opcode == X86::MOV64mi32) 3367193323Sed dword = true; 3368193323Sed if (MO.isGlobal()) { 3369193323Sed FinalSize += sizeGlobalAddress(dword); 3370193323Sed } else if (MO.isSymbol()) 3371193323Sed FinalSize += sizeExternalSymbolAddress(dword); 3372193323Sed else if (MO.isCPI()) 3373193323Sed FinalSize += sizeConstPoolAddress(dword); 3374193323Sed else if 
(MO.isJTI()) 3375193323Sed FinalSize += sizeJumpTableAddress(dword); 3376193323Sed } 3377193323Sed } 3378193323Sed break; 3379193323Sed } 3380193323Sed 3381193323Sed case X86II::MRMInitReg: 3382193323Sed ++FinalSize; 3383193323Sed // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). 3384193323Sed FinalSize += sizeRegModRMByte(); 3385193323Sed ++CurOp; 3386193323Sed break; 3387193323Sed } 3388193323Sed 3389193323Sed if (!Desc->isVariadic() && CurOp != NumOps) { 3390198090Srdivacky std::string msg; 3391198090Srdivacky raw_string_ostream Msg(msg); 3392198090Srdivacky Msg << "Cannot determine size: " << MI; 3393198090Srdivacky llvm_report_error(Msg.str()); 3394193323Sed } 3395193323Sed 3396193323Sed 3397193323Sed return FinalSize; 3398193323Sed} 3399193323Sed 3400193323Sed 3401193323Sedunsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { 3402193323Sed const TargetInstrDesc &Desc = MI->getDesc(); 3403198090Srdivacky bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; 3404193323Sed bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); 3405193323Sed unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); 3406195098Sed if (Desc.getOpcode() == X86::MOVPC32r) 3407193323Sed Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode); 3408193323Sed return Size; 3409193323Sed} 3410193323Sed 3411193323Sed/// getGlobalBaseReg - Return a virtual register initialized with the 3412193323Sed/// the global base register value. Output instructions required to 3413193323Sed/// initialize the register in the function entry block, if necessary. 
3414193323Sed/// 3415193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 3416193323Sed assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && 3417193323Sed "X86-64 PIC uses RIP relative addressing"); 3418193323Sed 3419193323Sed X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 3420193323Sed unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 3421193323Sed if (GlobalBaseReg != 0) 3422193323Sed return GlobalBaseReg; 3423193323Sed 3424193323Sed // Insert the set of GlobalBaseReg into the first MBB of the function 3425193323Sed MachineBasicBlock &FirstMBB = MF->front(); 3426193323Sed MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 3427193323Sed DebugLoc DL = DebugLoc::getUnknownLoc(); 3428193323Sed if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc(); 3429193323Sed MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3430193323Sed unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3431193323Sed 3432193323Sed const TargetInstrInfo *TII = TM.getInstrInfo(); 3433193323Sed // Operand of MovePCtoStack is completely ignored by asm printer. It's 3434193323Sed // only used in JIT code emission as displacement to pc. 3435195098Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); 3436193323Sed 3437193323Sed // If we're using vanilla 'GOT' PIC style, we should use relative addressing 3438195098Sed // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. 
3439198090Srdivacky if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) { 3440195098Sed GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3441195098Sed // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register 3442193323Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) 3443198090Srdivacky .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 3444195098Sed X86II::MO_GOT_ABSOLUTE_ADDRESS); 3445193323Sed } else { 3446193323Sed GlobalBaseReg = PC; 3447193323Sed } 3448193323Sed 3449193323Sed X86FI->setGlobalBaseReg(GlobalBaseReg); 3450193323Sed return GlobalBaseReg; 3451193323Sed} 3452