X86InstrInfo.cpp revision 212904
1193323Sed//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file contains the X86 implementation of the TargetInstrInfo class. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "X86InstrInfo.h" 15193323Sed#include "X86.h" 16193323Sed#include "X86GenInstrInfo.inc" 17193323Sed#include "X86InstrBuilder.h" 18193323Sed#include "X86MachineFunctionInfo.h" 19193323Sed#include "X86Subtarget.h" 20193323Sed#include "X86TargetMachine.h" 21193323Sed#include "llvm/DerivedTypes.h" 22198090Srdivacky#include "llvm/LLVMContext.h" 23193323Sed#include "llvm/ADT/STLExtras.h" 24193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 25193323Sed#include "llvm/CodeGen/MachineFrameInfo.h" 26193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 27193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 28193323Sed#include "llvm/CodeGen/LiveVariables.h" 29199481Srdivacky#include "llvm/CodeGen/PseudoSourceValue.h" 30207618Srdivacky#include "llvm/MC/MCInst.h" 31193323Sed#include "llvm/Support/CommandLine.h" 32202375Srdivacky#include "llvm/Support/Debug.h" 33198090Srdivacky#include "llvm/Support/ErrorHandling.h" 34198090Srdivacky#include "llvm/Support/raw_ostream.h" 35193323Sed#include "llvm/Target/TargetOptions.h" 36198090Srdivacky#include "llvm/MC/MCAsmInfo.h" 37199481Srdivacky 38199481Srdivacky#include <limits> 39199481Srdivacky 40193323Sedusing namespace llvm; 41193323Sed 42198090Srdivackystatic cl::opt<bool> 43198090SrdivackyNoFusing("disable-spill-fusing", 44198090Srdivacky cl::desc("Disable fusing of spill code into instructions")); 
45198090Srdivackystatic cl::opt<bool> 46198090SrdivackyPrintFailedFusing("print-failed-fuse-candidates", 47198090Srdivacky cl::desc("Print instructions that the allocator wants to" 48198090Srdivacky " fuse, but the X86 backend currently can't"), 49198090Srdivacky cl::Hidden); 50198090Srdivackystatic cl::opt<bool> 51198090SrdivackyReMatPICStubLoad("remat-pic-stub-load", 52198090Srdivacky cl::desc("Re-materialize load from stub in PIC mode"), 53198090Srdivacky cl::init(false), cl::Hidden); 54193323Sed 55193323SedX86InstrInfo::X86InstrInfo(X86TargetMachine &tm) 56193323Sed : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), 57193323Sed TM(tm), RI(tm, *this) { 58193323Sed SmallVector<unsigned,16> AmbEntries; 59193323Sed static const unsigned OpTbl2Addr[][2] = { 60193323Sed { X86::ADC32ri, X86::ADC32mi }, 61193323Sed { X86::ADC32ri8, X86::ADC32mi8 }, 62193323Sed { X86::ADC32rr, X86::ADC32mr }, 63193323Sed { X86::ADC64ri32, X86::ADC64mi32 }, 64193323Sed { X86::ADC64ri8, X86::ADC64mi8 }, 65193323Sed { X86::ADC64rr, X86::ADC64mr }, 66193323Sed { X86::ADD16ri, X86::ADD16mi }, 67193323Sed { X86::ADD16ri8, X86::ADD16mi8 }, 68193323Sed { X86::ADD16rr, X86::ADD16mr }, 69193323Sed { X86::ADD32ri, X86::ADD32mi }, 70193323Sed { X86::ADD32ri8, X86::ADD32mi8 }, 71193323Sed { X86::ADD32rr, X86::ADD32mr }, 72193323Sed { X86::ADD64ri32, X86::ADD64mi32 }, 73193323Sed { X86::ADD64ri8, X86::ADD64mi8 }, 74193323Sed { X86::ADD64rr, X86::ADD64mr }, 75193323Sed { X86::ADD8ri, X86::ADD8mi }, 76193323Sed { X86::ADD8rr, X86::ADD8mr }, 77193323Sed { X86::AND16ri, X86::AND16mi }, 78193323Sed { X86::AND16ri8, X86::AND16mi8 }, 79193323Sed { X86::AND16rr, X86::AND16mr }, 80193323Sed { X86::AND32ri, X86::AND32mi }, 81193323Sed { X86::AND32ri8, X86::AND32mi8 }, 82193323Sed { X86::AND32rr, X86::AND32mr }, 83193323Sed { X86::AND64ri32, X86::AND64mi32 }, 84193323Sed { X86::AND64ri8, X86::AND64mi8 }, 85193323Sed { X86::AND64rr, X86::AND64mr }, 86193323Sed { X86::AND8ri, X86::AND8mi }, 87193323Sed { 
X86::AND8rr, X86::AND8mr }, 88193323Sed { X86::DEC16r, X86::DEC16m }, 89193323Sed { X86::DEC32r, X86::DEC32m }, 90193323Sed { X86::DEC64_16r, X86::DEC64_16m }, 91193323Sed { X86::DEC64_32r, X86::DEC64_32m }, 92193323Sed { X86::DEC64r, X86::DEC64m }, 93193323Sed { X86::DEC8r, X86::DEC8m }, 94193323Sed { X86::INC16r, X86::INC16m }, 95193323Sed { X86::INC32r, X86::INC32m }, 96193323Sed { X86::INC64_16r, X86::INC64_16m }, 97193323Sed { X86::INC64_32r, X86::INC64_32m }, 98193323Sed { X86::INC64r, X86::INC64m }, 99193323Sed { X86::INC8r, X86::INC8m }, 100193323Sed { X86::NEG16r, X86::NEG16m }, 101193323Sed { X86::NEG32r, X86::NEG32m }, 102193323Sed { X86::NEG64r, X86::NEG64m }, 103193323Sed { X86::NEG8r, X86::NEG8m }, 104193323Sed { X86::NOT16r, X86::NOT16m }, 105193323Sed { X86::NOT32r, X86::NOT32m }, 106193323Sed { X86::NOT64r, X86::NOT64m }, 107193323Sed { X86::NOT8r, X86::NOT8m }, 108193323Sed { X86::OR16ri, X86::OR16mi }, 109193323Sed { X86::OR16ri8, X86::OR16mi8 }, 110193323Sed { X86::OR16rr, X86::OR16mr }, 111193323Sed { X86::OR32ri, X86::OR32mi }, 112193323Sed { X86::OR32ri8, X86::OR32mi8 }, 113193323Sed { X86::OR32rr, X86::OR32mr }, 114193323Sed { X86::OR64ri32, X86::OR64mi32 }, 115193323Sed { X86::OR64ri8, X86::OR64mi8 }, 116193323Sed { X86::OR64rr, X86::OR64mr }, 117193323Sed { X86::OR8ri, X86::OR8mi }, 118193323Sed { X86::OR8rr, X86::OR8mr }, 119193323Sed { X86::ROL16r1, X86::ROL16m1 }, 120193323Sed { X86::ROL16rCL, X86::ROL16mCL }, 121193323Sed { X86::ROL16ri, X86::ROL16mi }, 122193323Sed { X86::ROL32r1, X86::ROL32m1 }, 123193323Sed { X86::ROL32rCL, X86::ROL32mCL }, 124193323Sed { X86::ROL32ri, X86::ROL32mi }, 125193323Sed { X86::ROL64r1, X86::ROL64m1 }, 126193323Sed { X86::ROL64rCL, X86::ROL64mCL }, 127193323Sed { X86::ROL64ri, X86::ROL64mi }, 128193323Sed { X86::ROL8r1, X86::ROL8m1 }, 129193323Sed { X86::ROL8rCL, X86::ROL8mCL }, 130193323Sed { X86::ROL8ri, X86::ROL8mi }, 131193323Sed { X86::ROR16r1, X86::ROR16m1 }, 132193323Sed { X86::ROR16rCL, 
X86::ROR16mCL }, 133193323Sed { X86::ROR16ri, X86::ROR16mi }, 134193323Sed { X86::ROR32r1, X86::ROR32m1 }, 135193323Sed { X86::ROR32rCL, X86::ROR32mCL }, 136193323Sed { X86::ROR32ri, X86::ROR32mi }, 137193323Sed { X86::ROR64r1, X86::ROR64m1 }, 138193323Sed { X86::ROR64rCL, X86::ROR64mCL }, 139193323Sed { X86::ROR64ri, X86::ROR64mi }, 140193323Sed { X86::ROR8r1, X86::ROR8m1 }, 141193323Sed { X86::ROR8rCL, X86::ROR8mCL }, 142193323Sed { X86::ROR8ri, X86::ROR8mi }, 143193323Sed { X86::SAR16r1, X86::SAR16m1 }, 144193323Sed { X86::SAR16rCL, X86::SAR16mCL }, 145193323Sed { X86::SAR16ri, X86::SAR16mi }, 146193323Sed { X86::SAR32r1, X86::SAR32m1 }, 147193323Sed { X86::SAR32rCL, X86::SAR32mCL }, 148193323Sed { X86::SAR32ri, X86::SAR32mi }, 149193323Sed { X86::SAR64r1, X86::SAR64m1 }, 150193323Sed { X86::SAR64rCL, X86::SAR64mCL }, 151193323Sed { X86::SAR64ri, X86::SAR64mi }, 152193323Sed { X86::SAR8r1, X86::SAR8m1 }, 153193323Sed { X86::SAR8rCL, X86::SAR8mCL }, 154193323Sed { X86::SAR8ri, X86::SAR8mi }, 155193323Sed { X86::SBB32ri, X86::SBB32mi }, 156193323Sed { X86::SBB32ri8, X86::SBB32mi8 }, 157193323Sed { X86::SBB32rr, X86::SBB32mr }, 158193323Sed { X86::SBB64ri32, X86::SBB64mi32 }, 159193323Sed { X86::SBB64ri8, X86::SBB64mi8 }, 160193323Sed { X86::SBB64rr, X86::SBB64mr }, 161193323Sed { X86::SHL16rCL, X86::SHL16mCL }, 162193323Sed { X86::SHL16ri, X86::SHL16mi }, 163193323Sed { X86::SHL32rCL, X86::SHL32mCL }, 164193323Sed { X86::SHL32ri, X86::SHL32mi }, 165193323Sed { X86::SHL64rCL, X86::SHL64mCL }, 166193323Sed { X86::SHL64ri, X86::SHL64mi }, 167193323Sed { X86::SHL8rCL, X86::SHL8mCL }, 168193323Sed { X86::SHL8ri, X86::SHL8mi }, 169193323Sed { X86::SHLD16rrCL, X86::SHLD16mrCL }, 170193323Sed { X86::SHLD16rri8, X86::SHLD16mri8 }, 171193323Sed { X86::SHLD32rrCL, X86::SHLD32mrCL }, 172193323Sed { X86::SHLD32rri8, X86::SHLD32mri8 }, 173193323Sed { X86::SHLD64rrCL, X86::SHLD64mrCL }, 174193323Sed { X86::SHLD64rri8, X86::SHLD64mri8 }, 175193323Sed { X86::SHR16r1, X86::SHR16m1 
}, 176193323Sed { X86::SHR16rCL, X86::SHR16mCL }, 177193323Sed { X86::SHR16ri, X86::SHR16mi }, 178193323Sed { X86::SHR32r1, X86::SHR32m1 }, 179193323Sed { X86::SHR32rCL, X86::SHR32mCL }, 180193323Sed { X86::SHR32ri, X86::SHR32mi }, 181193323Sed { X86::SHR64r1, X86::SHR64m1 }, 182193323Sed { X86::SHR64rCL, X86::SHR64mCL }, 183193323Sed { X86::SHR64ri, X86::SHR64mi }, 184193323Sed { X86::SHR8r1, X86::SHR8m1 }, 185193323Sed { X86::SHR8rCL, X86::SHR8mCL }, 186193323Sed { X86::SHR8ri, X86::SHR8mi }, 187193323Sed { X86::SHRD16rrCL, X86::SHRD16mrCL }, 188193323Sed { X86::SHRD16rri8, X86::SHRD16mri8 }, 189193323Sed { X86::SHRD32rrCL, X86::SHRD32mrCL }, 190193323Sed { X86::SHRD32rri8, X86::SHRD32mri8 }, 191193323Sed { X86::SHRD64rrCL, X86::SHRD64mrCL }, 192193323Sed { X86::SHRD64rri8, X86::SHRD64mri8 }, 193193323Sed { X86::SUB16ri, X86::SUB16mi }, 194193323Sed { X86::SUB16ri8, X86::SUB16mi8 }, 195193323Sed { X86::SUB16rr, X86::SUB16mr }, 196193323Sed { X86::SUB32ri, X86::SUB32mi }, 197193323Sed { X86::SUB32ri8, X86::SUB32mi8 }, 198193323Sed { X86::SUB32rr, X86::SUB32mr }, 199193323Sed { X86::SUB64ri32, X86::SUB64mi32 }, 200193323Sed { X86::SUB64ri8, X86::SUB64mi8 }, 201193323Sed { X86::SUB64rr, X86::SUB64mr }, 202193323Sed { X86::SUB8ri, X86::SUB8mi }, 203193323Sed { X86::SUB8rr, X86::SUB8mr }, 204193323Sed { X86::XOR16ri, X86::XOR16mi }, 205193323Sed { X86::XOR16ri8, X86::XOR16mi8 }, 206193323Sed { X86::XOR16rr, X86::XOR16mr }, 207193323Sed { X86::XOR32ri, X86::XOR32mi }, 208193323Sed { X86::XOR32ri8, X86::XOR32mi8 }, 209193323Sed { X86::XOR32rr, X86::XOR32mr }, 210193323Sed { X86::XOR64ri32, X86::XOR64mi32 }, 211193323Sed { X86::XOR64ri8, X86::XOR64mi8 }, 212193323Sed { X86::XOR64rr, X86::XOR64mr }, 213193323Sed { X86::XOR8ri, X86::XOR8mi }, 214193323Sed { X86::XOR8rr, X86::XOR8mr } 215193323Sed }; 216193323Sed 217193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { 218193323Sed unsigned RegOp = OpTbl2Addr[i][0]; 219193323Sed unsigned MemOp = 
OpTbl2Addr[i][1]; 220193323Sed if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, 221198090Srdivacky std::make_pair(MemOp,0))).second) 222193323Sed assert(false && "Duplicated entries?"); 223198090Srdivacky // Index 0, folded load and store, no alignment requirement. 224198090Srdivacky unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); 225193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 226193323Sed std::make_pair(RegOp, 227193323Sed AuxInfo))).second) 228193323Sed AmbEntries.push_back(MemOp); 229193323Sed } 230193323Sed 231193323Sed // If the third value is 1, then it's folding either a load or a store. 232198090Srdivacky static const unsigned OpTbl0[][4] = { 233198090Srdivacky { X86::BT16ri8, X86::BT16mi8, 1, 0 }, 234198090Srdivacky { X86::BT32ri8, X86::BT32mi8, 1, 0 }, 235198090Srdivacky { X86::BT64ri8, X86::BT64mi8, 1, 0 }, 236198090Srdivacky { X86::CALL32r, X86::CALL32m, 1, 0 }, 237198090Srdivacky { X86::CALL64r, X86::CALL64m, 1, 0 }, 238212904Sdim { X86::WINCALL64r, X86::WINCALL64m, 1, 0 }, 239198090Srdivacky { X86::CMP16ri, X86::CMP16mi, 1, 0 }, 240198090Srdivacky { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, 241198090Srdivacky { X86::CMP16rr, X86::CMP16mr, 1, 0 }, 242198090Srdivacky { X86::CMP32ri, X86::CMP32mi, 1, 0 }, 243198090Srdivacky { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, 244198090Srdivacky { X86::CMP32rr, X86::CMP32mr, 1, 0 }, 245198090Srdivacky { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, 246198090Srdivacky { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, 247198090Srdivacky { X86::CMP64rr, X86::CMP64mr, 1, 0 }, 248198090Srdivacky { X86::CMP8ri, X86::CMP8mi, 1, 0 }, 249198090Srdivacky { X86::CMP8rr, X86::CMP8mr, 1, 0 }, 250198090Srdivacky { X86::DIV16r, X86::DIV16m, 1, 0 }, 251198090Srdivacky { X86::DIV32r, X86::DIV32m, 1, 0 }, 252198090Srdivacky { X86::DIV64r, X86::DIV64m, 1, 0 }, 253198090Srdivacky { X86::DIV8r, X86::DIV8m, 1, 0 }, 254198090Srdivacky { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, 255198090Srdivacky { X86::FsMOVAPDrr, 
X86::MOVSDmr, 0, 0 }, 256198090Srdivacky { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, 257198090Srdivacky { X86::IDIV16r, X86::IDIV16m, 1, 0 }, 258198090Srdivacky { X86::IDIV32r, X86::IDIV32m, 1, 0 }, 259198090Srdivacky { X86::IDIV64r, X86::IDIV64m, 1, 0 }, 260198090Srdivacky { X86::IDIV8r, X86::IDIV8m, 1, 0 }, 261198090Srdivacky { X86::IMUL16r, X86::IMUL16m, 1, 0 }, 262198090Srdivacky { X86::IMUL32r, X86::IMUL32m, 1, 0 }, 263198090Srdivacky { X86::IMUL64r, X86::IMUL64m, 1, 0 }, 264198090Srdivacky { X86::IMUL8r, X86::IMUL8m, 1, 0 }, 265198090Srdivacky { X86::JMP32r, X86::JMP32m, 1, 0 }, 266198090Srdivacky { X86::JMP64r, X86::JMP64m, 1, 0 }, 267198090Srdivacky { X86::MOV16ri, X86::MOV16mi, 0, 0 }, 268198090Srdivacky { X86::MOV16rr, X86::MOV16mr, 0, 0 }, 269198090Srdivacky { X86::MOV32ri, X86::MOV32mi, 0, 0 }, 270198090Srdivacky { X86::MOV32rr, X86::MOV32mr, 0, 0 }, 271205218Srdivacky { X86::MOV32rr_TC, X86::MOV32mr_TC, 0, 0 }, 272198090Srdivacky { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, 273198090Srdivacky { X86::MOV64rr, X86::MOV64mr, 0, 0 }, 274198090Srdivacky { X86::MOV8ri, X86::MOV8mi, 0, 0 }, 275198090Srdivacky { X86::MOV8rr, X86::MOV8mr, 0, 0 }, 276198090Srdivacky { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, 277198090Srdivacky { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, 278198090Srdivacky { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, 279198090Srdivacky { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, 280198090Srdivacky { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, 281198090Srdivacky { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, 282198090Srdivacky { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, 283198090Srdivacky { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, 284198090Srdivacky { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, 285198090Srdivacky { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, 286198090Srdivacky { X86::MUL16r, X86::MUL16m, 1, 0 }, 287198090Srdivacky { X86::MUL32r, X86::MUL32m, 1, 0 }, 288198090Srdivacky { X86::MUL64r, X86::MUL64m, 1, 0 }, 289198090Srdivacky { X86::MUL8r, X86::MUL8m, 1, 
0 }, 290198090Srdivacky { X86::SETAEr, X86::SETAEm, 0, 0 }, 291198090Srdivacky { X86::SETAr, X86::SETAm, 0, 0 }, 292198090Srdivacky { X86::SETBEr, X86::SETBEm, 0, 0 }, 293198090Srdivacky { X86::SETBr, X86::SETBm, 0, 0 }, 294198090Srdivacky { X86::SETEr, X86::SETEm, 0, 0 }, 295198090Srdivacky { X86::SETGEr, X86::SETGEm, 0, 0 }, 296198090Srdivacky { X86::SETGr, X86::SETGm, 0, 0 }, 297198090Srdivacky { X86::SETLEr, X86::SETLEm, 0, 0 }, 298198090Srdivacky { X86::SETLr, X86::SETLm, 0, 0 }, 299198090Srdivacky { X86::SETNEr, X86::SETNEm, 0, 0 }, 300198090Srdivacky { X86::SETNOr, X86::SETNOm, 0, 0 }, 301198090Srdivacky { X86::SETNPr, X86::SETNPm, 0, 0 }, 302198090Srdivacky { X86::SETNSr, X86::SETNSm, 0, 0 }, 303198090Srdivacky { X86::SETOr, X86::SETOm, 0, 0 }, 304198090Srdivacky { X86::SETPr, X86::SETPm, 0, 0 }, 305198090Srdivacky { X86::SETSr, X86::SETSm, 0, 0 }, 306198090Srdivacky { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, 307205218Srdivacky { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 }, 308198090Srdivacky { X86::TEST16ri, X86::TEST16mi, 1, 0 }, 309198090Srdivacky { X86::TEST32ri, X86::TEST32mi, 1, 0 }, 310198090Srdivacky { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, 311198090Srdivacky { X86::TEST8ri, X86::TEST8mi, 1, 0 } 312193323Sed }; 313193323Sed 314193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { 315193323Sed unsigned RegOp = OpTbl0[i][0]; 316193323Sed unsigned MemOp = OpTbl0[i][1]; 317198090Srdivacky unsigned Align = OpTbl0[i][3]; 318193323Sed if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, 319198090Srdivacky std::make_pair(MemOp,Align))).second) 320193323Sed assert(false && "Duplicated entries?"); 321193323Sed unsigned FoldedLoad = OpTbl0[i][2]; 322193323Sed // Index 0, folded load or store. 
323193323Sed unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); 324193323Sed if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) 325193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 326193323Sed std::make_pair(RegOp, AuxInfo))).second) 327193323Sed AmbEntries.push_back(MemOp); 328193323Sed } 329193323Sed 330198090Srdivacky static const unsigned OpTbl1[][3] = { 331198090Srdivacky { X86::CMP16rr, X86::CMP16rm, 0 }, 332198090Srdivacky { X86::CMP32rr, X86::CMP32rm, 0 }, 333198090Srdivacky { X86::CMP64rr, X86::CMP64rm, 0 }, 334198090Srdivacky { X86::CMP8rr, X86::CMP8rm, 0 }, 335198090Srdivacky { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, 336198090Srdivacky { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, 337198090Srdivacky { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, 338198090Srdivacky { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, 339198090Srdivacky { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, 340198090Srdivacky { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, 341198090Srdivacky { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, 342198090Srdivacky { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, 343198090Srdivacky { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, 344198090Srdivacky { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, 345198090Srdivacky { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, 346198090Srdivacky { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, 347198090Srdivacky { X86::IMUL16rri, X86::IMUL16rmi, 0 }, 348198090Srdivacky { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, 349198090Srdivacky { X86::IMUL32rri, X86::IMUL32rmi, 0 }, 350198090Srdivacky { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, 351198090Srdivacky { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, 352198090Srdivacky { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, 353198090Srdivacky { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, 354198090Srdivacky { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, 355198090Srdivacky { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, 356198090Srdivacky { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, 357198090Srdivacky { 
X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, 358198090Srdivacky { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, 359198090Srdivacky { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, 360198090Srdivacky { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, 361198090Srdivacky { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, 362198090Srdivacky { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, 363198090Srdivacky { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, 364198090Srdivacky { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, 365198090Srdivacky { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, 366198090Srdivacky { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, 367198090Srdivacky { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, 368198090Srdivacky { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, 369198090Srdivacky { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, 370198090Srdivacky { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, 371198090Srdivacky { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, 372198090Srdivacky { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, 373198090Srdivacky { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, 374198090Srdivacky { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, 375198090Srdivacky { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, 376198090Srdivacky { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, 377198090Srdivacky { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, 378198090Srdivacky { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, 379198090Srdivacky { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, 380198090Srdivacky { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, 381198090Srdivacky { X86::MOV16rr, X86::MOV16rm, 0 }, 382198090Srdivacky { X86::MOV32rr, X86::MOV32rm, 0 }, 383205218Srdivacky { X86::MOV32rr_TC, X86::MOV32rm_TC, 0 }, 384198090Srdivacky { X86::MOV64rr, X86::MOV64rm, 0 }, 385198090Srdivacky { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, 386198090Srdivacky { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, 
387198090Srdivacky { X86::MOV8rr, X86::MOV8rm, 0 }, 388198090Srdivacky { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, 389198090Srdivacky { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, 390198090Srdivacky { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, 391198090Srdivacky { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, 392198090Srdivacky { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, 393198090Srdivacky { X86::MOVDQArr, X86::MOVDQArm, 16 }, 394198090Srdivacky { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, 395198090Srdivacky { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, 396198090Srdivacky { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, 397198090Srdivacky { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, 398198090Srdivacky { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, 399198090Srdivacky { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, 400198090Srdivacky { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, 401198090Srdivacky { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, 402198090Srdivacky { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, 403202878Srdivacky { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, 404198090Srdivacky { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, 405198090Srdivacky { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, 406198090Srdivacky { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, 407198090Srdivacky { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, 408198090Srdivacky { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, 409198090Srdivacky { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, 410198090Srdivacky { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, 411198090Srdivacky { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, 412198090Srdivacky { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, 413198090Srdivacky { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, 414198090Srdivacky { X86::PSHUFDri, X86::PSHUFDmi, 16 }, 415198090Srdivacky { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, 416198090Srdivacky { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, 417198090Srdivacky { X86::RCPPSr, X86::RCPPSm, 16 }, 418198090Srdivacky { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, 419198090Srdivacky { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, 
420198090Srdivacky { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, 421198090Srdivacky { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, 422198090Srdivacky { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, 423198090Srdivacky { X86::SQRTPDr, X86::SQRTPDm, 16 }, 424198090Srdivacky { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, 425198090Srdivacky { X86::SQRTPSr, X86::SQRTPSm, 16 }, 426198090Srdivacky { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, 427198090Srdivacky { X86::SQRTSDr, X86::SQRTSDm, 0 }, 428198090Srdivacky { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, 429198090Srdivacky { X86::SQRTSSr, X86::SQRTSSm, 0 }, 430198090Srdivacky { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, 431198090Srdivacky { X86::TEST16rr, X86::TEST16rm, 0 }, 432198090Srdivacky { X86::TEST32rr, X86::TEST32rm, 0 }, 433198090Srdivacky { X86::TEST64rr, X86::TEST64rm, 0 }, 434198090Srdivacky { X86::TEST8rr, X86::TEST8rm, 0 }, 435193323Sed // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 436198090Srdivacky { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, 437198090Srdivacky { X86::UCOMISSrr, X86::UCOMISSrm, 0 } 438193323Sed }; 439193323Sed 440193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { 441193323Sed unsigned RegOp = OpTbl1[i][0]; 442193323Sed unsigned MemOp = OpTbl1[i][1]; 443198090Srdivacky unsigned Align = OpTbl1[i][2]; 444193323Sed if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, 445198090Srdivacky std::make_pair(MemOp,Align))).second) 446193323Sed assert(false && "Duplicated entries?"); 447198090Srdivacky // Index 1, folded load 448198090Srdivacky unsigned AuxInfo = 1 | (1 << 4); 449193323Sed if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) 450193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 451193323Sed std::make_pair(RegOp, AuxInfo))).second) 452193323Sed AmbEntries.push_back(MemOp); 453193323Sed } 454193323Sed 455198090Srdivacky static const unsigned OpTbl2[][3] = { 456198090Srdivacky { X86::ADC32rr, X86::ADC32rm, 0 }, 457198090Srdivacky { X86::ADC64rr, 
X86::ADC64rm, 0 }, 458198090Srdivacky { X86::ADD16rr, X86::ADD16rm, 0 }, 459198090Srdivacky { X86::ADD32rr, X86::ADD32rm, 0 }, 460198090Srdivacky { X86::ADD64rr, X86::ADD64rm, 0 }, 461198090Srdivacky { X86::ADD8rr, X86::ADD8rm, 0 }, 462198090Srdivacky { X86::ADDPDrr, X86::ADDPDrm, 16 }, 463198090Srdivacky { X86::ADDPSrr, X86::ADDPSrm, 16 }, 464198090Srdivacky { X86::ADDSDrr, X86::ADDSDrm, 0 }, 465198090Srdivacky { X86::ADDSSrr, X86::ADDSSrm, 0 }, 466198090Srdivacky { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, 467198090Srdivacky { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, 468198090Srdivacky { X86::AND16rr, X86::AND16rm, 0 }, 469198090Srdivacky { X86::AND32rr, X86::AND32rm, 0 }, 470198090Srdivacky { X86::AND64rr, X86::AND64rm, 0 }, 471198090Srdivacky { X86::AND8rr, X86::AND8rm, 0 }, 472198090Srdivacky { X86::ANDNPDrr, X86::ANDNPDrm, 16 }, 473198090Srdivacky { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, 474198090Srdivacky { X86::ANDPDrr, X86::ANDPDrm, 16 }, 475198090Srdivacky { X86::ANDPSrr, X86::ANDPSrm, 16 }, 476198090Srdivacky { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, 477198090Srdivacky { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, 478198090Srdivacky { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, 479198090Srdivacky { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, 480198090Srdivacky { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, 481198090Srdivacky { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, 482198090Srdivacky { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, 483198090Srdivacky { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, 484198090Srdivacky { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, 485198090Srdivacky { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, 486198090Srdivacky { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, 487198090Srdivacky { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, 488198090Srdivacky { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, 489198090Srdivacky { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, 490198090Srdivacky { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, 491198090Srdivacky { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, 492198090Srdivacky { X86::CMOVG32rr, 
X86::CMOVG32rm, 0 }, 493198090Srdivacky { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, 494198090Srdivacky { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, 495198090Srdivacky { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, 496198090Srdivacky { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, 497198090Srdivacky { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, 498198090Srdivacky { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, 499198090Srdivacky { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, 500198090Srdivacky { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, 501198090Srdivacky { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, 502198090Srdivacky { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, 503198090Srdivacky { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, 504198090Srdivacky { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, 505198090Srdivacky { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, 506198090Srdivacky { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, 507198090Srdivacky { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, 508198090Srdivacky { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, 509198090Srdivacky { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, 510198090Srdivacky { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, 511198090Srdivacky { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, 512198090Srdivacky { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, 513198090Srdivacky { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, 514198090Srdivacky { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, 515198090Srdivacky { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, 516198090Srdivacky { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, 517198090Srdivacky { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, 518198090Srdivacky { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, 519198090Srdivacky { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, 520198090Srdivacky { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, 521198090Srdivacky { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, 522198090Srdivacky { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, 523198090Srdivacky { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, 524198090Srdivacky { X86::CMPPDrri, X86::CMPPDrmi, 16 }, 525198090Srdivacky { X86::CMPPSrri, X86::CMPPSrmi, 16 }, 526198090Srdivacky { 
X86::CMPSDrr, X86::CMPSDrm, 0 }, 527198090Srdivacky { X86::CMPSSrr, X86::CMPSSrm, 0 }, 528198090Srdivacky { X86::DIVPDrr, X86::DIVPDrm, 16 }, 529198090Srdivacky { X86::DIVPSrr, X86::DIVPSrm, 16 }, 530198090Srdivacky { X86::DIVSDrr, X86::DIVSDrm, 0 }, 531198090Srdivacky { X86::DIVSSrr, X86::DIVSSrm, 0 }, 532198090Srdivacky { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, 533198090Srdivacky { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, 534198090Srdivacky { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, 535198090Srdivacky { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, 536198090Srdivacky { X86::FsORPDrr, X86::FsORPDrm, 16 }, 537198090Srdivacky { X86::FsORPSrr, X86::FsORPSrm, 16 }, 538198090Srdivacky { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, 539198090Srdivacky { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, 540198090Srdivacky { X86::HADDPDrr, X86::HADDPDrm, 16 }, 541198090Srdivacky { X86::HADDPSrr, X86::HADDPSrm, 16 }, 542198090Srdivacky { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, 543198090Srdivacky { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, 544198090Srdivacky { X86::IMUL16rr, X86::IMUL16rm, 0 }, 545198090Srdivacky { X86::IMUL32rr, X86::IMUL32rm, 0 }, 546198090Srdivacky { X86::IMUL64rr, X86::IMUL64rm, 0 }, 547198090Srdivacky { X86::MAXPDrr, X86::MAXPDrm, 16 }, 548198090Srdivacky { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, 549198090Srdivacky { X86::MAXPSrr, X86::MAXPSrm, 16 }, 550198090Srdivacky { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, 551198090Srdivacky { X86::MAXSDrr, X86::MAXSDrm, 0 }, 552198090Srdivacky { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, 553198090Srdivacky { X86::MAXSSrr, X86::MAXSSrm, 0 }, 554198090Srdivacky { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, 555198090Srdivacky { X86::MINPDrr, X86::MINPDrm, 16 }, 556198090Srdivacky { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, 557198090Srdivacky { X86::MINPSrr, X86::MINPSrm, 16 }, 558198090Srdivacky { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, 559198090Srdivacky { X86::MINSDrr, X86::MINSDrm, 0 }, 560198090Srdivacky { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, 
561198090Srdivacky { X86::MINSSrr, X86::MINSSrm, 0 }, 562198090Srdivacky { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, 563198090Srdivacky { X86::MULPDrr, X86::MULPDrm, 16 }, 564198090Srdivacky { X86::MULPSrr, X86::MULPSrm, 16 }, 565198090Srdivacky { X86::MULSDrr, X86::MULSDrm, 0 }, 566198090Srdivacky { X86::MULSSrr, X86::MULSSrm, 0 }, 567198090Srdivacky { X86::OR16rr, X86::OR16rm, 0 }, 568198090Srdivacky { X86::OR32rr, X86::OR32rm, 0 }, 569198090Srdivacky { X86::OR64rr, X86::OR64rm, 0 }, 570198090Srdivacky { X86::OR8rr, X86::OR8rm, 0 }, 571198090Srdivacky { X86::ORPDrr, X86::ORPDrm, 16 }, 572198090Srdivacky { X86::ORPSrr, X86::ORPSrm, 16 }, 573198090Srdivacky { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, 574198090Srdivacky { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, 575198090Srdivacky { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, 576198090Srdivacky { X86::PADDBrr, X86::PADDBrm, 16 }, 577198090Srdivacky { X86::PADDDrr, X86::PADDDrm, 16 }, 578198090Srdivacky { X86::PADDQrr, X86::PADDQrm, 16 }, 579198090Srdivacky { X86::PADDSBrr, X86::PADDSBrm, 16 }, 580198090Srdivacky { X86::PADDSWrr, X86::PADDSWrm, 16 }, 581198090Srdivacky { X86::PADDWrr, X86::PADDWrm, 16 }, 582198090Srdivacky { X86::PANDNrr, X86::PANDNrm, 16 }, 583198090Srdivacky { X86::PANDrr, X86::PANDrm, 16 }, 584198090Srdivacky { X86::PAVGBrr, X86::PAVGBrm, 16 }, 585198090Srdivacky { X86::PAVGWrr, X86::PAVGWrm, 16 }, 586198090Srdivacky { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, 587198090Srdivacky { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, 588198090Srdivacky { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, 589198090Srdivacky { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, 590198090Srdivacky { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, 591198090Srdivacky { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, 592198090Srdivacky { X86::PINSRWrri, X86::PINSRWrmi, 16 }, 593198090Srdivacky { X86::PMADDWDrr, X86::PMADDWDrm, 16 }, 594198090Srdivacky { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, 595198090Srdivacky { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, 596198090Srdivacky { 
X86::PMINSWrr, X86::PMINSWrm, 16 }, 597198090Srdivacky { X86::PMINUBrr, X86::PMINUBrm, 16 }, 598198090Srdivacky { X86::PMULDQrr, X86::PMULDQrm, 16 }, 599198090Srdivacky { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, 600198090Srdivacky { X86::PMULHWrr, X86::PMULHWrm, 16 }, 601198090Srdivacky { X86::PMULLDrr, X86::PMULLDrm, 16 }, 602198090Srdivacky { X86::PMULLWrr, X86::PMULLWrm, 16 }, 603198090Srdivacky { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, 604198090Srdivacky { X86::PORrr, X86::PORrm, 16 }, 605198090Srdivacky { X86::PSADBWrr, X86::PSADBWrm, 16 }, 606198090Srdivacky { X86::PSLLDrr, X86::PSLLDrm, 16 }, 607198090Srdivacky { X86::PSLLQrr, X86::PSLLQrm, 16 }, 608198090Srdivacky { X86::PSLLWrr, X86::PSLLWrm, 16 }, 609198090Srdivacky { X86::PSRADrr, X86::PSRADrm, 16 }, 610198090Srdivacky { X86::PSRAWrr, X86::PSRAWrm, 16 }, 611198090Srdivacky { X86::PSRLDrr, X86::PSRLDrm, 16 }, 612198090Srdivacky { X86::PSRLQrr, X86::PSRLQrm, 16 }, 613198090Srdivacky { X86::PSRLWrr, X86::PSRLWrm, 16 }, 614198090Srdivacky { X86::PSUBBrr, X86::PSUBBrm, 16 }, 615198090Srdivacky { X86::PSUBDrr, X86::PSUBDrm, 16 }, 616198090Srdivacky { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, 617198090Srdivacky { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, 618198090Srdivacky { X86::PSUBWrr, X86::PSUBWrm, 16 }, 619198090Srdivacky { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, 620198090Srdivacky { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, 621198090Srdivacky { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, 622198090Srdivacky { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, 623198090Srdivacky { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, 624198090Srdivacky { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, 625198090Srdivacky { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, 626198090Srdivacky { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, 627198090Srdivacky { X86::PXORrr, X86::PXORrm, 16 }, 628198090Srdivacky { X86::SBB32rr, X86::SBB32rm, 0 }, 629198090Srdivacky { X86::SBB64rr, X86::SBB64rm, 0 }, 630198090Srdivacky { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, 
631198090Srdivacky { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, 632198090Srdivacky { X86::SUB16rr, X86::SUB16rm, 0 }, 633198090Srdivacky { X86::SUB32rr, X86::SUB32rm, 0 }, 634198090Srdivacky { X86::SUB64rr, X86::SUB64rm, 0 }, 635198090Srdivacky { X86::SUB8rr, X86::SUB8rm, 0 }, 636198090Srdivacky { X86::SUBPDrr, X86::SUBPDrm, 16 }, 637198090Srdivacky { X86::SUBPSrr, X86::SUBPSrm, 16 }, 638198090Srdivacky { X86::SUBSDrr, X86::SUBSDrm, 0 }, 639198090Srdivacky { X86::SUBSSrr, X86::SUBSSrm, 0 }, 640193323Sed // FIXME: TEST*rr -> swapped operand of TEST*mr. 641198090Srdivacky { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, 642198090Srdivacky { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, 643198090Srdivacky { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, 644198090Srdivacky { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, 645198090Srdivacky { X86::XOR16rr, X86::XOR16rm, 0 }, 646198090Srdivacky { X86::XOR32rr, X86::XOR32rm, 0 }, 647198090Srdivacky { X86::XOR64rr, X86::XOR64rm, 0 }, 648198090Srdivacky { X86::XOR8rr, X86::XOR8rm, 0 }, 649198090Srdivacky { X86::XORPDrr, X86::XORPDrm, 16 }, 650198090Srdivacky { X86::XORPSrr, X86::XORPSrm, 16 } 651193323Sed }; 652193323Sed 653193323Sed for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { 654193323Sed unsigned RegOp = OpTbl2[i][0]; 655193323Sed unsigned MemOp = OpTbl2[i][1]; 656198090Srdivacky unsigned Align = OpTbl2[i][2]; 657193323Sed if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, 658198090Srdivacky std::make_pair(MemOp,Align))).second) 659193323Sed assert(false && "Duplicated entries?"); 660198090Srdivacky // Index 2, folded load 661198090Srdivacky unsigned AuxInfo = 2 | (1 << 4); 662193323Sed if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, 663193323Sed std::make_pair(RegOp, AuxInfo))).second) 664193323Sed AmbEntries.push_back(MemOp); 665193323Sed } 666193323Sed 667193323Sed // Remove ambiguous entries. 
  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}

/// isCoalescableExtInstr - Return true if MI is a MOVSX/MOVZX register-to-
/// register extension whose operand pair can be expressed as a sub-register
/// relationship.  On success SrcReg/DstReg are the extension's source and
/// destination registers and SubIdx is the index of the sub-register of
/// DstReg that corresponds to SrcReg's width.
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                    unsigned &SrcReg, unsigned &DstReg,
                                    unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: break;
  case X86::MOVSX16rr8:
  case X86::MOVZX16rr8:
  case X86::MOVSX32rr8:
  case X86::MOVZX32rr8:
  case X86::MOVSX64rr8:
  case X86::MOVZX64rr8:
    if (!TM.getSubtarget<X86Subtarget>().is64Bit())
      // It's not always legal to reference the low 8-bit of the larger
      // register in 32-bit mode.
      return false;
    // Intended fallthrough: in 64-bit mode the 8-bit sources are handled
    // the same way as the wider sources below.
  case X86::MOVSX32rr16:
  case X86::MOVZX32rr16:
  case X86::MOVSX64rr16:
  case X86::MOVZX64rr16:
  case X86::MOVSX64rr32:
  case X86::MOVZX64rr32: {
    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
      // Be conservative.
      return false;
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    // Map the opcode's source width to the matching sub-register index.
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable(0);
      break;
    case X86::MOVSX16rr8:
    case X86::MOVZX16rr8:
    case X86::MOVSX32rr8:
    case X86::MOVZX32rr8:
    case X86::MOVSX64rr8:
    case X86::MOVZX64rr8:
      SubIdx = X86::sub_8bit;
      break;
    case X86::MOVSX32rr16:
    case X86::MOVZX32rr16:
    case X86::MOVSX64rr16:
    case X86::MOVZX64rr16:
      SubIdx = X86::sub_16bit;
      break;
    case X86::MOVSX64rr32:
    case X86::MOVZX64rr32:
      SubIdx = X86::sub_32bit;
      break;
    }
    return true;
  }
  }
  return false;
}

/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
729199481Srdivackybool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, 730199481Srdivacky int &FrameIndex) const { 731199481Srdivacky if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && 732199481Srdivacky MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && 733199481Srdivacky MI->getOperand(Op+1).getImm() == 1 && 734199481Srdivacky MI->getOperand(Op+2).getReg() == 0 && 735199481Srdivacky MI->getOperand(Op+3).getImm() == 0) { 736199481Srdivacky FrameIndex = MI->getOperand(Op).getIndex(); 737199481Srdivacky return true; 738199481Srdivacky } 739199481Srdivacky return false; 740199481Srdivacky} 741199481Srdivacky 742199481Srdivackystatic bool isFrameLoadOpcode(int Opcode) { 743199481Srdivacky switch (Opcode) { 744193323Sed default: break; 745193323Sed case X86::MOV8rm: 746193323Sed case X86::MOV16rm: 747193323Sed case X86::MOV32rm: 748210299Sed case X86::MOV32rm_TC: 749193323Sed case X86::MOV64rm: 750210299Sed case X86::MOV64rm_TC: 751193323Sed case X86::LD_Fp64m: 752193323Sed case X86::MOVSSrm: 753193323Sed case X86::MOVSDrm: 754193323Sed case X86::MOVAPSrm: 755193323Sed case X86::MOVAPDrm: 756193323Sed case X86::MOVDQArm: 757193323Sed case X86::MMX_MOVD64rm: 758193323Sed case X86::MMX_MOVQ64rm: 759199481Srdivacky return true; 760193323Sed break; 761193323Sed } 762199481Srdivacky return false; 763193323Sed} 764193323Sed 765199481Srdivackystatic bool isFrameStoreOpcode(int Opcode) { 766199481Srdivacky switch (Opcode) { 767193323Sed default: break; 768193323Sed case X86::MOV8mr: 769193323Sed case X86::MOV16mr: 770193323Sed case X86::MOV32mr: 771210299Sed case X86::MOV32mr_TC: 772193323Sed case X86::MOV64mr: 773210299Sed case X86::MOV64mr_TC: 774193323Sed case X86::ST_FpP64m: 775193323Sed case X86::MOVSSmr: 776193323Sed case X86::MOVSDmr: 777193323Sed case X86::MOVAPSmr: 778193323Sed case X86::MOVAPDmr: 779193323Sed case X86::MOVDQAmr: 780193323Sed case X86::MMX_MOVD64mr: 781193323Sed case X86::MMX_MOVQ64mr: 782193323Sed case 
X86::MMX_MOVNTQmr: 783199481Srdivacky return true; 784199481Srdivacky } 785199481Srdivacky return false; 786199481Srdivacky} 787199481Srdivacky 788199481Srdivackyunsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 789199481Srdivacky int &FrameIndex) const { 790199481Srdivacky if (isFrameLoadOpcode(MI->getOpcode())) 791212904Sdim if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex)) 792199481Srdivacky return MI->getOperand(0).getReg(); 793199481Srdivacky return 0; 794199481Srdivacky} 795199481Srdivacky 796199481Srdivackyunsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 797199481Srdivacky int &FrameIndex) const { 798199481Srdivacky if (isFrameLoadOpcode(MI->getOpcode())) { 799199481Srdivacky unsigned Reg; 800199481Srdivacky if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) 801199481Srdivacky return Reg; 802199481Srdivacky // Check for post-frame index elimination operations 803200581Srdivacky const MachineMemOperand *Dummy; 804200581Srdivacky return hasLoadFromStackSlot(MI, Dummy, FrameIndex); 805199481Srdivacky } 806199481Srdivacky return 0; 807199481Srdivacky} 808199481Srdivacky 809199481Srdivackybool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, 810200581Srdivacky const MachineMemOperand *&MMO, 811199481Srdivacky int &FrameIndex) const { 812199481Srdivacky for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 813199481Srdivacky oe = MI->memoperands_end(); 814199481Srdivacky o != oe; 815199481Srdivacky ++o) { 816199481Srdivacky if ((*o)->isLoad() && (*o)->getValue()) 817199481Srdivacky if (const FixedStackPseudoSourceValue *Value = 818199481Srdivacky dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 819199481Srdivacky FrameIndex = Value->getFrameIndex(); 820200581Srdivacky MMO = *o; 821199481Srdivacky return true; 822199481Srdivacky } 823199481Srdivacky } 824199481Srdivacky return false; 825199481Srdivacky} 826199481Srdivacky 827199481Srdivackyunsigned 
X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, 828199481Srdivacky int &FrameIndex) const { 829199481Srdivacky if (isFrameStoreOpcode(MI->getOpcode())) 830212904Sdim if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 && 831212904Sdim isFrameOperand(MI, 0, FrameIndex)) 832210299Sed return MI->getOperand(X86::AddrNumOperands).getReg(); 833199481Srdivacky return 0; 834199481Srdivacky} 835199481Srdivacky 836199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, 837199481Srdivacky int &FrameIndex) const { 838199481Srdivacky if (isFrameStoreOpcode(MI->getOpcode())) { 839199481Srdivacky unsigned Reg; 840199481Srdivacky if ((Reg = isStoreToStackSlot(MI, FrameIndex))) 841199481Srdivacky return Reg; 842199481Srdivacky // Check for post-frame index elimination operations 843200581Srdivacky const MachineMemOperand *Dummy; 844200581Srdivacky return hasStoreToStackSlot(MI, Dummy, FrameIndex); 845193323Sed } 846193323Sed return 0; 847193323Sed} 848193323Sed 849199481Srdivackybool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, 850200581Srdivacky const MachineMemOperand *&MMO, 851199481Srdivacky int &FrameIndex) const { 852199481Srdivacky for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 853199481Srdivacky oe = MI->memoperands_end(); 854199481Srdivacky o != oe; 855199481Srdivacky ++o) { 856199481Srdivacky if ((*o)->isStore() && (*o)->getValue()) 857199481Srdivacky if (const FixedStackPseudoSourceValue *Value = 858199481Srdivacky dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 859199481Srdivacky FrameIndex = Value->getFrameIndex(); 860200581Srdivacky MMO = *o; 861199481Srdivacky return true; 862199481Srdivacky } 863199481Srdivacky } 864199481Srdivacky return false; 865199481Srdivacky} 866199481Srdivacky 867193323Sed/// regIsPICBase - Return true if register is PIC base (i.e.g defined by 868193323Sed/// X86::MOVPC32r. 
869193323Sedstatic bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { 870193323Sed bool isPICBase = false; 871193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 872193323Sed E = MRI.def_end(); I != E; ++I) { 873193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 874193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 875193323Sed return false; 876193323Sed assert(!isPICBase && "More than one PIC base?"); 877193323Sed isPICBase = true; 878193323Sed } 879193323Sed return isPICBase; 880193323Sed} 881193323Sed 882193323Sedbool 883198090SrdivackyX86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, 884198090Srdivacky AliasAnalysis *AA) const { 885193323Sed switch (MI->getOpcode()) { 886193323Sed default: break; 887193323Sed case X86::MOV8rm: 888193323Sed case X86::MOV16rm: 889193323Sed case X86::MOV32rm: 890193323Sed case X86::MOV64rm: 891193323Sed case X86::LD_Fp64m: 892193323Sed case X86::MOVSSrm: 893193323Sed case X86::MOVSDrm: 894193323Sed case X86::MOVAPSrm: 895199481Srdivacky case X86::MOVUPSrm: 896199481Srdivacky case X86::MOVUPSrm_Int: 897193323Sed case X86::MOVAPDrm: 898193323Sed case X86::MOVDQArm: 899193323Sed case X86::MMX_MOVD64rm: 900199481Srdivacky case X86::MMX_MOVQ64rm: 901199481Srdivacky case X86::FsMOVAPSrm: 902199481Srdivacky case X86::FsMOVAPDrm: { 903193323Sed // Loads from constant pools are trivially rematerializable. 904193323Sed if (MI->getOperand(1).isReg() && 905193323Sed MI->getOperand(2).isImm() && 906193323Sed MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 907198090Srdivacky MI->isInvariantLoad(AA)) { 908193323Sed unsigned BaseReg = MI->getOperand(1).getReg(); 909195098Sed if (BaseReg == 0 || BaseReg == X86::RIP) 910193323Sed return true; 911193323Sed // Allow re-materialization of PIC load. 
912193323Sed if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) 913193323Sed return false; 914193323Sed const MachineFunction &MF = *MI->getParent()->getParent(); 915193323Sed const MachineRegisterInfo &MRI = MF.getRegInfo(); 916193323Sed bool isPICBase = false; 917193323Sed for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), 918193323Sed E = MRI.def_end(); I != E; ++I) { 919193323Sed MachineInstr *DefMI = I.getOperand().getParent(); 920193323Sed if (DefMI->getOpcode() != X86::MOVPC32r) 921193323Sed return false; 922193323Sed assert(!isPICBase && "More than one PIC base?"); 923193323Sed isPICBase = true; 924193323Sed } 925193323Sed return isPICBase; 926193323Sed } 927193323Sed return false; 928193323Sed } 929193323Sed 930193323Sed case X86::LEA32r: 931193323Sed case X86::LEA64r: { 932193323Sed if (MI->getOperand(2).isImm() && 933193323Sed MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && 934193323Sed !MI->getOperand(4).isReg()) { 935193323Sed // lea fi#, lea GV, etc. are all rematerializable. 936193323Sed if (!MI->getOperand(1).isReg()) 937193323Sed return true; 938193323Sed unsigned BaseReg = MI->getOperand(1).getReg(); 939193323Sed if (BaseReg == 0) 940193323Sed return true; 941193323Sed // Allow re-materialization of lea PICBase + x. 942193323Sed const MachineFunction &MF = *MI->getParent()->getParent(); 943193323Sed const MachineRegisterInfo &MRI = MF.getRegInfo(); 944193323Sed return regIsPICBase(BaseReg, MRI); 945193323Sed } 946193323Sed return false; 947193323Sed } 948193323Sed } 949193323Sed 950193323Sed // All other instructions marked M_REMATERIALIZABLE are always trivially 951193323Sed // rematerializable. 952193323Sed return true; 953193323Sed} 954193323Sed 955193323Sed/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that 956193323Sed/// would clobber the EFLAGS condition register. Note the result may be 957193323Sed/// conservative. 
If it cannot definitely determine the safety after visiting 958198090Srdivacky/// a few instructions in each direction it assumes it's not safe. 959193323Sedstatic bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, 960193323Sed MachineBasicBlock::iterator I) { 961206083Srdivacky MachineBasicBlock::iterator E = MBB.end(); 962206083Srdivacky 963193323Sed // It's always safe to clobber EFLAGS at the end of a block. 964206083Srdivacky if (I == E) 965193323Sed return true; 966193323Sed 967193323Sed // For compile time consideration, if we are not able to determine the 968198090Srdivacky // safety after visiting 4 instructions in each direction, we will assume 969198090Srdivacky // it's not safe. 970198090Srdivacky MachineBasicBlock::iterator Iter = I; 971198090Srdivacky for (unsigned i = 0; i < 4; ++i) { 972193323Sed bool SeenDef = false; 973198090Srdivacky for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { 974198090Srdivacky MachineOperand &MO = Iter->getOperand(j); 975193323Sed if (!MO.isReg()) 976193323Sed continue; 977193323Sed if (MO.getReg() == X86::EFLAGS) { 978193323Sed if (MO.isUse()) 979193323Sed return false; 980193323Sed SeenDef = true; 981193323Sed } 982193323Sed } 983193323Sed 984193323Sed if (SeenDef) 985193323Sed // This instruction defines EFLAGS, no need to look any further. 986193323Sed return true; 987198090Srdivacky ++Iter; 988206083Srdivacky // Skip over DBG_VALUE. 989206083Srdivacky while (Iter != E && Iter->isDebugValue()) 990206083Srdivacky ++Iter; 991193323Sed 992193323Sed // If we make it to the end of the block, it's safe to clobber EFLAGS. 993206083Srdivacky if (Iter == E) 994193323Sed return true; 995193323Sed } 996193323Sed 997206083Srdivacky MachineBasicBlock::iterator B = MBB.begin(); 998198090Srdivacky Iter = I; 999198090Srdivacky for (unsigned i = 0; i < 4; ++i) { 1000198090Srdivacky // If we make it to the beginning of the block, it's safe to clobber 1001198090Srdivacky // EFLAGS iff EFLAGS is not live-in. 
1002206083Srdivacky if (Iter == B) 1003198090Srdivacky return !MBB.isLiveIn(X86::EFLAGS); 1004198090Srdivacky 1005198090Srdivacky --Iter; 1006206083Srdivacky // Skip over DBG_VALUE. 1007206083Srdivacky while (Iter != B && Iter->isDebugValue()) 1008206083Srdivacky --Iter; 1009206083Srdivacky 1010198090Srdivacky bool SawKill = false; 1011198090Srdivacky for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { 1012198090Srdivacky MachineOperand &MO = Iter->getOperand(j); 1013198090Srdivacky if (MO.isReg() && MO.getReg() == X86::EFLAGS) { 1014198090Srdivacky if (MO.isDef()) return MO.isDead(); 1015198090Srdivacky if (MO.isKill()) SawKill = true; 1016198090Srdivacky } 1017198090Srdivacky } 1018198090Srdivacky 1019198090Srdivacky if (SawKill) 1020198090Srdivacky // This instruction kills EFLAGS and doesn't redefine it, so 1021198090Srdivacky // there's no need to look further. 1022198090Srdivacky return true; 1023198090Srdivacky } 1024198090Srdivacky 1025193323Sed // Conservative answer. 1026193323Sed return false; 1027193323Sed} 1028193323Sed 1029193323Sedvoid X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, 1030193323Sed MachineBasicBlock::iterator I, 1031198090Srdivacky unsigned DestReg, unsigned SubIdx, 1032199481Srdivacky const MachineInstr *Orig, 1033210299Sed const TargetRegisterInfo &TRI) const { 1034208599Srdivacky DebugLoc DL = Orig->getDebugLoc(); 1035193323Sed 1036193323Sed // MOV32r0 etc. are implemented with xor which clobbers condition code. 1037193323Sed // Re-materialize them as movri instructions to avoid side effects. 
1038198090Srdivacky bool Clone = true; 1039198090Srdivacky unsigned Opc = Orig->getOpcode(); 1040198090Srdivacky switch (Opc) { 1041193323Sed default: break; 1042193323Sed case X86::MOV8r0: 1043202375Srdivacky case X86::MOV16r0: 1044202375Srdivacky case X86::MOV32r0: 1045202375Srdivacky case X86::MOV64r0: { 1046193323Sed if (!isSafeToClobberEFLAGS(MBB, I)) { 1047198090Srdivacky switch (Opc) { 1048193323Sed default: break; 1049193323Sed case X86::MOV8r0: Opc = X86::MOV8ri; break; 1050202375Srdivacky case X86::MOV16r0: Opc = X86::MOV16ri; break; 1051193323Sed case X86::MOV32r0: Opc = X86::MOV32ri; break; 1052204642Srdivacky case X86::MOV64r0: Opc = X86::MOV64ri64i32; break; 1053193323Sed } 1054198090Srdivacky Clone = false; 1055193323Sed } 1056193323Sed break; 1057193323Sed } 1058193323Sed } 1059193323Sed 1060198090Srdivacky if (Clone) { 1061193323Sed MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); 1062193323Sed MBB.insert(I, MI); 1063198090Srdivacky } else { 1064210299Sed BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0); 1065193323Sed } 1066193323Sed 1067198090Srdivacky MachineInstr *NewMI = prior(I); 1068210299Sed NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); 1069193323Sed} 1070193323Sed 1071193323Sed/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that 1072193323Sed/// is not marked dead. 
static bool hasLiveCondCodeDef(MachineInstr *MI) {
  // Scan every operand looking for a def of EFLAGS that is not marked dead.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
/// to a 32-bit superregister and then truncating back down to a 16-bit
/// subregister.
///
/// MIOpc is the 16-bit opcode being converted; MFI/MBBI locate the
/// instruction; LV (may be null) is updated with the new kill points.
/// Returns the final truncating COPY, or the sequence's last instruction.
MachineInstr *
X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
                                           MachineFunction::iterator &MFI,
                                           MachineBasicBlock::iterator &MBBI,
                                           LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  // LEA64_32r writes a 32-bit result using a 64-bit address computation.
  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
    ? X86::LEA64_32r : X86::LEA32r;
  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);

  // Build and insert into an implicit UNDEF value. This is OK because
  // well be shifting and then extracting the lower 16-bits.
  // This has the potential to cause partial register stall. e.g.
  //   movw    (%rbp,%rcx,2), %dx
  //   leal    -65(%rdx), %esi
  // But testing has shown this *does* help performance in 64-bit mode (at
  // least on modern x86 machines).
  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
  MachineInstr *InsMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
    .addReg(leaInReg, RegState::Define, X86::sub_16bit)
    .addReg(Src, getKillRegState(isKill));

  // Build the 32-bit LEA whose addressing operands encode the original
  // 16-bit arithmetic.
  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                    get(Opc), leaOutReg);
  switch (MIOpc) {
  default:
    llvm_unreachable(0);
    break;
  case X86::SHL16ri: {
    // Shift becomes a scaled index: base 0, scale 1<<ShAmt.
    unsigned ShAmt = MI->getOperand(2).getImm();
    MIB.addReg(0).addImm(1 << ShAmt)
       .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
    break;
  }
  case X86::INC16r:
  case X86::INC64_16r:
    addRegOffset(MIB, leaInReg, true, 1);
    break;
  case X86::DEC16r:
  case X86::DEC64_16r:
    addRegOffset(MIB, leaInReg, true, -1);
    break;
  case X86::ADD16ri:
  case X86::ADD16ri8:
    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
    break;
  case X86::ADD16rr: {
    unsigned Src2 = MI->getOperand(2).getReg();
    bool isKill2 = MI->getOperand(2).isKill();
    unsigned leaInReg2 = 0;
    MachineInstr *InsMI2 = 0;
    if (Src == Src2) {
      // ADD16rr %reg1028<kill>, %reg1028
      // just a single insert_subreg.
      addRegReg(MIB, leaInReg, true, leaInReg, false);
    } else {
      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
      // Build and insert into an implicit UNDEF value. This is OK because
      // well be shifting and then extracting the lower 16-bits.
      BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
      InsMI2 =
        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
        .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
        .addReg(Src2, getKillRegState(isKill2));
      addRegReg(MIB, leaInReg, true, leaInReg2, true);
    }
    if (LV && isKill2 && InsMI2)
      LV->replaceKillInstruction(Src2, MI, InsMI2);
    break;
  }
  }

  // Truncate the 32-bit LEA result back down into the 16-bit destination.
  MachineInstr *NewMI = MIB;
  MachineInstr *ExtMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
    .addReg(Dest, RegState::Define | getDeadRegState(isDead))
    .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);

  if (LV) {
    // Update live variables
    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
    if (isKill)
      LV->replaceKillInstruction(Src, MI, InsMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, ExtMI);
  }

  return ExtMI;
}

/// convertToThreeAddress - This method must be implemented by targets that
/// set the
M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand. This allows the X86 target (for
/// example) to convert ADD and SHL instructions into LEA instructions if they
/// would require register copies due to two-addressness.
///
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                    MachineBasicBlock::iterator &MBBI,
                                    LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  // All instructions input are two-addr instructions. Get the known operands.
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  MachineInstr *NewMI = NULL;
  // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
  // we have better subtarget support, enable the 16-bit LEA generation here.
  // 16-bit LEA is also slow on Core2.
  bool DisableLEA16 = true;
  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();

  unsigned MIOpc = MI->getOpcode();
  switch (MIOpc) {
  case X86::SHUFPSrri: {
    assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
    if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;

    // When both shufps sources are the same register, the operation can be
    // expressed as a single-source pshufd with the same immediate.
    unsigned B = MI->getOperand(1).getReg();
    unsigned C = MI->getOperand(2).getReg();
    if (B != C) return 0;
    unsigned A = MI->getOperand(0).getReg();
    unsigned M = MI->getOperand(3).getImm();
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
      .addReg(A, RegState::Define | getDeadRegState(isDead))
      .addReg(B, getKillRegState(isKill)).addImm(M);
    break;
  }
  case X86::SHL64ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    // Only shifts by 1..3 fit in LEA's scale field (2, 4, 8).
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill))
      .addImm(0).addReg(0);
    break;
  }
  case X86::SHL32ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
    break;
  }
  case X86::SHL16ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    // 16-bit LEA is disabled; in 64-bit mode go through the 32-bit
    // promote/truncate helper instead, otherwise give up.
    if (DisableLEA16)
      return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill))
      .addImm(0).addReg(0);
    break;
  }
  default: {
    // The following opcodes also sets the condition code register(s). Only
    // convert them to equivalent lea if the condition code register def's
    // are dead!
    if (hasLiveCondCodeDef(MI))
      return 0;

    switch (MIOpc) {
    default: return 0;
    case X86::INC64r:
    case X86::INC32r:
    case X86::INC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      // inc reg  ==>  lea reg, [src + 1]
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, 1);
      break;
    }
    case X86::INC16r:
    case X86::INC64_16r:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, 1);
      break;
    case X86::DEC64r:
    case X86::DEC32r:
    case X86::DEC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      // dec reg  ==>  lea reg, [src - 1]
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, -1);
      break;
    }
    case X86::DEC16r:
    case X86::DEC64_16r:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, -1);
      break;
    case X86::ADD64rr:
    case X86::ADD32rr: {
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      // add reg, reg2  ==>  lea reg, [src + src2]
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                        .addReg(Dest, RegState::Define |
                                getDeadRegState(isDead)),
                        Src, isKill, Src2, isKill2);
      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD16rr: {
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                        .addReg(Dest, RegState::Define |
                                getDeadRegState(isDead)),
                        Src, isKill, Src2, isKill2);
      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD64ri32:
    case X86::ADD64ri8:
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      // add reg, imm  ==>  lea reg, [src + imm]
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, MI->getOperand(2).getImm());
      break;
    case X86::ADD32ri:
    case X86::ADD32ri8: {
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, MI->getOperand(2).getImm());
      break;
    }
    case X86::ADD16ri:
    case X86::ADD16ri8:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, MI->getOperand(2).getImm());
      break;
    }
  }
  }

  if (!NewMI) return 0;

  if (LV) {  // Update live variables
    if (isKill)
      LV->replaceKillInstruction(Src, MI, NewMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, NewMI);
  }

  MFI->insert(MBBI, NewMI);          // Insert the new inst
  return NewMI;
}

/// commuteInstruction - We have a few instructions that must be hacked on to
/// commute them.
///
/// Plain operand swapping is wrong for SHLD/SHRD (the shift amount must be
/// complemented against the operand width) and for CMOVcc (the condition
/// must be inverted), so those opcodes are rewritten first and the generic
/// implementation then performs the actual operand exchange.  If NewMI is
/// true the rewrite is applied to a clone; otherwise MI is edited in place.
MachineInstr *
X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  switch (MI->getOpcode()) {
  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
    // Map SHLD<->SHRD of the same width and record the operand size so the
    // immediate can be replaced by (Size - Amt) below.
    unsigned Opc;
    unsigned Size;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
    case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
    case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
    case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
    case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
    case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
    }
    unsigned Amt = MI->getOperand(3).getImm();
    if (NewMI) {
      // Caller asked for a fresh instruction: clone, then edit the clone.
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    MI->getOperand(3).setImm(Size-Amt);
    // Let the generic implementation swap the register operands.
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
  case X86::CMOVB16rr:
  case X86::CMOVB32rr:
  case X86::CMOVB64rr:
  case X86::CMOVAE16rr:
  case X86::CMOVAE32rr:
  case X86::CMOVAE64rr:
  case X86::CMOVE16rr:
  case X86::CMOVE32rr:
  case X86::CMOVE64rr:
  case X86::CMOVNE16rr:
  case X86::CMOVNE32rr:
  case X86::CMOVNE64rr:
  case X86::CMOVBE16rr:
  case X86::CMOVBE32rr:
  case X86::CMOVBE64rr:
  case X86::CMOVA16rr:
  case X86::CMOVA32rr:
  case X86::CMOVA64rr:
  case X86::CMOVL16rr:
  case X86::CMOVL32rr:
  case X86::CMOVL64rr:
  case X86::CMOVGE16rr:
  case X86::CMOVGE32rr:
  case X86::CMOVGE64rr:
  case X86::CMOVLE16rr:
  case X86::CMOVLE32rr:
  case X86::CMOVLE64rr:
  case X86::CMOVG16rr:
  case X86::CMOVG32rr:
  case X86::CMOVG64rr:
  case X86::CMOVS16rr:
  case X86::CMOVS32rr:
  case X86::CMOVS64rr:
  case X86::CMOVNS16rr:
  case X86::CMOVNS32rr:
  case X86::CMOVNS64rr:
  case X86::CMOVP16rr:
  case X86::CMOVP32rr:
  case X86::CMOVP64rr:
  case X86::CMOVNP16rr:
  case X86::CMOVNP32rr:
  case X86::CMOVNP64rr:
  case X86::CMOVO16rr:
  case X86::CMOVO32rr:
  case X86::CMOVO64rr:
  case X86::CMOVNO16rr:
  case X86::CMOVNO32rr:
  case X86::CMOVNO64rr: {
    // Commuting a CMOVcc means the other operand is selected, so the
    // condition must be replaced by its inverse before the operand swap.
    unsigned Opc = 0;
    switch (MI->getOpcode()) {
    default: break;
    case X86::CMOVB16rr:  Opc = X86::CMOVAE16rr; break;
    case X86::CMOVB32rr:  Opc = X86::CMOVAE32rr; break;
    case X86::CMOVB64rr:  Opc = X86::CMOVAE64rr; break;
    case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
    case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
    case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
    case X86::CMOVE16rr:  Opc = X86::CMOVNE16rr; break;
    case X86::CMOVE32rr:  Opc = X86::CMOVNE32rr; break;
    case X86::CMOVE64rr:  Opc = X86::CMOVNE64rr; break;
    case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
    case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
    case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
    case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
    case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
    case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
    case X86::CMOVA16rr:  Opc = X86::CMOVBE16rr; break;
    case X86::CMOVA32rr:  Opc = X86::CMOVBE32rr; break;
    case X86::CMOVA64rr:  Opc = X86::CMOVBE64rr; break;
    case X86::CMOVL16rr:  Opc = X86::CMOVGE16rr; break;
    case X86::CMOVL32rr:  Opc = X86::CMOVGE32rr; break;
    case X86::CMOVL64rr:  Opc = X86::CMOVGE64rr; break;
    case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
    case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
    case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
    case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
    case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
    case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
    case X86::CMOVG16rr:  Opc = X86::CMOVLE16rr; break;
    case X86::CMOVG32rr:  Opc = X86::CMOVLE32rr; break;
    case X86::CMOVG64rr:  Opc = X86::CMOVLE64rr; break;
    case X86::CMOVS16rr:  Opc = X86::CMOVNS16rr; break;
    case X86::CMOVS32rr:  Opc = X86::CMOVNS32rr; break;
    case X86::CMOVS64rr:  Opc = X86::CMOVNS64rr; break;
    case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
    case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
    case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
    case X86::CMOVP16rr:  Opc = X86::CMOVNP16rr; break;
    case X86::CMOVP32rr:  Opc = X86::CMOVNP32rr; break;
    case X86::CMOVP64rr:  Opc = X86::CMOVNP64rr; break;
    case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
    case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
    case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
    case X86::CMOVO16rr:  Opc = X86::CMOVNO16rr; break;
    case X86::CMOVO32rr:  Opc = X86::CMOVNO32rr; break;
    case X86::CMOVO64rr:  Opc = X86::CMOVNO64rr; break;
    case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
    case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
    case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
    }
    if (NewMI) {
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    // Fallthrough intended: the generic path below swaps the operands.
  }
  default:
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
}

/// GetCondFromBranchOpc - Map a pc-relative conditional-branch opcode back to
/// its X86::CondCode; returns COND_INVALID for anything that is not one of
/// the JCC_4 opcodes (e.g. unconditional or indirect branches).
static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
  switch (BrOpc) {
  default: return X86::COND_INVALID;
  case X86::JE_4:  return X86::COND_E;
  case X86::JNE_4: return X86::COND_NE;
  case X86::JL_4:  return X86::COND_L;
  case X86::JLE_4: return X86::COND_LE;
  case X86::JG_4:  return X86::COND_G;
  case X86::JGE_4: return X86::COND_GE;
  case X86::JB_4:  return X86::COND_B;
  case X86::JBE_4: return X86::COND_BE;
  case X86::JA_4:  return X86::COND_A;
  case X86::JAE_4: return X86::COND_AE;
  case X86::JS_4:  return X86::COND_S;
  case X86::JNS_4: return X86::COND_NS;
  case X86::JP_4:  return X86::COND_P;
  case X86::JNP_4: return X86::COND_NP;
  case X86::JO_4:  return X86::COND_O;
  case X86::JNO_4: return X86::COND_NO;
  }
}

/// GetCondBranchFromCond - Inverse of GetCondFromBranchOpc: return the JCC_4
/// branch opcode for a simple condition code.  The compound codes
/// (COND_NP_OR_E / COND_NE_OR_P) are not handled here and hit the
/// unreachable; callers synthesize those with two branches.
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Illegal condition code!");
  case X86::COND_E:  return X86::JE_4;
  case X86::COND_NE: return X86::JNE_4;
  case X86::COND_L:  return X86::JL_4;
  case X86::COND_LE: return X86::JLE_4;
  case X86::COND_G:  return X86::JG_4;
  case X86::COND_GE: return X86::JGE_4;
  case X86::COND_B:  return X86::JB_4;
  case X86::COND_BE: return X86::JBE_4;
  case X86::COND_A:  return X86::JA_4;
  case X86::COND_AE: return X86::JAE_4;
  case X86::COND_S:  return X86::JS_4;
  case X86::COND_NS: return X86::JNS_4;
  case X86::COND_P:  return X86::JP_4;
  case X86::COND_NP: return X86::JNP_4;
  case X86::COND_O:  return X86::JO_4;
  case X86::COND_NO: return X86::JNO_4;
  }
}

/// GetOppositeBranchCondition - Return the inverse of the specified condition,
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Illegal condition code!");
  case X86::COND_E:  return X86::COND_NE;
  case X86::COND_NE: return X86::COND_E;
  case X86::COND_L:  return X86::COND_GE;
  case X86::COND_LE: return X86::COND_G;
  case X86::COND_G:  return X86::COND_LE;
  case X86::COND_GE: return X86::COND_L;
  case X86::COND_B:  return X86::COND_AE;
  case X86::COND_BE: return X86::COND_A;
  case X86::COND_A:  return X86::COND_BE;
  case X86::COND_AE: return X86::COND_B;
  case X86::COND_S:  return X86::COND_NS;
  case X86::COND_NS: return X86::COND_S;
  case X86::COND_P:  return X86::COND_NP;
  case X86::COND_NP: return X86::COND_P;
  case X86::COND_O:  return X86::COND_NO;
  case X86::COND_NO: return X86::COND_O;
  }
}

/// isUnpredicatedTerminator - Returns true for terminators that the branch
/// analysis must look at: any branch that is not a barrier, any
/// non-predicable terminator, and any predicable terminator that is not
/// currently predicated.
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.isTerminator()) return false;

  // Conditional branch is a special case.
  if (TID.isBranch() && !TID.isBarrier())
    return true;
  if (!TID.isPredicable())
    return true;
  return !isPredicated(MI);
}

/// AnalyzeBranch - Walk the terminators of MBB from the bottom up and
/// classify the block's control flow into TBB/FBB/Cond per the
/// TargetInstrInfo contract.  Returns true when the terminators cannot be
/// understood (e.g. indirect branches or unsupported condition mixes).
/// When AllowModify is set, dead instructions after an unconditional jump,
/// jumps to the fall-through block, and the "jCC L1; jmp L2; L1:" pattern
/// are rewritten/removed on the fly.
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugValue())
      continue;

    // Working from the bottom, when we see a non-terminator instruction, we're
    // done.
    if (!isUnpredicatedTerminator(I))
      break;

    // A terminator that isn't a branch can't easily be handled by this
    // analysis.
    if (!I->getDesc().isBranch())
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP_4) {
      UnCondBrIter = I;

      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (llvm::next(I) != MBB.end())
        llvm::next(I)->eraseFromParent();

      Cond.clear();
      FBB = 0;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        UnCondBrIter = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
    if (BranchCode == X86::COND_INVALID)
      return true;  // Can't handle indirect branch.

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
      if (AllowModify && UnCondBrIter != MBB.end() &&
          MBB.isLayoutSuccessor(TargetBB)) {
        // If we can modify the code and it ends in something like:
        //
        //   jCC L1
        //   jmp L2
        // L1:
        //   ...
        // L2:
        //
        // Then we can change this to:
        //
        //   jnCC L2
        // L1:
        //   ...
        // L2:
        //
        // Which is a bit more efficient.
        // We conditionally jump to the fall-through block.
        BranchCode = GetOppositeBranchCondition(BranchCode);
        unsigned JNCC = GetCondBranchFromCond(BranchCode);
        MachineBasicBlock::iterator OldInst = I;

        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
          .addMBB(UnCondBrIter->getOperand(0).getMBB());
        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
          .addMBB(TargetBB);
        MBB.addSuccessor(TargetBB);

        OldInst->eraseFromParent();
        UnCondBrIter->eraseFromParent();

        // Restart the analysis.
        UnCondBrIter = MBB.end();
        I = MBB.end();
        continue;
      }

      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination and their condition
    // opcodes fit one of the special multi-branch idioms.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to the same
    // destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;

    // If the conditions are the same, we can leave them alone.
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    if (OldBranchCode == BranchCode)
      continue;

    // If they differ, see if they fit one of the known patterns. Theoretically,
    // we could handle more patterns here, but we shouldn't expect to see them
    // if instruction selection has done a reasonable job.
    if ((OldBranchCode == X86::COND_NP &&
         BranchCode == X86::COND_E) ||
        (OldBranchCode == X86::COND_E &&
         BranchCode == X86::COND_NP))
      BranchCode = X86::COND_NP_OR_E;
    else if ((OldBranchCode == X86::COND_P &&
              BranchCode == X86::COND_NE) ||
             (OldBranchCode == X86::COND_NE &&
              BranchCode == X86::COND_P))
      BranchCode = X86::COND_NE_OR_P;
    else
      return true;

    // Update the MachineOperand.
    Cond[0].setImm(BranchCode);
  }

  return false;
}

/// RemoveBranch - Erase the trailing run of branch instructions (JMP_4 or any
/// recognized JCC_4) from the end of MBB, skipping DBG_VALUEs, and return how
/// many branches were removed.
unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  unsigned Count = 0;

  while (I != MBB.begin()) {
    --I;
    if (I->isDebugValue())
      continue;
    if (I->getOpcode() != X86::JMP_4 &&
        GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
      break;
    // Remove the branch.
    I->eraseFromParent();
    // Re-scan from the (new) end so DBG_VALUEs between branches are skipped.
    I = MBB.end();
    ++Count;
  }

  return Count;
}

/// InsertBranch - Append the branch sequence described by TBB/FBB/Cond to
/// MBB and return the number of instructions emitted.  The compound
/// conditions COND_NP_OR_E / COND_NE_OR_P expand to two conditional jumps.
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                           MachineBasicBlock *FBB,
                           const SmallVectorImpl<MachineOperand> &Cond,
                           DebugLoc DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "X86 branch conditions have one component!");

  if (Cond.empty()) {
    // Unconditional branch?
    assert(!FBB && "Unconditional branch with multiple successors!");
    BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
    return 1;
  }

  // Conditional branch.
  unsigned Count = 0;
  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
  switch (CC) {
  case X86::COND_NP_OR_E:
    // Synthesize NP_OR_E with two branches.
    BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
    ++Count;
    break;
  case X86::COND_NE_OR_P:
    // Synthesize NE_OR_P with two branches.
    BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
    ++Count;
    break;
  default: {
    // Simple condition: a single JCC to the taken block.
    unsigned Opc = GetCondBranchFromCond(CC);
    BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
    ++Count;
  }
  }
  if (FBB) {
    // Two-way Conditional branch. Insert the second branch.
    BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
    ++Count;
  }
  return Count;
}

/// isHReg - Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
  return X86::GR8_ABCD_HRegClass.contains(Reg);
}

// Try and copy between VR128/VR64 and GR64 registers.
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
  // Returns the opcode for one of the four cross-class copies below, or 0
  // if DestReg/SrcReg are not such a pair:
  // SrcReg(VR128) -> DestReg(GR64)
  // SrcReg(VR64) -> DestReg(GR64)
  // SrcReg(GR64) -> DestReg(VR128)
  // SrcReg(GR64) -> DestReg(VR64)

  if (X86::GR64RegClass.contains(DestReg)) {
    if (X86::VR128RegClass.contains(SrcReg)) {
      // Copy from a VR128 register to a GR64 register.
      return X86::MOVPQIto64rr;
    } else if (X86::VR64RegClass.contains(SrcReg)) {
      // Copy from a VR64 register to a GR64 register.
      return X86::MOVSDto64rr;
    }
  } else if (X86::GR64RegClass.contains(SrcReg)) {
    // Copy from a GR64 register to a VR128 register.
    if (X86::VR128RegClass.contains(DestReg))
      return X86::MOV64toPQIrr;
    // Copy from a GR64 register to a VR64 register.
    else if (X86::VR64RegClass.contains(DestReg))
      return X86::MOV64toSDrr;
  }

  return 0;
}

/// copyPhysReg - Emit a register-to-register copy from SrcReg to DestReg.
/// Same-class GPR/VR copies use a plain move; GR64<->VR128/VR64 copies go
/// through CopyToFromAsymmetricReg; EFLAGS copies are synthesized with a
/// pushf/pop (or push/popf) pair.  Unsupported pairs are a fatal error.
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI, DebugLoc DL,
                               unsigned DestReg, unsigned SrcReg,
                               bool KillSrc) const {
  // First deal with the normal symmetric copies.
  unsigned Opc = 0;
  if (X86::GR64RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV64rr;
  else if (X86::GR32RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV32rr;
  else if (X86::GR16RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV16rr;
  else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
    // Copying to or from a physical H register on x86-64 requires a NOREX
    // move.  Otherwise use a normal move.
    if ((isHReg(DestReg) || isHReg(SrcReg)) &&
        TM.getSubtarget<X86Subtarget>().is64Bit())
      Opc = X86::MOV8rr_NOREX;
    else
      Opc = X86::MOV8rr;
  } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOVAPSrr;
  else if (X86::VR64RegClass.contains(DestReg, SrcReg))
    Opc = X86::MMX_MOVQ64rr;
  else
    Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);

  if (Opc) {
    BuildMI(MBB, MI, DL, get(Opc), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Moving EFLAGS to / from another register requires a push and a pop.
  if (SrcReg == X86::EFLAGS) {
    if (X86::GR64RegClass.contains(DestReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSHF64));
      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
      return;
    } else if (X86::GR32RegClass.contains(DestReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSHF32));
      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
      return;
    }
  }
  if (DestReg == X86::EFLAGS) {
    if (X86::GR64RegClass.contains(SrcReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSH64r))
        .addReg(SrcReg, getKillRegState(KillSrc));
      BuildMI(MBB, MI, DL, get(X86::POPF64));
      return;
    } else if (X86::GR32RegClass.contains(SrcReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSH32r))
        .addReg(SrcReg, getKillRegState(KillSrc));
      BuildMI(MBB, MI, DL, get(X86::POPF32));
      return;
    }
  }

  DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
               << " to " << RI.getName(DestReg) << '\n');
  llvm_unreachable("Cannot emit physreg copy instruction");
}

/// getLoadStoreRegOpcode - Select the load (load == true) or store opcode
/// for spilling/reloading a register of class RC.  isStackAligned selects
/// aligned vs. unaligned SSE moves; TM is consulted for the H-register
/// NOREX constraint on x86-64.
static unsigned getLoadStoreRegOpcode(unsigned Reg,
                                      const TargetRegisterClass *RC,
                                      bool isStackAligned,
                                      const TargetMachine &TM,
                                      bool load) {
  switch (RC->getID()) {
  default:
    llvm_unreachable("Unknown regclass");
  case X86::GR64RegClassID:
  case X86::GR64_NOSPRegClassID:
    return load ? X86::MOV64rm : X86::MOV64mr;
  case X86::GR32RegClassID:
  case X86::GR32_NOSPRegClassID:
  case X86::GR32_ADRegClassID:
    return load ? X86::MOV32rm : X86::MOV32mr;
  case X86::GR16RegClassID:
    return load ? X86::MOV16rm : X86::MOV16mr;
  case X86::GR8RegClassID:
    // Copying to or from a physical H register on x86-64 requires a NOREX
    // move.  Otherwise use a normal move.
    if (isHReg(Reg) &&
        TM.getSubtarget<X86Subtarget>().is64Bit())
      return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
    else
      return load ? X86::MOV8rm : X86::MOV8mr;
  case X86::GR64_ABCDRegClassID:
    return load ? X86::MOV64rm : X86::MOV64mr;
  case X86::GR32_ABCDRegClassID:
    return load ? X86::MOV32rm : X86::MOV32mr;
  case X86::GR16_ABCDRegClassID:
    return load ? X86::MOV16rm : X86::MOV16mr;
  case X86::GR8_ABCD_LRegClassID:
    return load ? X86::MOV8rm : X86::MOV8mr;
  case X86::GR8_ABCD_HRegClassID:
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
    else
      return load ? X86::MOV8rm : X86::MOV8mr;
  case X86::GR64_NOREXRegClassID:
  case X86::GR64_NOREX_NOSPRegClassID:
    return load ? X86::MOV64rm : X86::MOV64mr;
  case X86::GR32_NOREXRegClassID:
    return load ? X86::MOV32rm : X86::MOV32mr;
  case X86::GR16_NOREXRegClassID:
    return load ? X86::MOV16rm : X86::MOV16mr;
  case X86::GR8_NOREXRegClassID:
    return load ? X86::MOV8rm : X86::MOV8mr;
  case X86::GR64_TCRegClassID:
    return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
  case X86::GR32_TCRegClassID:
    return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
  case X86::RFP80RegClassID:
    return load ? X86::LD_Fp80m : X86::ST_FpP80m;
  case X86::RFP64RegClassID:
    return load ? X86::LD_Fp64m : X86::ST_Fp64m;
  case X86::RFP32RegClassID:
    return load ? X86::LD_Fp32m : X86::ST_Fp32m;
  case X86::FR32RegClassID:
    return load ? X86::MOVSSrm : X86::MOVSSmr;
  case X86::FR64RegClassID:
    return load ? X86::MOVSDrm : X86::MOVSDmr;
  case X86::VR128RegClassID:
    // If stack is realigned we can use aligned stores.
    if (isStackAligned)
      return load ? X86::MOVAPSrm : X86::MOVAPSmr;
    else
      return load ? X86::MOVUPSrm : X86::MOVUPSmr;
  case X86::VR64RegClassID:
    return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
  }
}

/// getStoreRegOpcode - Convenience wrapper: select the store opcode for RC.
static unsigned getStoreRegOpcode(unsigned SrcReg,
                                  const TargetRegisterClass *RC,
                                  bool isStackAligned,
                                  TargetMachine &TM) {
  return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
}


/// getLoadRegOpcode - Convenience wrapper: select the load opcode for RC.
static unsigned getLoadRegOpcode(unsigned DestReg,
                                 const TargetRegisterClass *RC,
                                 bool isStackAligned,
                                 const TargetMachine &TM) {
  return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
}

/// storeRegToStackSlot - Emit a store of SrcReg to stack slot FrameIdx
/// before MI, marking SrcReg killed when isKill is set.
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned SrcReg, bool isKill, int FrameIdx,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  const MachineFunction &MF = *MBB.getParent();
  assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
         "Stack slot too small for store");
  // Aligned SSE spills are legal if the stack is 16-byte aligned or can be
  // realigned by the prologue.
  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
  DebugLoc DL = MBB.findDebugLoc(MI);
  addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
    .addReg(SrcReg, getKillRegState(isKill));
}

/// storeRegToAddr - Build (but do not insert) a store of SrcReg to the
/// address described by Addr, attach the given memory operands, and append
/// the new instruction to NewMIs.
void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
                                  bool isKill,
                                  SmallVectorImpl<MachineOperand> &Addr,
                                  const TargetRegisterClass *RC,
                                  MachineInstr::mmo_iterator MMOBegin,
                                  MachineInstr::mmo_iterator MMOEnd,
                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
  // Alignment is taken from the first memory operand, if any.
  bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
  DebugLoc DL;
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
    MIB.addOperand(Addr[i]);
  MIB.addReg(SrcReg, getKillRegState(isKill));
  (*MIB).setMemRefs(MMOBegin, MMOEnd);
  NewMIs.push_back(MIB);
}


/// loadRegFromStackSlot - Emit a reload of DestReg from stack slot FrameIdx
/// before MI.
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        unsigned DestReg, int FrameIdx,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI) const {
  const MachineFunction &MF = *MBB.getParent();
  // Aligned SSE reloads are legal if the stack is 16-byte aligned or can be
  // realigned by the prologue.
  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
  DebugLoc DL = MBB.findDebugLoc(MI);
  addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
}

/// loadRegFromAddr - Build (but do not insert) a load of DestReg from the
/// address described by Addr, attach the given memory operands, and append
/// the new instruction to NewMIs.
void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                   SmallVectorImpl<MachineOperand> &Addr,
                                   const TargetRegisterClass *RC,
                                   MachineInstr::mmo_iterator MMOBegin,
                                   MachineInstr::mmo_iterator MMOEnd,
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
  // Alignment is taken from the first memory operand, if any.
  bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
  DebugLoc DL;
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
    MIB.addOperand(Addr[i]);
  (*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
}

// Spill the callee-saved registers in CSI before MI. GPRs are PUSHed (and
// counted in the callee-saved frame size); XMM registers — and everything on
// Win64 — are stored to their assigned stack slots instead.
bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MI,
                                       const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
  unsigned SlotSize = is64Bit ? 8 : 4;

  MachineFunction &MF = *MBB.getParent();
  unsigned FPReg = RI.getFrameRegister(MF);
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CalleeFrameSize = 0;

  unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
  // Note: the CSI list is walked in reverse here (and forward in
  // restoreCalleeSavedRegisters below).
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    if (Reg == FPReg)
      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
      continue;
    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
      CalleeFrameSize += SlotSize;
      BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
    } else {
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
                          RC, &RI);
    }
  }

  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
  return true;
}

// Restore the callee-saved registers in CSI before MI; inverse of
// spillCalleeSavedRegisters above (POP for GPRs, stack-slot reload for the
// XMM / Win64 cases).
bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                       const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  unsigned FPReg = RI.getFrameRegister(MF);
  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
  unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (Reg == FPReg)
      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
continue;
    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
      BuildMI(MBB, MI, DL, get(Opc), Reg);
    } else {
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
                           RC, &RI);
    }
  }
  return true;
}

// Build a DBG_VALUE machine instruction referring to the frame-index-based
// address FrameIx plus Offset, with MDPtr describing the variable.
MachineInstr*
X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                       int FrameIx, uint64_t Offset,
                                       const MDNode *MDPtr,
                                       DebugLoc DL) const {
  X86AddressMode AM;
  AM.BaseType = X86AddressMode::FrameIndexBase;
  AM.Base.FrameIndex = FrameIx;
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE));
  addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr);
  return &*MIB;
}

// Build the fused instruction for a two-address fold: the memory reference
// in MOs replaces BOTH tied register operands (operands 0 and 1) of MI.
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
                                     const SmallVectorImpl<MachineOperand> &MOs,
                                     MachineInstr *MI,
                                     const TargetInstrInfo &TII) {
  // Create the base instruction with the memory operand as the first part.
  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                              MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(NewMI);
  unsigned NumAddrOps = MOs.size();
  for (unsigned i = 0; i != NumAddrOps; ++i)
    MIB.addOperand(MOs[i]);
  if (NumAddrOps < 4) // FrameIndex only
    addOffset(MIB, 0);

  // Loop over the rest of the ri operands, converting them over.
  unsigned NumOps = MI->getDesc().getNumOperands()-2;
  for (unsigned i = 0; i != NumOps; ++i) {
    MachineOperand &MO = MI->getOperand(i+2);
    MIB.addOperand(MO);
  }
  // Copy operands past the declared operand count as well (presumably
  // implicit operands — confirm against MachineInstr layout).
  for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    MIB.addOperand(MO);
  }
  return MIB;
}

// Build the fused instruction for a normal fold: the memory reference in MOs
// replaces the single register operand OpNo of MI; all other operands are
// copied through unchanged.
static MachineInstr *FuseInst(MachineFunction &MF,
                              unsigned Opcode, unsigned OpNo,
                              const SmallVectorImpl<MachineOperand> &MOs,
                              MachineInstr *MI, const TargetInstrInfo &TII) {
  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                              MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(NewMI);

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (i == OpNo) {
      assert(MO.isReg() && "Expected to fold into reg operand!");
      unsigned NumAddrOps = MOs.size();
      for (unsigned i = 0; i != NumAddrOps; ++i)
        MIB.addOperand(MOs[i]);
      if (NumAddrOps < 4) // FrameIndex only
        addOffset(MIB, 0);
    } else {
      MIB.addOperand(MO);
    }
  }
  return MIB;
}

// Build a "store immediate 0" instruction to the address in MOs; used below
// when folding the MOVxr0 pseudos (see foldMemoryOperandImpl).
static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
                                const SmallVectorImpl<MachineOperand> &MOs,
                                MachineInstr *MI) {
  MachineFunction &MF = *MI->getParent()->getParent();
  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));

  unsigned NumAddrOps = MOs.size();
  for (unsigned i = 0; i != NumAddrOps; ++i)
    MIB.addOperand(MOs[i]);
  if
(NumAddrOps < 4) // FrameIndex only
    addOffset(MIB, 0);
  return MIB.addImm(0);
}

// Core folding routine: try to fold the memory reference described by MOs
// (a frame index, or a full 5-part x86 address) into operand i of MI.
// Size/Align describe the memory object (0 Size = unknown). Returns the new
// instruction, or NULL if no fold is possible.
MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                    MachineInstr *MI, unsigned i,
                                    const SmallVectorImpl<MachineOperand> &MOs,
                                    unsigned Size, unsigned Align) const {
  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
  bool isTwoAddrFold = false;
  unsigned NumOps = MI->getDesc().getNumOperands();
  bool isTwoAddr = NumOps > 1 &&
    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;

  MachineInstr *NewMI = NULL;
  // Folding a memory location into the two-address part of a two-address
  // instruction is different than folding it other places. It requires
  // replacing the *two* registers with the memory location.
  if (isTwoAddr && NumOps >= 2 && i < 2 &&
      MI->getOperand(0).isReg() &&
      MI->getOperand(1).isReg() &&
      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
    isTwoAddrFold = true;
  } else if (i == 0) { // If operand 0
    // The MOVxr0 pseudos fold directly to a store of immediate zero.
    if (MI->getOpcode() == X86::MOV64r0)
      NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
    else if (MI->getOpcode() == X86::MOV32r0)
      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV16r0)
      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV8r0)
      NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
    if (NewMI)
      return NewMI;

    OpcodeTablePtr = &RegOp2MemOpTable0;
  } else if (i == 1) {
    OpcodeTablePtr = &RegOp2MemOpTable1;
  } else if (i == 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2;
  }

  // If table selected...
  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
    if (I != OpcodeTablePtr->end()) {
      unsigned Opcode = I->second.first;
      // The table's second field is the minimum alignment the folded memory
      // access requires.
      unsigned MinAlign = I->second.second;
      if (Align < MinAlign)
        return NULL;
      bool NarrowToMOV32rm = false;
      if (Size) {
        unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
        if (Size < RCSize) {
          // Check if it's safe to fold the load. If the size of the object is
          // narrower than the load width, then it's not.
          if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
            return NULL;
          // If this is a 64-bit load, but the spill slot is 32, then we can do
          // a 32-bit load which is implicitly zero-extended. This likely is due
          // to liveintervalanalysis remat'ing a load from stack slot.
          if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
            return NULL;
          Opcode = X86::MOV32rm;
          NarrowToMOV32rm = true;
        }
      }

      if (isTwoAddrFold)
        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
      else
        NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);

      if (NarrowToMOV32rm) {
        // If this is the special case where we use a MOV32rm to load a 32-bit
        // value and zero-extend the top bits. Change the destination register
        // to a 32-bit one.
        unsigned DstReg = NewMI->getOperand(0).getReg();
        if (TargetRegisterInfo::isPhysicalRegister(DstReg))
          NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
                                                   X86::sub_32bit));
        else
          NewMI->getOperand(0).setSubReg(X86::sub_32bit);
      }
      return NewMI;
    }
  }

  // No fusion
  if (PrintFailedFusing && !MI->isCopy())
    dbgs() << "We failed to fuse operand " << i << " in " << *MI;
  return NULL;
}


// Frame-index variant: fold a load from / store to stack slot FrameIndex
// into the operand(s) listed in Ops, delegating to the core routine above.
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                           const SmallVectorImpl<unsigned> &Ops,
                                                  int FrameIndex) const {
  // Check switch flag
  if (NoFusing) return NULL;

  // NOTE(review): these scalar SSE ops are refused unless optimizing for
  // size — presumably to avoid partial-register update stalls; confirm.
  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
    switch (MI->getOpcode()) {
    case X86::CVTSD2SSrr:
    case X86::Int_CVTSD2SSrr:
    case X86::CVTSS2SDrr:
    case X86::Int_CVTSS2SDrr:
    case X86::RCPSSr:
    case X86::RCPSSr_Int:
    case X86::ROUNDSDr_Int:
    case X86::ROUNDSSr_Int:
    case X86::RSQRTSSr:
    case X86::RSQRTSSr_Int:
    case X86::SQRTSSr:
    case X86::SQRTSSr_Int:
      return 0;
    }

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned Size = MFI->getObjectSize(FrameIndex);
  unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    // Folding into both operands of a TEST: rewrite it as CMP r, 0 first.
    unsigned NewOpc = 0;
unsigned RCSize = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
    }
    // Check if it's safe to fold the load. If the size of the object is
    // narrower than the load width, then it's not.
    if (Size < RCSize)
      return NULL;
    // Change to CMPXXri r, 0 first.
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  SmallVector<MachineOperand,4> MOs;
  MOs.push_back(MachineOperand::CreateFI(FrameIndex));
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
}

// LoadMI variant: fold the load performed by LoadMI (a real load, or one of
// the constant-materializing pseudos) into MI's operand(s) in Ops.
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                           const SmallVectorImpl<unsigned> &Ops,
                                                  MachineInstr *LoadMI) const {
  // Check switch flag
  if (NoFusing) return NULL;

  // Same exclusion list as the frame-index variant above. NOTE(review):
  // presumably avoids partial-register update stalls; confirm.
  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
    switch (MI->getOpcode()) {
    case X86::CVTSD2SSrr:
    case X86::Int_CVTSD2SSrr:
    case X86::CVTSS2SDrr:
    case X86::Int_CVTSS2SDrr:
    case X86::RCPSSr:
    case X86::RCPSSr_Int:
    case X86::ROUNDSDr_Int:
    case X86::ROUNDSSr_Int:
    case X86::RSQRTSSr:
    case X86::RSQRTSSr_Int:
    case X86::SQRTSSr:
    case X86::SQRTSSr_Int:
      return 0;
    }

  // Determine the alignment of the load.
  unsigned Alignment = 0;
  if (LoadMI->hasOneMemOperand())
    Alignment = (*LoadMI->memoperands_begin())->getAlignment();
  else
    // Constant-materializing pseudos carry no memoperand; use the natural
    // alignment of the constant they will load (see the constant-pool code
    // further down, which passes this Alignment to getConstantPoolIndex).
    switch (LoadMI->getOpcode()) {
    case X86::AVX_SET0PSY:
    case X86::AVX_SET0PDY:
      Alignment = 32;
      break;
    case X86::V_SET0PS:
    case X86::V_SET0PD:
    case X86::V_SET0PI:
    case X86::V_SETALLONES:
    case X86::AVX_SET0PS:
    case X86::AVX_SET0PD:
    case X86::AVX_SET0PI:
      Alignment = 16;
      break;
    case X86::FsFLD0SD:
      Alignment = 8;
      break;
    case X86::FsFLD0SS:
      Alignment = 4;
      break;
    default:
      llvm_unreachable("Don't know how to fold this instruction!");
    }
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
    }
    // Change to CMPXXri r, 0 first.
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  // Make sure the subregisters match.
  // Otherwise we risk changing the size of the load.
  if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
    return NULL;

  SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
  switch (LoadMI->getOpcode()) {
  case X86::V_SET0PS:
  case X86::V_SET0PD:
  case X86::V_SET0PI:
  case X86::V_SETALLONES:
  case X86::AVX_SET0PS:
  case X86::AVX_SET0PD:
  case X86::AVX_SET0PI:
  case X86::AVX_SET0PSY:
  case X86::AVX_SET0PDY:
  case X86::FsFLD0SD:
  case X86::FsFLD0SS: {
    // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
    // Create a constant-pool entry and operands to load from it.

    // Medium and large mode can't fold loads this way.
    if (TM.getCodeModel() != CodeModel::Small &&
        TM.getCodeModel() != CodeModel::Kernel)
      return NULL;

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (TM.getRelocationModel() == Reloc::PIC_) {
      if (TM.getSubtarget<X86Subtarget>().is64Bit())
        PICBase = X86::RIP;
      else
        // FIXME: PICBase = getGlobalBaseReg(&MF);
        // This doesn't work for several reasons.
        // 1. GlobalBaseReg may have been spilled.
        // 2. It may not be live at MI.
        return NULL;
    }

    // Create a constant-pool entry.
    MachineConstantPool &MCP = *MF.getConstantPool();
    const Type *Ty;
    unsigned Opc = LoadMI->getOpcode();
    if (Opc == X86::FsFLD0SS)
      Ty = Type::getFloatTy(MF.getFunction()->getContext());
    else if (Opc == X86::FsFLD0SD)
      Ty = Type::getDoubleTy(MF.getFunction()->getContext());
    else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
      Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
    else
      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
    const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
                        Constant::getAllOnesValue(Ty) :
                        Constant::getNullValue(Ty);
    unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);

    // Create operands to load from the constant pool entry.
    MOs.push_back(MachineOperand::CreateReg(PICBase, false));
    MOs.push_back(MachineOperand::CreateImm(1));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    break;
  }
  default: {
    // Folding a normal load. Just copy the load's address operands.
    unsigned NumOps = LoadMI->getDesc().getNumOperands();
    for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
      MOs.push_back(LoadMI->getOperand(i));
    break;
  }
  }
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
}


// canFoldMemoryOperand - return true if a memory operand could be folded
// into the given operand(s) of MI, without actually performing the fold.
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                  const SmallVectorImpl<unsigned> &Ops) const {
  // Check switch flag
  if (NoFusing) return 0;

  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    switch (MI->getOpcode()) {
    default: return false;
    case X86::TEST8rr:
    case X86::TEST16rr:
    case X86::TEST32rr:
    case X86::TEST64rr:
      return true;
    }
  }

  if (Ops.size() != 1)
    return false;

  unsigned OpNum = Ops[0];
  unsigned Opc = MI->getOpcode();
  unsigned NumOps = MI->getDesc().getNumOperands();
  bool isTwoAddr = NumOps > 1 &&
    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;

  // Folding a memory location into the two-address part of a two-address
  // instruction is different than folding it other places. It requires
  // replacing the *two* registers with the memory location.
const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
  } else if (OpNum == 0) { // If operand 0
    // The MOVxr0 pseudos are always foldable (see foldMemoryOperandImpl).
    switch (Opc) {
    case X86::MOV8r0:
    case X86::MOV16r0:
    case X86::MOV32r0:
    case X86::MOV64r0:
      return true;
    default: break;
    }
    OpcodeTablePtr = &RegOp2MemOpTable0;
  } else if (OpNum == 1) {
    OpcodeTablePtr = &RegOp2MemOpTable1;
  } else if (OpNum == 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2;
  }

  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      OpcodeTablePtr->find((unsigned*)Opc);
    if (I != OpcodeTablePtr->end())
      return true;
  }
  // Not in our tables; defer to the target-independent implementation.
  return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
}

// unfoldMemoryOperand - turn MI (an instruction with a folded memory
// operand) back into register form, appending the replacement sequence
// (optional reload, the data-processing op, optional store) to NewMIs.
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
    MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
  if (I == MemOp2RegOpTable.end())
    return false;
  unsigned Opc = I->second.first;
  // The table value packs the memory-operand index in bits 0-3 and the
  // folded-load / folded-store flags in bits 4 and 5.
  unsigned Index = I->second.second & 0xf;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  if (UnfoldLoad && !FoldedLoad)
    return false;
  UnfoldLoad &= FoldedLoad;
  if (UnfoldStore && !FoldedStore)
    return false;
  UnfoldStore &= FoldedStore;

  const TargetInstrDesc &TID = get(Opc);
  const TargetOperandInfo &TOI = TID.OpInfo[Index];
  const TargetRegisterClass *RC = TOI.getRegClass(&RI);
  if (!MI->hasOneMemOperand() &&
      RC == &X86::VR128RegClass &&
      !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
    // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
    // conservatively assume the address is unaligned. That's bad for
    // performance.
    return false;
  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
  SmallVector<MachineOperand,2> BeforeOps;
  SmallVector<MachineOperand,2> AfterOps;
  SmallVector<MachineOperand,4> ImpOps;
  // Partition MI's operands: the address, implicit registers, and the
  // explicit operands before/after the folded one.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI->getOperand(i);
    if (i >= Index && i < Index + X86::AddrNumOperands)
      AddrOps.push_back(Op);
    else if (Op.isReg() && Op.isImplicit())
      ImpOps.push_back(Op);
    else if (i < Index)
      BeforeOps.push_back(Op);
    else if (i > Index)
      AfterOps.push_back(Op);
  }

  // Emit the load instruction.
  if (UnfoldLoad) {
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractLoadMemRefs(MI->memoperands_begin(),
                            MI->memoperands_end());
    loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
    if (UnfoldStore) {
      // Address operands cannot be marked isKill.
      for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
        MachineOperand &MO = NewMIs[0]->getOperand(i);
        if (MO.isReg())
          MO.setIsKill(false);
      }
    }
  }

  // Emit the data processing instruction.
  MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(DataMI);

  if (FoldedStore)
    MIB.addReg(Reg, RegState::Define);
  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
    MIB.addOperand(BeforeOps[i]);
  if (FoldedLoad)
    MIB.addReg(Reg);
  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
    MIB.addOperand(AfterOps[i]);
  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
    MachineOperand &MO = ImpOps[i];
    MIB.addReg(MO.getReg(),
               getDefRegState(MO.isDef()) |
               RegState::Implicit |
               getKillRegState(MO.isKill()) |
               getDeadRegState(MO.isDead()) |
               getUndefRegState(MO.isUndef()));
  }
  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
  unsigned NewOpc = 0;
  switch (DataMI->getOpcode()) {
  default: break;
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP8ri: {
    MachineOperand &MO0 = DataMI->getOperand(0);
    MachineOperand &MO1 = DataMI->getOperand(1);
    if (MO1.getImm() == 0) {
      switch (DataMI->getOpcode()) {
      default: break;
      case X86::CMP64ri8:
      case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
      case X86::CMP32ri8:
      case X86::CMP32ri:   NewOpc = X86::TEST32rr; break;
      case X86::CMP16ri8:
      case X86::CMP16ri:   NewOpc = X86::TEST16rr; break;
      case X86::CMP8ri:    NewOpc = X86::TEST8rr; break;
      }
      DataMI->setDesc(get(NewOpc));
      MO1.ChangeToRegister(MO0.getReg(), false);
    }
  }
  }
  NewMIs.push_back(DataMI);

  // Emit the store instruction.
2697193323Sed if (UnfoldStore) { 2698198090Srdivacky const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); 2699198090Srdivacky std::pair<MachineInstr::mmo_iterator, 2700198090Srdivacky MachineInstr::mmo_iterator> MMOs = 2701198090Srdivacky MF.extractStoreMemRefs(MI->memoperands_begin(), 2702198090Srdivacky MI->memoperands_end()); 2703198090Srdivacky storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); 2704193323Sed } 2705193323Sed 2706193323Sed return true; 2707193323Sed} 2708193323Sed 2709193323Sedbool 2710193323SedX86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 2711193323Sed SmallVectorImpl<SDNode*> &NewNodes) const { 2712193323Sed if (!N->isMachineOpcode()) 2713193323Sed return false; 2714193323Sed 2715199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2716193323Sed MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); 2717193323Sed if (I == MemOp2RegOpTable.end()) 2718193323Sed return false; 2719193323Sed unsigned Opc = I->second.first; 2720193323Sed unsigned Index = I->second.second & 0xf; 2721193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2722193323Sed bool FoldedStore = I->second.second & (1 << 5); 2723193323Sed const TargetInstrDesc &TID = get(Opc); 2724198090Srdivacky const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); 2725193323Sed unsigned NumDefs = TID.NumDefs; 2726193323Sed std::vector<SDValue> AddrOps; 2727193323Sed std::vector<SDValue> BeforeOps; 2728193323Sed std::vector<SDValue> AfterOps; 2729193323Sed DebugLoc dl = N->getDebugLoc(); 2730193323Sed unsigned NumOps = N->getNumOperands(); 2731193323Sed for (unsigned i = 0; i != NumOps-1; ++i) { 2732193323Sed SDValue Op = N->getOperand(i); 2733210299Sed if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands) 2734193323Sed AddrOps.push_back(Op); 2735193323Sed else if (i < Index-NumDefs) 2736193323Sed BeforeOps.push_back(Op); 2737193323Sed else if (i > Index-NumDefs) 
2738193323Sed AfterOps.push_back(Op); 2739193323Sed } 2740193323Sed SDValue Chain = N->getOperand(NumOps-1); 2741193323Sed AddrOps.push_back(Chain); 2742193323Sed 2743193323Sed // Emit the load instruction. 2744193323Sed SDNode *Load = 0; 2745198090Srdivacky MachineFunction &MF = DAG.getMachineFunction(); 2746193323Sed if (FoldedLoad) { 2747198090Srdivacky EVT VT = *RC->vt_begin(); 2748199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2749199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2750199481Srdivacky MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2751199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2752210299Sed if (!(*MMOs.first) && 2753210299Sed RC == &X86::VR128RegClass && 2754210299Sed !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) 2755210299Sed // Do not introduce a slow unaligned load. 2756210299Sed return false; 2757210299Sed bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; 2758198090Srdivacky Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, 2759198090Srdivacky VT, MVT::Other, &AddrOps[0], AddrOps.size()); 2760193323Sed NewNodes.push_back(Load); 2761198090Srdivacky 2762198090Srdivacky // Preserve memory reference information. 2763198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2764193323Sed } 2765193323Sed 2766193323Sed // Emit the data processing instruction. 
2767198090Srdivacky std::vector<EVT> VTs; 2768193323Sed const TargetRegisterClass *DstRC = 0; 2769193323Sed if (TID.getNumDefs() > 0) { 2770198090Srdivacky DstRC = TID.OpInfo[0].getRegClass(&RI); 2771193323Sed VTs.push_back(*DstRC->vt_begin()); 2772193323Sed } 2773193323Sed for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 2774198090Srdivacky EVT VT = N->getValueType(i); 2775193323Sed if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) 2776193323Sed VTs.push_back(VT); 2777193323Sed } 2778193323Sed if (Load) 2779193323Sed BeforeOps.push_back(SDValue(Load, 0)); 2780193323Sed std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); 2781198090Srdivacky SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], 2782198090Srdivacky BeforeOps.size()); 2783193323Sed NewNodes.push_back(NewNode); 2784193323Sed 2785193323Sed // Emit the store instruction. 2786193323Sed if (FoldedStore) { 2787193323Sed AddrOps.pop_back(); 2788193323Sed AddrOps.push_back(SDValue(NewNode, 0)); 2789193323Sed AddrOps.push_back(Chain); 2790199481Srdivacky std::pair<MachineInstr::mmo_iterator, 2791199481Srdivacky MachineInstr::mmo_iterator> MMOs = 2792199481Srdivacky MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2793199481Srdivacky cast<MachineSDNode>(N)->memoperands_end()); 2794210299Sed if (!(*MMOs.first) && 2795210299Sed RC == &X86::VR128RegClass && 2796210299Sed !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) 2797210299Sed // Do not introduce a slow unaligned store. 2798210299Sed return false; 2799210299Sed bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; 2800198090Srdivacky SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, 2801198090Srdivacky isAligned, TM), 2802198090Srdivacky dl, MVT::Other, 2803198090Srdivacky &AddrOps[0], AddrOps.size()); 2804193323Sed NewNodes.push_back(Store); 2805198090Srdivacky 2806198090Srdivacky // Preserve memory reference information. 
2807198090Srdivacky cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2808193323Sed } 2809193323Sed 2810193323Sed return true; 2811193323Sed} 2812193323Sed 2813193323Sedunsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, 2814198892Srdivacky bool UnfoldLoad, bool UnfoldStore, 2815198892Srdivacky unsigned *LoadRegIndex) const { 2816199481Srdivacky DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2817193323Sed MemOp2RegOpTable.find((unsigned*)Opc); 2818193323Sed if (I == MemOp2RegOpTable.end()) 2819193323Sed return 0; 2820193323Sed bool FoldedLoad = I->second.second & (1 << 4); 2821193323Sed bool FoldedStore = I->second.second & (1 << 5); 2822193323Sed if (UnfoldLoad && !FoldedLoad) 2823193323Sed return 0; 2824193323Sed if (UnfoldStore && !FoldedStore) 2825193323Sed return 0; 2826198892Srdivacky if (LoadRegIndex) 2827198892Srdivacky *LoadRegIndex = I->second.second & 0xf; 2828193323Sed return I->second.first; 2829193323Sed} 2830193323Sed 2831202878Srdivackybool 2832202878SrdivackyX86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 2833202878Srdivacky int64_t &Offset1, int64_t &Offset2) const { 2834202878Srdivacky if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) 2835202878Srdivacky return false; 2836202878Srdivacky unsigned Opc1 = Load1->getMachineOpcode(); 2837202878Srdivacky unsigned Opc2 = Load2->getMachineOpcode(); 2838202878Srdivacky switch (Opc1) { 2839202878Srdivacky default: return false; 2840202878Srdivacky case X86::MOV8rm: 2841202878Srdivacky case X86::MOV16rm: 2842202878Srdivacky case X86::MOV32rm: 2843202878Srdivacky case X86::MOV64rm: 2844202878Srdivacky case X86::LD_Fp32m: 2845202878Srdivacky case X86::LD_Fp64m: 2846202878Srdivacky case X86::LD_Fp80m: 2847202878Srdivacky case X86::MOVSSrm: 2848202878Srdivacky case X86::MOVSDrm: 2849202878Srdivacky case X86::MMX_MOVD64rm: 2850202878Srdivacky case X86::MMX_MOVQ64rm: 2851202878Srdivacky case X86::FsMOVAPSrm: 2852202878Srdivacky case 
X86::FsMOVAPDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVUPSrm_Int:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MOVDQUrm:
  case X86::MOVDQUrm_Int:
    break;
  }
  // Both loads must come from the recognized set of simple load opcodes.
  switch (Opc2) {
  default: return false;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp32m:
  case X86::LD_Fp64m:
  case X86::LD_Fp80m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::FsMOVAPSrm:
  case X86::FsMOVAPDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVUPSrm_Int:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MOVDQUrm:
  case X86::MOVDQUrm_Int:
    break;
  }

  // Operand layout of these load nodes: 0 = chain, then the five x86
  // address operands (1 = base, 2 = scale, 3 = index, 4 = disp,
  // 5 = segment) -- note operands 1/2/3/5 below are compared accordingly.
  // Check if chain operands and base addresses match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(5) != Load2->getOperand(5))
    return false;
  // Segment operands should match as well.
  if (Load1->getOperand(4) != Load2->getOperand(4))
    return false;
  // Scale should be 1, Index should be Reg0.
  if (Load1->getOperand(1) == Load2->getOperand(1) &&
      Load1->getOperand(2) == Load2->getOperand(2)) {
    if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
      return false;

    // Now let's examine the displacements.
    if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
        isa<ConstantSDNode>(Load2->getOperand(3))) {
      Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
      Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
      return true;
    }
  }
  return false;
}

/// shouldScheduleLoadsNear - Scheduler hook: decide whether it is
/// profitable to schedule two loads from the same base pointer close
/// together, given their displacements and the number of loads already
/// clustered.  Limits clustering by displacement span, by register
/// pressure (integer/scalar-FP loads are never clustered in pairs or
/// more), and rejects x87/MMX loads entirely.
bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                           int64_t Offset1, int64_t Offset2,
                                           unsigned NumLoads) const {
  assert(Offset2 > Offset1);
  // Refuse to cluster loads spread over more than 512 bytes.
  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  unsigned Opc1 = Load1->getMachineOpcode();
  unsigned Opc2 = Load2->getMachineOpcode();
  if (Opc1 != Opc2)
    return false;  // FIXME: overly conservative?

  switch (Opc1) {
  default: break;
  case X86::LD_Fp32m:
  case X86::LD_Fp64m:
  case X86::LD_Fp80m:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
    return false;
  }

  EVT VT = Load1->getValueType(0);
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    // XMM registers. In 64-bit mode we can be a bit more aggressive since we
    // have 16 of them to play with.
    if (TM.getSubtargetImpl()->is64Bit()) {
      if (NumLoads >= 3)
        return false;
    } else if (NumLoads) {
      return false;
    }
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f32:
  case MVT::f64:
    if (NumLoads)
      return false;
    break;
  }

  return true;
}


/// ReverseBranchCondition - Invert the x86 condition code in place.
/// Returns true (i.e. "cannot reverse") for the two-condition FP codes
/// NE_OR_P / NP_OR_E which have no single-condition inverse.
bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
  X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
  if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
    return true;
  Cond[0].setImm(GetOppositeBranchCondition(CC));
  return false;
}

/// isSafeToMoveRegClassDefs - Returns false for EFLAGS and the x87 stack
/// register classes, whose defs must not be moved.
bool X86InstrInfo::
isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  // FIXME: Return false for x87 stack register classes for now. We can't
  // allow any loads of these registers before FpGet_ST0_80.
  return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
           RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
}


/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher)
/// register? e.g. r8, xmm8, xmm13, etc.
2981203954Srdivackybool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { 2982203954Srdivacky switch (RegNo) { 2983193323Sed default: break; 2984193323Sed case X86::R8: case X86::R9: case X86::R10: case X86::R11: 2985193323Sed case X86::R12: case X86::R13: case X86::R14: case X86::R15: 2986193323Sed case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: 2987193323Sed case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: 2988193323Sed case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: 2989193323Sed case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: 2990193323Sed case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: 2991193323Sed case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: 2992193323Sed case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: 2993193323Sed case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: 2994210299Sed case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: 2995210299Sed case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: 2996193323Sed return true; 2997193323Sed } 2998193323Sed return false; 2999193323Sed} 3000193323Sed 3001193323Sed/// getGlobalBaseReg - Return a virtual register initialized with the 3002193323Sed/// the global base register value. Output instructions required to 3003193323Sed/// initialize the register in the function entry block, if necessary. 3004193323Sed/// 3005210299Sed/// TODO: Eliminate this and move the code to X86MachineFunctionInfo. 
3006210299Sed/// 3007193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 3008193323Sed assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && 3009193323Sed "X86-64 PIC uses RIP relative addressing"); 3010193323Sed 3011193323Sed X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 3012193323Sed unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 3013193323Sed if (GlobalBaseReg != 0) 3014193323Sed return GlobalBaseReg; 3015193323Sed 3016210299Sed // Create the register. The code to initialize it is inserted 3017210299Sed // later, by the CGBR pass (below). 3018193323Sed MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3019210299Sed GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3020193323Sed X86FI->setGlobalBaseReg(GlobalBaseReg); 3021193323Sed return GlobalBaseReg; 3022193323Sed} 3023206083Srdivacky 3024206083Srdivacky// These are the replaceable SSE instructions. Some of these have Int variants 3025206083Srdivacky// that we don't include here. We don't want to replace instructions selected 3026206083Srdivacky// by intrinsics. 
3027206083Srdivackystatic const unsigned ReplaceableInstrs[][3] = { 3028212904Sdim //PackedSingle PackedDouble PackedInt 3029206083Srdivacky { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, 3030206083Srdivacky { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, 3031206083Srdivacky { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, 3032206083Srdivacky { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, 3033206083Srdivacky { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, 3034206083Srdivacky { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, 3035206083Srdivacky { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, 3036206083Srdivacky { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, 3037206083Srdivacky { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm }, 3038206083Srdivacky { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, 3039206083Srdivacky { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, 3040206083Srdivacky { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, 3041206083Srdivacky { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, 3042206083Srdivacky { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, 3043206083Srdivacky { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, 3044212904Sdim // AVX 128-bit support 3045212904Sdim { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, 3046212904Sdim { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, 3047212904Sdim { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, 3048212904Sdim { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, 3049212904Sdim { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, 3050212904Sdim { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, 3051212904Sdim { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, 3052212904Sdim { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, 3053212904Sdim { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, 3054212904Sdim { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, 3055212904Sdim { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, 3056212904Sdim { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, 3057212904Sdim { X86::AVX_SET0PS, X86::AVX_SET0PD, 
X86::AVX_SET0PI }, 3058212904Sdim { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, 3059212904Sdim { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, 3060206083Srdivacky}; 3061206083Srdivacky 3062206083Srdivacky// FIXME: Some shuffle and unpack instructions have equivalents in different 3063206083Srdivacky// domains, but they require a bit more work than just switching opcodes. 3064206083Srdivacky 3065206083Srdivackystatic const unsigned *lookup(unsigned opcode, unsigned domain) { 3066206083Srdivacky for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) 3067206083Srdivacky if (ReplaceableInstrs[i][domain-1] == opcode) 3068206083Srdivacky return ReplaceableInstrs[i]; 3069206083Srdivacky return 0; 3070206083Srdivacky} 3071206083Srdivacky 3072206083Srdivackystd::pair<uint16_t, uint16_t> 3073206083SrdivackyX86InstrInfo::GetSSEDomain(const MachineInstr *MI) const { 3074206083Srdivacky uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 3075206083Srdivacky return std::make_pair(domain, 3076206083Srdivacky domain && lookup(MI->getOpcode(), domain) ? 0xe : 0); 3077206083Srdivacky} 3078206083Srdivacky 3079206083Srdivackyvoid X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { 3080206083Srdivacky assert(Domain>0 && Domain<4 && "Invalid execution domain"); 3081206083Srdivacky uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; 3082206083Srdivacky assert(dom && "Not an SSE instruction"); 3083206083Srdivacky const unsigned *table = lookup(MI->getOpcode(), dom); 3084206083Srdivacky assert(table && "Cannot change domain"); 3085206083Srdivacky MI->setDesc(get(table[Domain-1])); 3086206083Srdivacky} 3087207618Srdivacky 3088207618Srdivacky/// getNoopForMachoTarget - Return the noop instruction to use for a noop. 
3089207618Srdivackyvoid X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { 3090207618Srdivacky NopInst.setOpcode(X86::NOOP); 3091207618Srdivacky} 3092207618Srdivacky 3093210299Sednamespace { 3094210299Sed /// CGBR - Create Global Base Reg pass. This initializes the PIC 3095210299Sed /// global base register for x86-32. 3096210299Sed struct CGBR : public MachineFunctionPass { 3097210299Sed static char ID; 3098212904Sdim CGBR() : MachineFunctionPass(ID) {} 3099210299Sed 3100210299Sed virtual bool runOnMachineFunction(MachineFunction &MF) { 3101210299Sed const X86TargetMachine *TM = 3102210299Sed static_cast<const X86TargetMachine *>(&MF.getTarget()); 3103210299Sed 3104210299Sed assert(!TM->getSubtarget<X86Subtarget>().is64Bit() && 3105210299Sed "X86-64 PIC uses RIP relative addressing"); 3106210299Sed 3107210299Sed // Only emit a global base reg in PIC mode. 3108210299Sed if (TM->getRelocationModel() != Reloc::PIC_) 3109210299Sed return false; 3110210299Sed 3111210299Sed // Insert the set of GlobalBaseReg into the first MBB of the function 3112210299Sed MachineBasicBlock &FirstMBB = MF.front(); 3113210299Sed MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 3114210299Sed DebugLoc DL = FirstMBB.findDebugLoc(MBBI); 3115210299Sed MachineRegisterInfo &RegInfo = MF.getRegInfo(); 3116210299Sed const X86InstrInfo *TII = TM->getInstrInfo(); 3117210299Sed 3118210299Sed unsigned PC; 3119210299Sed if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) 3120210299Sed PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3121210299Sed else 3122210299Sed PC = TII->getGlobalBaseReg(&MF); 3123210299Sed 3124210299Sed // Operand of MovePCtoStack is completely ignored by asm printer. It's 3125210299Sed // only used in JIT code emission as displacement to pc. 
3126210299Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); 3127210299Sed 3128210299Sed // If we're using vanilla 'GOT' PIC style, we should use relative addressing 3129210299Sed // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. 3130210299Sed if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) { 3131210299Sed unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF); 3132210299Sed // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register 3133210299Sed BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) 3134210299Sed .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 3135210299Sed X86II::MO_GOT_ABSOLUTE_ADDRESS); 3136210299Sed } 3137210299Sed 3138210299Sed return true; 3139210299Sed } 3140210299Sed 3141210299Sed virtual const char *getPassName() const { 3142210299Sed return "X86 PIC Global Base Reg Initialization"; 3143210299Sed } 3144210299Sed 3145210299Sed virtual void getAnalysisUsage(AnalysisUsage &AU) const { 3146210299Sed AU.setPreservesCFG(); 3147210299Sed MachineFunctionPass::getAnalysisUsage(AU); 3148210299Sed } 3149210299Sed }; 3150210299Sed} 3151210299Sed 3152210299Sedchar CGBR::ID = 0; 3153210299SedFunctionPass* 3154210299Sedllvm::createGlobalBaseRegPass() { return new CGBR(); } 3155