//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// if an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return zero.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) { }

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
}

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    // A register-to-register MOV becomes "lea (%src), %dst": base = Src,
    // scale = 1, no index, displacement 0, no segment.
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  MachineFunction::iterator MFI = MBB.getIterator();
  return TII->convertToThreeAddress(MFI, MI, nullptr);
}

FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

/// Return true if the opcode is one of the three LEA variants this pass
/// rewrites (32-bit, 64-bit, and 64-bit-address/32-bit-result).
static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  // Cache the subtarget tuning flags that select which transforms run below.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.LEAusesAG();

  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
    // Note: the transforms may replace *I and reassign I, so the loop
    // continues from the substituted instruction.
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDec);
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}

FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  // A def dominates: return RU_Write immediately; otherwise remember that
  // the register was at least read.
  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
    MachineOperand &opnd = MI.getOperand(i);
    if (opnd.isReg() && opnd.getReg() == p.getReg()) {
      if (opnd.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}

/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    // Only wrap when the block is its own predecessor (self-loop);
    // otherwise there is no previous instruction to visit.
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    } else
      return false;
  }
  --I;
  return true;
}

MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found;
  Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    // Calls and inline asm are barriers: don't look past them.
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  // Not found: a default-constructed iterator signals failure to the caller.
  return MachineBasicBlock::iterator();
}

static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}

static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  // Pick the short-immediate (imm8) encoding when the displacement fits.
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}

bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  // Unlike LEA, ADD/INC/DEC write EFLAGS, so the rewrite is only legal when
  // the flags are dead at this point.
  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      !TII->isSafeToClobberEFLAGS(MBB, I))
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg).addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg).addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // This is an LEA with only a base register and a displacement;
    // we can use ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    // lea  %reg, 1(%reg)
    // lea  %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg).addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg).addImm(Disp.getImm());
      }
    }
  } else
    return false;

  // Replace the LEA and hand the caller an iterator to the new instruction.
  MBB.erase(I);
  I = NewMI;
  return true;
}

void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    // Try to LEA-ify the producers of both the base and the index register.
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.erase(MBI);
      // Recurse on the freshly created LEA: its own address operands may
      // expose further fixup opportunities.
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}

void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  // ADD clobbers EFLAGS, so bail out unless the flags are dead here.
  if (Segment.getReg() != 0 || !Offset.isImm() ||
      !TII->isSafeToClobberEFLAGS(MBB, I))
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  // The destination must equal the base or the index so the result can be
  // accumulated in place with two-address ADDs.
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make ADD instruction for two registers writing to LEA's destination
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make ADD instruction for immediate
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri =
        TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.erase(I);
    I = NewMI;
  }
}

void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  // Only rewrite 3-operand LEAs or LEAs with an inefficient base, and only
  // when the replacement instructions may clobber EFLAGS.
  if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
      !TII->isSafeToClobberEFLAGS(MBB, MI) ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // LEA64_32 has 64-bit address operands but a 32-bit result; compare and
  // emit in terms of the 32-bit sub-registers.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1.lea (%base,%index,1), %base => add %index,%base
  // 2.lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient try switching the index and base operands,
    // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
    // lea offset(%base,%index,scale),%dst =>
    // lea (%base,%index,scale); add offset,%dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }

  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.erase(I);
    I = NewMI;
    return;
  }

  // Handle the rest of the cases with inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    // The MOV can kill the base only if base and index differ; otherwise the
    // subsequent ADD still reads it through the index operand.
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.erase(I);
  I = NewMI;
}