//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
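///
/// As a quick sketch of the intent (the registers below are hypothetical,
/// chosen only for illustration), the pass rewrites
///
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
///
/// into the single multiple-load
///
///   ldmia r0, {r4, r5, r6}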
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
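///
/// A sketch of the non-zero starting offset case handled below (registers
/// chosen for illustration): merging three loads at offsets 8, 12 and 16
/// requires materializing a new base first, e.g.
///
///   ldr r4, [r0, #8]
///   ldr r5, [r0, #12]
///   ldr r6, [r0, #16]
/// =>
///   add r6, r0, #8
///   ldmia r6, {r4, r5, r6}
///
/// For loads the last destination register doubles as the new base; for
/// stores the caller-provided scratch register is used instead.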
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
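///
/// As an illustrative scenario (hypothetical registers): a VLDRS of s1 may
/// carry an implicit def of the containing d0. If some instruction between
/// the load's old position and the final LDM/STM position reads d0, that use
/// has to be flagged undef once the load moves; this helper collects those
/// operands so MergeOpsUpdate can do exactly that.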
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
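// For example (illustrative values), with Base = r0 and Bytes = 8,
// isMatchingIncrement matches
//
//   add r0, r0, #8
//
// provided the predicate and predicate register match and, for ADDri/t2ADDri,
// the instruction does not leave a live CPSR definition. tADDspi/tSUBspi
// immediates are scaled by 4 before the comparison.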
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
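// A sketch of the folds these pre-/post-indexed opcode helpers support
// (registers are illustrative):
//
//   add r0, r0, #4
//   ldr r1, [r0]
// =>
//   ldr r1, [r0, #4]!     ; pre-indexed
//
//   ldr r1, [r0]
//   add r0, r0, #4
// =>
//   ldr r1, [r0], #4      ; post-indexed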
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
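// A note on the decoding below (worked example, not an exhaustive spec): the
// i8/i12-style opcodes keep the byte offset directly in the immediate
// operand, while AM3 and AM5 pack a sub/add flag together with a magnitude.
// AM5 counts in words, so e.g. a VLDRD whose AM5 field encodes {sub, 2}
// yields an offset of -8 bytes.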
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
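///
/// A sketch of the bookkeeping: memory ops sharing the same base register,
/// opcode and predicate are accumulated into MemOps (kept sorted by offset),
/// and the chain is flushed through MergeLDR_STR whenever it is broken, e.g.
/// by a base-register clobber, an offset collision, a non-matching memory op,
/// or the end of the block.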
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
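// The pair formation this machinery enables, sketched with illustrative
// registers:
//
//   ldr r0, [r2]
//   ldr r1, [r2, #4]
// =>
//   ldrd r0, r1, [r2]
//
// subject to the target, alignment, offset and register checks performed
// below in CanFormLdStDWord.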
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
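        // Loads are gathered at the position of the first (topmost) load in
        // the group, stores at the last (bottommost) store; then skip past
        // any other queued memory ops and debug values so the merged group
        // stays contiguous.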
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can be combined into
        // a register pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
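        // A call or terminator may access memory or clobber registers in
        // ways this pass does not track, so the scan window ends here; the
        // ops queued so far are rescheduled below before a new window
        // starts after this instruction.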
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen
        // earlier). Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
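
// Typical wiring (a sketch; the exact registration lives in the ARM target's
// pass configuration, e.g. ARMTargetMachine.cpp, and may differ by version):
//
//   // Before register allocation: move loads / stores closer together.
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
//
//   // After register allocation: form LDM / STM and fix register pairs.
//   addPass(createARMLoadStoreOptimizationPass());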