AArch64LoadStoreOptimizer.cpp revision 288943
1274955Ssvnmir//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// 2274955Ssvnmir// 3274955Ssvnmir// The LLVM Compiler Infrastructure 4274955Ssvnmir// 5274955Ssvnmir// This file is distributed under the University of Illinois Open Source 6274955Ssvnmir// License. See LICENSE.TXT for details. 7274955Ssvnmir// 8274955Ssvnmir//===----------------------------------------------------------------------===// 9274955Ssvnmir// 10274955Ssvnmir// This file contains a pass that performs load / store related peephole 11274955Ssvnmir// optimizations. This pass should be run after register allocation. 12274955Ssvnmir// 13274955Ssvnmir//===----------------------------------------------------------------------===// 14274955Ssvnmir 15274955Ssvnmir#include "AArch64InstrInfo.h" 16280031Sdim#include "AArch64Subtarget.h" 17274955Ssvnmir#include "MCTargetDesc/AArch64AddressingModes.h" 18274955Ssvnmir#include "llvm/ADT/BitVector.h" 19288943Sdim#include "llvm/ADT/SmallVector.h" 20280031Sdim#include "llvm/ADT/Statistic.h" 21274955Ssvnmir#include "llvm/CodeGen/MachineBasicBlock.h" 22274955Ssvnmir#include "llvm/CodeGen/MachineFunctionPass.h" 23274955Ssvnmir#include "llvm/CodeGen/MachineInstr.h" 24274955Ssvnmir#include "llvm/CodeGen/MachineInstrBuilder.h" 25274955Ssvnmir#include "llvm/Support/CommandLine.h" 26274955Ssvnmir#include "llvm/Support/Debug.h" 27274955Ssvnmir#include "llvm/Support/ErrorHandling.h" 28274955Ssvnmir#include "llvm/Support/raw_ostream.h" 29280031Sdim#include "llvm/Target/TargetInstrInfo.h" 30280031Sdim#include "llvm/Target/TargetMachine.h" 31280031Sdim#include "llvm/Target/TargetRegisterInfo.h" 32274955Ssvnmirusing namespace llvm; 33274955Ssvnmir 34274955Ssvnmir#define DEBUG_TYPE "aarch64-ldst-opt" 35274955Ssvnmir 36274955Ssvnmir/// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine 37274955Ssvnmir/// load / store instructions to form ldp / stp instructions. 38274955Ssvnmir 39274955SsvnmirSTATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); 40274955SsvnmirSTATISTIC(NumPostFolded, "Number of post-index updates folded"); 41274955SsvnmirSTATISTIC(NumPreFolded, "Number of pre-index updates folded"); 42274955SsvnmirSTATISTIC(NumUnscaledPairCreated, 43274955Ssvnmir "Number of load/store from unscaled generated"); 44274955Ssvnmir 45274955Ssvnmirstatic cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", 46274955Ssvnmir cl::init(20), cl::Hidden); 47274955Ssvnmir 48274955Ssvnmir// Place holder while testing unscaled load/store combining 49274955Ssvnmirstatic cl::opt<bool> EnableAArch64UnscaledMemOp( 50274955Ssvnmir "aarch64-unscaled-mem-op", cl::Hidden, 51274955Ssvnmir cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true)); 52274955Ssvnmir 53274955Ssvnmirnamespace { 54274955Ssvnmirstruct AArch64LoadStoreOpt : public MachineFunctionPass { 55274955Ssvnmir static char ID; 56274955Ssvnmir AArch64LoadStoreOpt() : MachineFunctionPass(ID) {} 57274955Ssvnmir 58274955Ssvnmir const AArch64InstrInfo *TII; 59274955Ssvnmir const TargetRegisterInfo *TRI; 60274955Ssvnmir 61274955Ssvnmir // Scan the instructions looking for a load/store that can be combined 62274955Ssvnmir // with the current instruction into a load/store pair. 63274955Ssvnmir // Return the matching instruction if one is found, else MBB->end(). 64274955Ssvnmir // If a matching instruction is found, MergeForward is set to true if the 65274955Ssvnmir // merge is to remove the first instruction and replace the second with 66274955Ssvnmir // a pair-wise insn, and false if the reverse is true. 67288943Sdim // \p SExtIdx[out] gives the index of the result of the load pair that 68288943Sdim // must be extended. The value of SExtIdx assumes that the paired load 69288943Sdim // produces the value in this order: (I, returned iterator), i.e., 70288943Sdim // -1 means no value has to be extended, 0 means I, and 1 means the 71288943Sdim // returned iterator. 72274955Ssvnmir MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, 73288943Sdim bool &MergeForward, int &SExtIdx, 74274955Ssvnmir unsigned Limit); 75274955Ssvnmir // Merge the two instructions indicated into a single pair-wise instruction. 76274955Ssvnmir // If MergeForward is true, erase the first instruction and fold its 77274955Ssvnmir // operation into the second. If false, the reverse. Return the instruction 78274955Ssvnmir // following the first instruction (which may change during processing). 79288943Sdim // \p SExtIdx index of the result that must be extended for a paired load. 80288943Sdim // -1 means none, 0 means I, and 1 means Paired. 81274955Ssvnmir MachineBasicBlock::iterator 82274955Ssvnmir mergePairedInsns(MachineBasicBlock::iterator I, 83288943Sdim MachineBasicBlock::iterator Paired, bool MergeForward, 84288943Sdim int SExtIdx); 85274955Ssvnmir 86274955Ssvnmir // Scan the instruction list to find a base register update that can 87274955Ssvnmir // be combined with the current instruction (a load or store) using 88274955Ssvnmir // pre or post indexed addressing with writeback. Scan forwards. 89274955Ssvnmir MachineBasicBlock::iterator 90274955Ssvnmir findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, 91274955Ssvnmir int Value); 92274955Ssvnmir 93274955Ssvnmir // Scan the instruction list to find a base register update that can 94274955Ssvnmir // be combined with the current instruction (a load or store) using 95274955Ssvnmir // pre or post indexed addressing with writeback. Scan backwards. 96274955Ssvnmir MachineBasicBlock::iterator 97274955Ssvnmir findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); 98274955Ssvnmir 99274955Ssvnmir // Merge a pre-index base register update into a ld/st instruction. 100274955Ssvnmir MachineBasicBlock::iterator 101274955Ssvnmir mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, 102274955Ssvnmir MachineBasicBlock::iterator Update); 103274955Ssvnmir 104274955Ssvnmir // Merge a post-index base register update into a ld/st instruction. 105274955Ssvnmir MachineBasicBlock::iterator 106274955Ssvnmir mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, 107274955Ssvnmir MachineBasicBlock::iterator Update); 108274955Ssvnmir 109274955Ssvnmir bool optimizeBlock(MachineBasicBlock &MBB); 110274955Ssvnmir 111274955Ssvnmir bool runOnMachineFunction(MachineFunction &Fn) override; 112274955Ssvnmir 113274955Ssvnmir const char *getPassName() const override { 114274955Ssvnmir return "AArch64 load / store optimization pass"; 115274955Ssvnmir } 116274955Ssvnmir 117274955Ssvnmirprivate: 118274955Ssvnmir int getMemSize(MachineInstr *MemMI); 119274955Ssvnmir}; 120274955Ssvnmirchar AArch64LoadStoreOpt::ID = 0; 121280031Sdim} // namespace 122274955Ssvnmir 123274955Ssvnmirstatic bool isUnscaledLdst(unsigned Opc) { 124274955Ssvnmir switch (Opc) { 125274955Ssvnmir default: 126274955Ssvnmir return false; 127274955Ssvnmir case AArch64::STURSi: 128274955Ssvnmir return true; 129274955Ssvnmir case AArch64::STURDi: 130274955Ssvnmir return true; 131274955Ssvnmir case AArch64::STURQi: 132274955Ssvnmir return true; 133274955Ssvnmir case AArch64::STURWi: 134274955Ssvnmir return true; 135274955Ssvnmir case AArch64::STURXi: 136274955Ssvnmir return true; 137274955Ssvnmir case AArch64::LDURSi: 138274955Ssvnmir return true; 139274955Ssvnmir case AArch64::LDURDi: 140274955Ssvnmir return true; 141274955Ssvnmir case AArch64::LDURQi: 142274955Ssvnmir return true; 143274955Ssvnmir case AArch64::LDURWi: 144274955Ssvnmir return true; 145274955Ssvnmir case AArch64::LDURXi: 146274955Ssvnmir return true; 147288943Sdim case AArch64::LDURSWi: 148288943Sdim return true; 149274955Ssvnmir } 150274955Ssvnmir} 151274955Ssvnmir 152274955Ssvnmir// Size in bytes of the data moved by an unscaled load or store 153274955Ssvnmirint AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { 154274955Ssvnmir switch (MemMI->getOpcode()) { 155274955Ssvnmir default: 156274955Ssvnmir llvm_unreachable("Opcode has unknown size!"); 157274955Ssvnmir case AArch64::STRSui: 158274955Ssvnmir case AArch64::STURSi: 159274955Ssvnmir return 4; 160274955Ssvnmir case AArch64::STRDui: 161274955Ssvnmir case AArch64::STURDi: 162274955Ssvnmir return 8; 163274955Ssvnmir case AArch64::STRQui: 164274955Ssvnmir case AArch64::STURQi: 165274955Ssvnmir return 16; 166274955Ssvnmir case AArch64::STRWui: 167274955Ssvnmir case AArch64::STURWi: 168274955Ssvnmir return 4; 169274955Ssvnmir case AArch64::STRXui: 170274955Ssvnmir case AArch64::STURXi: 171274955Ssvnmir return 8; 172274955Ssvnmir case AArch64::LDRSui: 173274955Ssvnmir case AArch64::LDURSi: 174274955Ssvnmir return 4; 175274955Ssvnmir case AArch64::LDRDui: 176274955Ssvnmir case AArch64::LDURDi: 177274955Ssvnmir return 8; 178274955Ssvnmir case AArch64::LDRQui: 179274955Ssvnmir case AArch64::LDURQi: 180274955Ssvnmir return 16; 181274955Ssvnmir case AArch64::LDRWui: 182274955Ssvnmir case AArch64::LDURWi: 183274955Ssvnmir return 4; 184274955Ssvnmir case AArch64::LDRXui: 185274955Ssvnmir case AArch64::LDURXi: 186274955Ssvnmir return 8; 187288943Sdim case AArch64::LDRSWui: 188288943Sdim case AArch64::LDURSWi: 189288943Sdim return 4; 190274955Ssvnmir } 191274955Ssvnmir} 192274955Ssvnmir 193288943Sdimstatic unsigned getMatchingNonSExtOpcode(unsigned Opc, 194288943Sdim bool *IsValidLdStrOpc = nullptr) { 195288943Sdim if (IsValidLdStrOpc) 196288943Sdim *IsValidLdStrOpc = true; 197288943Sdim switch (Opc) { 198288943Sdim default: 199288943Sdim if (IsValidLdStrOpc) 200288943Sdim *IsValidLdStrOpc = false; 201288943Sdim return UINT_MAX; 202288943Sdim case AArch64::STRDui: 203288943Sdim case AArch64::STURDi: 204288943Sdim case AArch64::STRQui: 205288943Sdim case AArch64::STURQi: 206288943Sdim case AArch64::STRWui: 207288943Sdim case AArch64::STURWi: 208288943Sdim case AArch64::STRXui: 209288943Sdim case AArch64::STURXi: 210288943Sdim case AArch64::LDRDui: 211288943Sdim case AArch64::LDURDi: 212288943Sdim case AArch64::LDRQui: 213288943Sdim case AArch64::LDURQi: 214288943Sdim case AArch64::LDRWui: 215288943Sdim case AArch64::LDURWi: 216288943Sdim case AArch64::LDRXui: 217288943Sdim case AArch64::LDURXi: 218288943Sdim case AArch64::STRSui: 219288943Sdim case AArch64::STURSi: 220288943Sdim case AArch64::LDRSui: 221288943Sdim case AArch64::LDURSi: 222288943Sdim return Opc; 223288943Sdim case AArch64::LDRSWui: 224288943Sdim return AArch64::LDRWui; 225288943Sdim case AArch64::LDURSWi: 226288943Sdim return AArch64::LDURWi; 227288943Sdim } 228288943Sdim} 229288943Sdim 230274955Ssvnmirstatic unsigned getMatchingPairOpcode(unsigned Opc) { 231274955Ssvnmir switch (Opc) { 232274955Ssvnmir default: 233274955Ssvnmir llvm_unreachable("Opcode has no pairwise equivalent!"); 234274955Ssvnmir case AArch64::STRSui: 235274955Ssvnmir case AArch64::STURSi: 236274955Ssvnmir return AArch64::STPSi; 237274955Ssvnmir case AArch64::STRDui: 238274955Ssvnmir case AArch64::STURDi: 239274955Ssvnmir return AArch64::STPDi; 240274955Ssvnmir case AArch64::STRQui: 241274955Ssvnmir case AArch64::STURQi: 242274955Ssvnmir return AArch64::STPQi; 243274955Ssvnmir case AArch64::STRWui: 244274955Ssvnmir case AArch64::STURWi: 245274955Ssvnmir return AArch64::STPWi; 246274955Ssvnmir case AArch64::STRXui: 247274955Ssvnmir case AArch64::STURXi: 248274955Ssvnmir return AArch64::STPXi; 249274955Ssvnmir case AArch64::LDRSui: 250274955Ssvnmir case AArch64::LDURSi: 251274955Ssvnmir return AArch64::LDPSi; 252274955Ssvnmir case AArch64::LDRDui: 253274955Ssvnmir case AArch64::LDURDi: 254274955Ssvnmir return AArch64::LDPDi; 255274955Ssvnmir case AArch64::LDRQui: 256274955Ssvnmir case AArch64::LDURQi: 257274955Ssvnmir return AArch64::LDPQi; 258274955Ssvnmir case AArch64::LDRWui: 259274955Ssvnmir case AArch64::LDURWi: 260274955Ssvnmir return AArch64::LDPWi; 261274955Ssvnmir case AArch64::LDRXui: 262274955Ssvnmir case AArch64::LDURXi: 263274955Ssvnmir return AArch64::LDPXi; 264288943Sdim case AArch64::LDRSWui: 265288943Sdim case AArch64::LDURSWi: 266288943Sdim return AArch64::LDPSWi; 267274955Ssvnmir } 268274955Ssvnmir} 269274955Ssvnmir 270274955Ssvnmirstatic unsigned getPreIndexedOpcode(unsigned Opc) { 271274955Ssvnmir switch (Opc) { 272274955Ssvnmir default: 273274955Ssvnmir llvm_unreachable("Opcode has no pre-indexed equivalent!"); 274274955Ssvnmir case AArch64::STRSui: 275274955Ssvnmir return AArch64::STRSpre; 276274955Ssvnmir case AArch64::STRDui: 277274955Ssvnmir return AArch64::STRDpre; 278274955Ssvnmir case AArch64::STRQui: 279274955Ssvnmir return AArch64::STRQpre; 280274955Ssvnmir case AArch64::STRWui: 281274955Ssvnmir return AArch64::STRWpre; 282274955Ssvnmir case AArch64::STRXui: 283274955Ssvnmir return AArch64::STRXpre; 284274955Ssvnmir case AArch64::LDRSui: 285274955Ssvnmir return AArch64::LDRSpre; 286274955Ssvnmir case AArch64::LDRDui: 287274955Ssvnmir return AArch64::LDRDpre; 288274955Ssvnmir case AArch64::LDRQui: 289274955Ssvnmir return AArch64::LDRQpre; 290274955Ssvnmir case AArch64::LDRWui: 291274955Ssvnmir return AArch64::LDRWpre; 292274955Ssvnmir case AArch64::LDRXui: 293274955Ssvnmir return AArch64::LDRXpre; 294288943Sdim case AArch64::LDRSWui: 295288943Sdim return AArch64::LDRSWpre; 296274955Ssvnmir } 297274955Ssvnmir} 298274955Ssvnmir 299274955Ssvnmirstatic unsigned getPostIndexedOpcode(unsigned Opc) { 300274955Ssvnmir switch (Opc) { 301274955Ssvnmir default: 302274955Ssvnmir llvm_unreachable("Opcode has no post-indexed wise equivalent!"); 303274955Ssvnmir case AArch64::STRSui: 304274955Ssvnmir return AArch64::STRSpost; 305274955Ssvnmir case AArch64::STRDui: 306274955Ssvnmir return AArch64::STRDpost; 307274955Ssvnmir case AArch64::STRQui: 308274955Ssvnmir return AArch64::STRQpost; 309274955Ssvnmir case AArch64::STRWui: 310274955Ssvnmir return AArch64::STRWpost; 311274955Ssvnmir case AArch64::STRXui: 312274955Ssvnmir return AArch64::STRXpost; 313274955Ssvnmir case AArch64::LDRSui: 314274955Ssvnmir return AArch64::LDRSpost; 315274955Ssvnmir case AArch64::LDRDui: 316274955Ssvnmir return AArch64::LDRDpost; 317274955Ssvnmir case AArch64::LDRQui: 318274955Ssvnmir return AArch64::LDRQpost; 319274955Ssvnmir case AArch64::LDRWui: 320274955Ssvnmir return AArch64::LDRWpost; 321274955Ssvnmir case AArch64::LDRXui: 322274955Ssvnmir return AArch64::LDRXpost; 323288943Sdim case AArch64::LDRSWui: 324288943Sdim return AArch64::LDRSWpost; 325274955Ssvnmir } 326274955Ssvnmir} 327274955Ssvnmir 328274955SsvnmirMachineBasicBlock::iterator 329274955SsvnmirAArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, 330274955Ssvnmir MachineBasicBlock::iterator Paired, 331288943Sdim bool MergeForward, int SExtIdx) { 332274955Ssvnmir MachineBasicBlock::iterator NextI = I; 333274955Ssvnmir ++NextI; 334274955Ssvnmir // If NextI is the second of the two instructions to be merged, we need 335274955Ssvnmir // to skip one further. Either way we merge will invalidate the iterator, 336274955Ssvnmir // and we don't need to scan the new instruction, as it's a pairwise 337274955Ssvnmir // instruction, which we're not considering for further action anyway. 338274955Ssvnmir if (NextI == Paired) 339274955Ssvnmir ++NextI; 340274955Ssvnmir 341288943Sdim unsigned Opc = 342288943Sdim SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); 343288943Sdim bool IsUnscaled = isUnscaledLdst(Opc); 344274955Ssvnmir int OffsetStride = 345274955Ssvnmir IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1; 346274955Ssvnmir 347288943Sdim unsigned NewOpc = getMatchingPairOpcode(Opc); 348274955Ssvnmir // Insert our new paired instruction after whichever of the paired 349274955Ssvnmir // instructions MergeForward indicates. 350274955Ssvnmir MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; 351274955Ssvnmir // Also based on MergeForward is from where we copy the base register operand 352274955Ssvnmir // so we get the flags compatible with the input code. 353274955Ssvnmir MachineOperand &BaseRegOp = 354274955Ssvnmir MergeForward ? Paired->getOperand(1) : I->getOperand(1); 355274955Ssvnmir 356274955Ssvnmir // Which register is Rt and which is Rt2 depends on the offset order. 357274955Ssvnmir MachineInstr *RtMI, *Rt2MI; 358274955Ssvnmir if (I->getOperand(2).getImm() == 359274955Ssvnmir Paired->getOperand(2).getImm() + OffsetStride) { 360274955Ssvnmir RtMI = Paired; 361274955Ssvnmir Rt2MI = I; 362288943Sdim // Here we swapped the assumption made for SExtIdx. 363288943Sdim // I.e., we turn ldp I, Paired into ldp Paired, I. 364288943Sdim // Update the index accordingly. 365288943Sdim if (SExtIdx != -1) 366288943Sdim SExtIdx = (SExtIdx + 1) % 2; 367274955Ssvnmir } else { 368274955Ssvnmir RtMI = I; 369274955Ssvnmir Rt2MI = Paired; 370274955Ssvnmir } 371274955Ssvnmir // Handle Unscaled 372274955Ssvnmir int OffsetImm = RtMI->getOperand(2).getImm(); 373274955Ssvnmir if (IsUnscaled && EnableAArch64UnscaledMemOp) 374274955Ssvnmir OffsetImm /= OffsetStride; 375274955Ssvnmir 376274955Ssvnmir // Construct the new instruction. 377274955Ssvnmir MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, 378274955Ssvnmir I->getDebugLoc(), TII->get(NewOpc)) 379274955Ssvnmir .addOperand(RtMI->getOperand(0)) 380274955Ssvnmir .addOperand(Rt2MI->getOperand(0)) 381274955Ssvnmir .addOperand(BaseRegOp) 382274955Ssvnmir .addImm(OffsetImm); 383274955Ssvnmir (void)MIB; 384274955Ssvnmir 385274955Ssvnmir // FIXME: Do we need/want to copy the mem operands from the source 386274955Ssvnmir // instructions? Probably. What uses them after this? 387274955Ssvnmir 388274955Ssvnmir DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); 389274955Ssvnmir DEBUG(I->print(dbgs())); 390274955Ssvnmir DEBUG(dbgs() << " "); 391274955Ssvnmir DEBUG(Paired->print(dbgs())); 392274955Ssvnmir DEBUG(dbgs() << " with instruction:\n "); 393274955Ssvnmir 394288943Sdim if (SExtIdx != -1) { 395288943Sdim // Generate the sign extension for the proper result of the ldp. 396288943Sdim // I.e., with X1, that would be: 397288943Sdim // %W1<def> = KILL %W1, %X1<imp-def> 398288943Sdim // %X1<def> = SBFMXri %X1<kill>, 0, 31 399288943Sdim MachineOperand &DstMO = MIB->getOperand(SExtIdx); 400288943Sdim // Right now, DstMO has the extended register, since it comes from an 401288943Sdim // extended opcode. 402288943Sdim unsigned DstRegX = DstMO.getReg(); 403288943Sdim // Get the W variant of that register. 404288943Sdim unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); 405288943Sdim // Update the result of LDP to use the W instead of the X variant. 406288943Sdim DstMO.setReg(DstRegW); 407288943Sdim DEBUG(((MachineInstr *)MIB)->print(dbgs())); 408288943Sdim DEBUG(dbgs() << "\n"); 409288943Sdim // Make the machine verifier happy by providing a definition for 410288943Sdim // the X register. 411288943Sdim // Insert this definition right after the generated LDP, i.e., before 412288943Sdim // InsertionPoint. 413288943Sdim MachineInstrBuilder MIBKill = 414288943Sdim BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 415288943Sdim TII->get(TargetOpcode::KILL), DstRegW) 416288943Sdim .addReg(DstRegW) 417288943Sdim .addReg(DstRegX, RegState::Define); 418288943Sdim MIBKill->getOperand(2).setImplicit(); 419288943Sdim // Create the sign extension. 420288943Sdim MachineInstrBuilder MIBSXTW = 421288943Sdim BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 422288943Sdim TII->get(AArch64::SBFMXri), DstRegX) 423288943Sdim .addReg(DstRegX) 424288943Sdim .addImm(0) 425288943Sdim .addImm(31); 426288943Sdim (void)MIBSXTW; 427288943Sdim DEBUG(dbgs() << " Extend operand:\n "); 428288943Sdim DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); 429288943Sdim DEBUG(dbgs() << "\n"); 430288943Sdim } else { 431288943Sdim DEBUG(((MachineInstr *)MIB)->print(dbgs())); 432288943Sdim DEBUG(dbgs() << "\n"); 433288943Sdim } 434288943Sdim 435274955Ssvnmir // Erase the old instructions. 436274955Ssvnmir I->eraseFromParent(); 437274955Ssvnmir Paired->eraseFromParent(); 438274955Ssvnmir 439274955Ssvnmir return NextI; 440274955Ssvnmir} 441274955Ssvnmir 442274955Ssvnmir/// trackRegDefsUses - Remember what registers the specified instruction uses 443274955Ssvnmir/// and modifies. 444274955Ssvnmirstatic void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, 445274955Ssvnmir BitVector &UsedRegs, 446274955Ssvnmir const TargetRegisterInfo *TRI) { 447274955Ssvnmir for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 448274955Ssvnmir MachineOperand &MO = MI->getOperand(i); 449274955Ssvnmir if (MO.isRegMask()) 450274955Ssvnmir ModifiedRegs.setBitsNotInMask(MO.getRegMask()); 451274955Ssvnmir 452274955Ssvnmir if (!MO.isReg()) 453274955Ssvnmir continue; 454274955Ssvnmir unsigned Reg = MO.getReg(); 455274955Ssvnmir if (MO.isDef()) { 456274955Ssvnmir for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 457274955Ssvnmir ModifiedRegs.set(*AI); 458274955Ssvnmir } else { 459274955Ssvnmir assert(MO.isUse() && "Reg operand not a def and not a use?!?"); 460274955Ssvnmir for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 461274955Ssvnmir UsedRegs.set(*AI); 462274955Ssvnmir } 463274955Ssvnmir } 464274955Ssvnmir} 465274955Ssvnmir 466274955Ssvnmirstatic bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { 467274955Ssvnmir if (!IsUnscaled && (Offset > 63 || Offset < -64)) 468274955Ssvnmir return false; 469274955Ssvnmir if (IsUnscaled) { 470274955Ssvnmir // Convert the byte-offset used by unscaled into an "element" offset used 471274955Ssvnmir // by the scaled pair load/store instructions. 472274955Ssvnmir int ElemOffset = Offset / OffsetStride; 473274955Ssvnmir if (ElemOffset > 63 || ElemOffset < -64) 474274955Ssvnmir return false; 475274955Ssvnmir } 476274955Ssvnmir return true; 477274955Ssvnmir} 478274955Ssvnmir 479274955Ssvnmir// Do alignment, specialized to power of 2 and for signed ints, 480274955Ssvnmir// avoiding having to do a C-style cast from uint_64t to int when 481274955Ssvnmir// using RoundUpToAlignment from include/llvm/Support/MathExtras.h. 482274955Ssvnmir// FIXME: Move this function to include/MathExtras.h? 483274955Ssvnmirstatic int alignTo(int Num, int PowOf2) { 484274955Ssvnmir return (Num + PowOf2 - 1) & ~(PowOf2 - 1); 485274955Ssvnmir} 486274955Ssvnmir 487288943Sdimstatic bool mayAlias(MachineInstr *MIa, MachineInstr *MIb, 488288943Sdim const AArch64InstrInfo *TII) { 489288943Sdim // One of the instructions must modify memory. 490288943Sdim if (!MIa->mayStore() && !MIb->mayStore()) 491288943Sdim return false; 492288943Sdim 493288943Sdim // Both instructions must be memory operations. 494288943Sdim if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore()) 495288943Sdim return false; 496288943Sdim 497288943Sdim return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb); 498288943Sdim} 499288943Sdim 500288943Sdimstatic bool mayAlias(MachineInstr *MIa, 501288943Sdim SmallVectorImpl<MachineInstr *> &MemInsns, 502288943Sdim const AArch64InstrInfo *TII) { 503288943Sdim for (auto &MIb : MemInsns) 504288943Sdim if (mayAlias(MIa, MIb, TII)) 505288943Sdim return true; 506288943Sdim 507288943Sdim return false; 508288943Sdim} 509288943Sdim 510274955Ssvnmir/// findMatchingInsn - Scan the instructions looking for a load/store that can 511274955Ssvnmir/// be combined with the current instruction into a load/store pair. 512274955SsvnmirMachineBasicBlock::iterator 513274955SsvnmirAArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, 514288943Sdim bool &MergeForward, int &SExtIdx, 515288943Sdim unsigned Limit) { 516274955Ssvnmir MachineBasicBlock::iterator E = I->getParent()->end(); 517274955Ssvnmir MachineBasicBlock::iterator MBBI = I; 518274955Ssvnmir MachineInstr *FirstMI = I; 519274955Ssvnmir ++MBBI; 520274955Ssvnmir 521288943Sdim unsigned Opc = FirstMI->getOpcode(); 522274955Ssvnmir bool MayLoad = FirstMI->mayLoad(); 523274955Ssvnmir bool IsUnscaled = isUnscaledLdst(Opc); 524274955Ssvnmir unsigned Reg = FirstMI->getOperand(0).getReg(); 525274955Ssvnmir unsigned BaseReg = FirstMI->getOperand(1).getReg(); 526274955Ssvnmir int Offset = FirstMI->getOperand(2).getImm(); 527274955Ssvnmir 528274955Ssvnmir // Early exit if the first instruction modifies the base register. 529274955Ssvnmir // e.g., ldr x0, [x0] 530274955Ssvnmir // Early exit if the offset if not possible to match. (6 bits of positive 531274955Ssvnmir // range, plus allow an extra one in case we find a later insn that matches 532274955Ssvnmir // with Offset-1 533274955Ssvnmir if (FirstMI->modifiesRegister(BaseReg, TRI)) 534274955Ssvnmir return E; 535274955Ssvnmir int OffsetStride = 536274955Ssvnmir IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1; 537274955Ssvnmir if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) 538274955Ssvnmir return E; 539274955Ssvnmir 540274955Ssvnmir // Track which registers have been modified and used between the first insn 541274955Ssvnmir // (inclusive) and the second insn. 542274955Ssvnmir BitVector ModifiedRegs, UsedRegs; 543274955Ssvnmir ModifiedRegs.resize(TRI->getNumRegs()); 544274955Ssvnmir UsedRegs.resize(TRI->getNumRegs()); 545288943Sdim 546288943Sdim // Remember any instructions that read/write memory between FirstMI and MI. 547288943Sdim SmallVector<MachineInstr *, 4> MemInsns; 548288943Sdim 549274955Ssvnmir for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { 550274955Ssvnmir MachineInstr *MI = MBBI; 551274955Ssvnmir // Skip DBG_VALUE instructions. Otherwise debug info can affect the 552274955Ssvnmir // optimization by changing how far we scan. 553274955Ssvnmir if (MI->isDebugValue()) 554274955Ssvnmir continue; 555274955Ssvnmir 556274955Ssvnmir // Now that we know this is a real instruction, count it. 557274955Ssvnmir ++Count; 558274955Ssvnmir 559288943Sdim bool CanMergeOpc = Opc == MI->getOpcode(); 560288943Sdim SExtIdx = -1; 561288943Sdim if (!CanMergeOpc) { 562288943Sdim bool IsValidLdStrOpc; 563288943Sdim unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); 564288943Sdim if (!IsValidLdStrOpc) 565288943Sdim continue; 566288943Sdim // Opc will be the first instruction in the pair. 567288943Sdim SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0; 568288943Sdim CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); 569288943Sdim } 570288943Sdim 571288943Sdim if (CanMergeOpc && MI->getOperand(2).isImm()) { 572274955Ssvnmir // If we've found another instruction with the same opcode, check to see 573274955Ssvnmir // if the base and offset are compatible with our starting instruction. 574274955Ssvnmir // These instructions all have scaled immediate operands, so we just 575274955Ssvnmir // check for +1/-1. Make sure to check the new instruction offset is 576274955Ssvnmir // actually an immediate and not a symbolic reference destined for 577274955Ssvnmir // a relocation. 578274955Ssvnmir // 579274955Ssvnmir // Pairwise instructions have a 7-bit signed offset field. Single insns 580274955Ssvnmir // have a 12-bit unsigned offset field. To be a valid combine, the 581274955Ssvnmir // final offset must be in range. 582274955Ssvnmir unsigned MIBaseReg = MI->getOperand(1).getReg(); 583274955Ssvnmir int MIOffset = MI->getOperand(2).getImm(); 584274955Ssvnmir if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || 585274955Ssvnmir (Offset + OffsetStride == MIOffset))) { 586274955Ssvnmir int MinOffset = Offset < MIOffset ? Offset : MIOffset; 587274955Ssvnmir // If this is a volatile load/store that otherwise matched, stop looking 588274955Ssvnmir // as something is going on that we don't have enough information to 589274955Ssvnmir // safely transform. Similarly, stop if we see a hint to avoid pairs. 590274955Ssvnmir if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) 591274955Ssvnmir return E; 592274955Ssvnmir // If the resultant immediate offset of merging these instructions 593274955Ssvnmir // is out of range for a pairwise instruction, bail and keep looking. 594274955Ssvnmir bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); 595274955Ssvnmir if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { 596274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 597288943Sdim if (MI->mayLoadOrStore()) 598288943Sdim MemInsns.push_back(MI); 599274955Ssvnmir continue; 600274955Ssvnmir } 601274955Ssvnmir // If the alignment requirements of the paired (scaled) instruction 602274955Ssvnmir // can't express the offset of the unscaled input, bail and keep 603274955Ssvnmir // looking. 604274955Ssvnmir if (IsUnscaled && EnableAArch64UnscaledMemOp && 605274955Ssvnmir (alignTo(MinOffset, OffsetStride) != MinOffset)) { 606274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 607288943Sdim if (MI->mayLoadOrStore()) 608288943Sdim MemInsns.push_back(MI); 609274955Ssvnmir continue; 610274955Ssvnmir } 611274955Ssvnmir // If the destination register of the loads is the same register, bail 612274955Ssvnmir // and keep looking. A load-pair instruction with both destination 613274955Ssvnmir // registers the same is UNPREDICTABLE and will result in an exception. 614274955Ssvnmir if (MayLoad && Reg == MI->getOperand(0).getReg()) { 615274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 616288943Sdim if (MI->mayLoadOrStore()) 617288943Sdim MemInsns.push_back(MI); 618274955Ssvnmir continue; 619274955Ssvnmir } 620274955Ssvnmir 621274955Ssvnmir // If the Rt of the second instruction was not modified or used between 622288943Sdim // the two instructions and none of the instructions between the second 623288943Sdim // and first alias with the second, we can combine the second into the 624288943Sdim // first. 625274955Ssvnmir if (!ModifiedRegs[MI->getOperand(0).getReg()] && 626288943Sdim !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) && 627288943Sdim !mayAlias(MI, MemInsns, TII)) { 628274955Ssvnmir MergeForward = false; 629274955Ssvnmir return MBBI; 630274955Ssvnmir } 631274955Ssvnmir 632274955Ssvnmir // Likewise, if the Rt of the first instruction is not modified or used 633288943Sdim // between the two instructions and none of the instructions between the 634288943Sdim // first and the second alias with the first, we can combine the first 635288943Sdim // into the second. 636274955Ssvnmir if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && 637288943Sdim !(FirstMI->mayLoad() && 638288943Sdim UsedRegs[FirstMI->getOperand(0).getReg()]) && 639288943Sdim !mayAlias(FirstMI, MemInsns, TII)) { 640274955Ssvnmir MergeForward = true; 641274955Ssvnmir return MBBI; 642274955Ssvnmir } 643274955Ssvnmir // Unable to combine these instructions due to interference in between. 644274955Ssvnmir // Keep looking. 645274955Ssvnmir } 646274955Ssvnmir } 647274955Ssvnmir 648288943Sdim // If the instruction wasn't a matching load or store. Stop searching if we 649288943Sdim // encounter a call instruction that might modify memory. 650288943Sdim if (MI->isCall()) 651274955Ssvnmir return E; 652274955Ssvnmir 653274955Ssvnmir // Update modified / uses register lists. 654274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 655274955Ssvnmir 656274955Ssvnmir // Otherwise, if the base register is modified, we have no match, so 657274955Ssvnmir // return early. 658274955Ssvnmir if (ModifiedRegs[BaseReg]) 659274955Ssvnmir return E; 660288943Sdim 661288943Sdim // Update list of instructions that read/write memory. 662288943Sdim if (MI->mayLoadOrStore()) 663288943Sdim MemInsns.push_back(MI); 664274955Ssvnmir } 665274955Ssvnmir return E; 666274955Ssvnmir} 667274955Ssvnmir 668274955SsvnmirMachineBasicBlock::iterator 669274955SsvnmirAArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, 670274955Ssvnmir MachineBasicBlock::iterator Update) { 671274955Ssvnmir assert((Update->getOpcode() == AArch64::ADDXri || 672274955Ssvnmir Update->getOpcode() == AArch64::SUBXri) && 673274955Ssvnmir "Unexpected base register update instruction to merge!"); 674274955Ssvnmir MachineBasicBlock::iterator NextI = I; 675274955Ssvnmir // Return the instruction following the merged instruction, which is 676274955Ssvnmir // the instruction following our unmerged load. Unless that's the add/sub 677274955Ssvnmir // instruction we're merging, in which case it's the one after that. 678274955Ssvnmir if (++NextI == Update) 679274955Ssvnmir ++NextI; 680274955Ssvnmir 681274955Ssvnmir int Value = Update->getOperand(2).getImm(); 682274955Ssvnmir assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && 683274955Ssvnmir "Can't merge 1 << 12 offset into pre-indexed load / store"); 684274955Ssvnmir if (Update->getOpcode() == AArch64::SUBXri) 685274955Ssvnmir Value = -Value; 686274955Ssvnmir 687274955Ssvnmir unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); 688274955Ssvnmir MachineInstrBuilder MIB = 689274955Ssvnmir BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 690274955Ssvnmir .addOperand(Update->getOperand(0)) 691274955Ssvnmir .addOperand(I->getOperand(0)) 692274955Ssvnmir .addOperand(I->getOperand(1)) 693274955Ssvnmir .addImm(Value); 694274955Ssvnmir (void)MIB; 695274955Ssvnmir 696274955Ssvnmir DEBUG(dbgs() << "Creating pre-indexed load/store."); 697274955Ssvnmir DEBUG(dbgs() << " Replacing instructions:\n "); 698274955Ssvnmir DEBUG(I->print(dbgs())); 699274955Ssvnmir DEBUG(dbgs() << " "); 700274955Ssvnmir DEBUG(Update->print(dbgs())); 701274955Ssvnmir DEBUG(dbgs() << " with instruction:\n "); 702274955Ssvnmir DEBUG(((MachineInstr *)MIB)->print(dbgs())); 703274955Ssvnmir DEBUG(dbgs() << "\n"); 704274955Ssvnmir 705274955Ssvnmir // Erase the old instructions for the block. 706274955Ssvnmir I->eraseFromParent(); 707274955Ssvnmir Update->eraseFromParent(); 708274955Ssvnmir 709274955Ssvnmir return NextI; 710274955Ssvnmir} 711274955Ssvnmir 712274955SsvnmirMachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn( 713274955Ssvnmir MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) { 714274955Ssvnmir assert((Update->getOpcode() == AArch64::ADDXri || 715274955Ssvnmir Update->getOpcode() == AArch64::SUBXri) && 716274955Ssvnmir "Unexpected base register update instruction to merge!"); 717274955Ssvnmir MachineBasicBlock::iterator NextI = I; 718274955Ssvnmir // Return the instruction following the merged instruction, which is 719274955Ssvnmir // the instruction following our unmerged load. Unless that's the add/sub 720274955Ssvnmir // instruction we're merging, in which case it's the one after that. 721274955Ssvnmir if (++NextI == Update) 722274955Ssvnmir ++NextI; 723274955Ssvnmir 724274955Ssvnmir int Value = Update->getOperand(2).getImm(); 725274955Ssvnmir assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && 726274955Ssvnmir "Can't merge 1 << 12 offset into post-indexed load / store"); 727274955Ssvnmir if (Update->getOpcode() == AArch64::SUBXri) 728274955Ssvnmir Value = -Value; 729274955Ssvnmir 730274955Ssvnmir unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); 731274955Ssvnmir MachineInstrBuilder MIB = 732274955Ssvnmir BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 733274955Ssvnmir .addOperand(Update->getOperand(0)) 734274955Ssvnmir .addOperand(I->getOperand(0)) 735274955Ssvnmir .addOperand(I->getOperand(1)) 736274955Ssvnmir .addImm(Value); 737274955Ssvnmir (void)MIB; 738274955Ssvnmir 739274955Ssvnmir DEBUG(dbgs() << "Creating post-indexed load/store."); 740274955Ssvnmir DEBUG(dbgs() << " Replacing instructions:\n "); 741274955Ssvnmir DEBUG(I->print(dbgs())); 742274955Ssvnmir DEBUG(dbgs() << " "); 743274955Ssvnmir DEBUG(Update->print(dbgs())); 744274955Ssvnmir DEBUG(dbgs() << " with instruction:\n "); 745274955Ssvnmir DEBUG(((MachineInstr *)MIB)->print(dbgs())); 746274955Ssvnmir DEBUG(dbgs() << "\n"); 747274955Ssvnmir 748274955Ssvnmir // Erase the old instructions for the block. 749274955Ssvnmir I->eraseFromParent(); 750274955Ssvnmir Update->eraseFromParent(); 751274955Ssvnmir 752274955Ssvnmir return NextI; 753274955Ssvnmir} 754274955Ssvnmir 755274955Ssvnmirstatic bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, 756274955Ssvnmir int Offset) { 757274955Ssvnmir switch (MI->getOpcode()) { 758274955Ssvnmir default: 759274955Ssvnmir break; 760274955Ssvnmir case AArch64::SUBXri: 761274955Ssvnmir // Negate the offset for a SUB instruction. 762274955Ssvnmir Offset *= -1; 763274955Ssvnmir // FALLTHROUGH 764274955Ssvnmir case AArch64::ADDXri: 765274955Ssvnmir // Make sure it's a vanilla immediate operand, not a relocation or 766274955Ssvnmir // anything else we can't handle. 767274955Ssvnmir if (!MI->getOperand(2).isImm()) 768274955Ssvnmir break; 769274955Ssvnmir // Watch out for 1 << 12 shifted value. 770274955Ssvnmir if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) 771274955Ssvnmir break; 772274955Ssvnmir // If the instruction has the base register as source and dest and the 773274955Ssvnmir // immediate will fit in a signed 9-bit integer, then we have a match. 774274955Ssvnmir if (MI->getOperand(0).getReg() == BaseReg && 775274955Ssvnmir MI->getOperand(1).getReg() == BaseReg && 776274955Ssvnmir MI->getOperand(2).getImm() <= 255 && 777274955Ssvnmir MI->getOperand(2).getImm() >= -256) { 778274955Ssvnmir // If we have a non-zero Offset, we check that it matches the amount 779274955Ssvnmir // we're adding to the register. 780274955Ssvnmir if (!Offset || Offset == MI->getOperand(2).getImm()) 781274955Ssvnmir return true; 782274955Ssvnmir } 783274955Ssvnmir break; 784274955Ssvnmir } 785274955Ssvnmir return false; 786274955Ssvnmir} 787274955Ssvnmir 788274955SsvnmirMachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( 789274955Ssvnmir MachineBasicBlock::iterator I, unsigned Limit, int Value) { 790274955Ssvnmir MachineBasicBlock::iterator E = I->getParent()->end(); 791274955Ssvnmir MachineInstr *MemMI = I; 792274955Ssvnmir MachineBasicBlock::iterator MBBI = I; 793274955Ssvnmir const MachineFunction &MF = *MemMI->getParent()->getParent(); 794274955Ssvnmir 795274955Ssvnmir unsigned DestReg = MemMI->getOperand(0).getReg(); 796274955Ssvnmir unsigned BaseReg = MemMI->getOperand(1).getReg(); 797274955Ssvnmir int Offset = MemMI->getOperand(2).getImm() * 798274955Ssvnmir TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); 799274955Ssvnmir 800274955Ssvnmir // If the base register overlaps the destination register, we can't 801274955Ssvnmir // merge the update. 802274955Ssvnmir if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 803274955Ssvnmir return E; 804274955Ssvnmir 805274955Ssvnmir // Scan forward looking for post-index opportunities. 806274955Ssvnmir // Updating instructions can't be formed if the memory insn already 807274955Ssvnmir // has an offset other than the value we're looking for. 808274955Ssvnmir if (Offset != Value) 809274955Ssvnmir return E; 810274955Ssvnmir 811274955Ssvnmir // Track which registers have been modified and used between the first insn 812274955Ssvnmir // (inclusive) and the second insn. 813274955Ssvnmir BitVector ModifiedRegs, UsedRegs; 814274955Ssvnmir ModifiedRegs.resize(TRI->getNumRegs()); 815274955Ssvnmir UsedRegs.resize(TRI->getNumRegs()); 816274955Ssvnmir ++MBBI; 817274955Ssvnmir for (unsigned Count = 0; MBBI != E; ++MBBI) { 818274955Ssvnmir MachineInstr *MI = MBBI; 819274955Ssvnmir // Skip DBG_VALUE instructions. Otherwise debug info can affect the 820274955Ssvnmir // optimization by changing how far we scan. 821274955Ssvnmir if (MI->isDebugValue()) 822274955Ssvnmir continue; 823274955Ssvnmir 824274955Ssvnmir // Now that we know this is a real instruction, count it. 825274955Ssvnmir ++Count; 826274955Ssvnmir 827274955Ssvnmir // If we found a match, return it. 828274955Ssvnmir if (isMatchingUpdateInsn(MI, BaseReg, Value)) 829274955Ssvnmir return MBBI; 830274955Ssvnmir 831274955Ssvnmir // Update the status of what the instruction clobbered and used. 832274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 833274955Ssvnmir 834274955Ssvnmir // Otherwise, if the base register is used or modified, we have no match, so 835274955Ssvnmir // return early. 836274955Ssvnmir if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) 837274955Ssvnmir return E; 838274955Ssvnmir } 839274955Ssvnmir return E; 840274955Ssvnmir} 841274955Ssvnmir 842274955SsvnmirMachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( 843274955Ssvnmir MachineBasicBlock::iterator I, unsigned Limit) { 844274955Ssvnmir MachineBasicBlock::iterator B = I->getParent()->begin(); 845274955Ssvnmir MachineBasicBlock::iterator E = I->getParent()->end(); 846274955Ssvnmir MachineInstr *MemMI = I; 847274955Ssvnmir MachineBasicBlock::iterator MBBI = I; 848274955Ssvnmir const MachineFunction &MF = *MemMI->getParent()->getParent(); 849274955Ssvnmir 850274955Ssvnmir unsigned DestReg = MemMI->getOperand(0).getReg(); 851274955Ssvnmir unsigned BaseReg = MemMI->getOperand(1).getReg(); 852274955Ssvnmir int Offset = MemMI->getOperand(2).getImm(); 853274955Ssvnmir unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); 854274955Ssvnmir 855274955Ssvnmir // If the load/store is the first instruction in the block, there's obviously 856274955Ssvnmir // not any matching update. Ditto if the memory offset isn't zero. 857274955Ssvnmir if (MBBI == B || Offset != 0) 858274955Ssvnmir return E; 859274955Ssvnmir // If the base register overlaps the destination register, we can't 860274955Ssvnmir // merge the update. 861274955Ssvnmir if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 862274955Ssvnmir return E; 863274955Ssvnmir 864274955Ssvnmir // Track which registers have been modified and used between the first insn 865274955Ssvnmir // (inclusive) and the second insn. 866274955Ssvnmir BitVector ModifiedRegs, UsedRegs; 867274955Ssvnmir ModifiedRegs.resize(TRI->getNumRegs()); 868274955Ssvnmir UsedRegs.resize(TRI->getNumRegs()); 869274955Ssvnmir --MBBI; 870274955Ssvnmir for (unsigned Count = 0; MBBI != B; --MBBI) { 871274955Ssvnmir MachineInstr *MI = MBBI; 872274955Ssvnmir // Skip DBG_VALUE instructions. Otherwise debug info can affect the 873274955Ssvnmir // optimization by changing how far we scan. 874274955Ssvnmir if (MI->isDebugValue()) 875274955Ssvnmir continue; 876274955Ssvnmir 877274955Ssvnmir // Now that we know this is a real instruction, count it. 878274955Ssvnmir ++Count; 879274955Ssvnmir 880274955Ssvnmir // If we found a match, return it. 881274955Ssvnmir if (isMatchingUpdateInsn(MI, BaseReg, RegSize)) 882274955Ssvnmir return MBBI; 883274955Ssvnmir 884274955Ssvnmir // Update the status of what the instruction clobbered and used. 885274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 886274955Ssvnmir 887274955Ssvnmir // Otherwise, if the base register is used or modified, we have no match, so 888274955Ssvnmir // return early. 889274955Ssvnmir if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) 890274955Ssvnmir return E; 891274955Ssvnmir } 892274955Ssvnmir return E; 893274955Ssvnmir} 894274955Ssvnmir 895274955Ssvnmirbool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { 896274955Ssvnmir bool Modified = false; 897274955Ssvnmir // Two tranformations to do here: 898274955Ssvnmir // 1) Find loads and stores that can be merged into a single load or store 899274955Ssvnmir // pair instruction. 900274955Ssvnmir // e.g., 901274955Ssvnmir // ldr x0, [x2] 902274955Ssvnmir // ldr x1, [x2, #8] 903274955Ssvnmir // ; becomes 904274955Ssvnmir // ldp x0, x1, [x2] 905274955Ssvnmir // 2) Find base register updates that can be merged into the load or store 906274955Ssvnmir // as a base-reg writeback. 907274955Ssvnmir // e.g., 908274955Ssvnmir // ldr x0, [x2] 909274955Ssvnmir // add x2, x2, #4 910274955Ssvnmir // ; becomes 911274955Ssvnmir // ldr x0, [x2], #4 912274955Ssvnmir 913274955Ssvnmir for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 914274955Ssvnmir MBBI != E;) { 915274955Ssvnmir MachineInstr *MI = MBBI; 916274955Ssvnmir switch (MI->getOpcode()) { 917274955Ssvnmir default: 918274955Ssvnmir // Just move on to the next instruction. 919274955Ssvnmir ++MBBI; 920274955Ssvnmir break; 921274955Ssvnmir case AArch64::STRSui: 922274955Ssvnmir case AArch64::STRDui: 923274955Ssvnmir case AArch64::STRQui: 924274955Ssvnmir case AArch64::STRXui: 925274955Ssvnmir case AArch64::STRWui: 926274955Ssvnmir case AArch64::LDRSui: 927274955Ssvnmir case AArch64::LDRDui: 928274955Ssvnmir case AArch64::LDRQui: 929274955Ssvnmir case AArch64::LDRXui: 930274955Ssvnmir case AArch64::LDRWui: 931288943Sdim case AArch64::LDRSWui: 932274955Ssvnmir // do the unscaled versions as well 933274955Ssvnmir case AArch64::STURSi: 934274955Ssvnmir case AArch64::STURDi: 935274955Ssvnmir case AArch64::STURQi: 936274955Ssvnmir case AArch64::STURWi: 937274955Ssvnmir case AArch64::STURXi: 938274955Ssvnmir case AArch64::LDURSi: 939274955Ssvnmir case AArch64::LDURDi: 940274955Ssvnmir case AArch64::LDURQi: 941274955Ssvnmir case AArch64::LDURWi: 942288943Sdim case AArch64::LDURXi: 943288943Sdim case AArch64::LDURSWi: { 944274955Ssvnmir // If this is a volatile load/store, don't mess with it. 945274955Ssvnmir if (MI->hasOrderedMemoryRef()) { 946274955Ssvnmir ++MBBI; 947274955Ssvnmir break; 948274955Ssvnmir } 949274955Ssvnmir // Make sure this is a reg+imm (as opposed to an address reloc). 950274955Ssvnmir if (!MI->getOperand(2).isImm()) { 951274955Ssvnmir ++MBBI; 952274955Ssvnmir break; 953274955Ssvnmir } 954274955Ssvnmir // Check if this load/store has a hint to avoid pair formation. 955274955Ssvnmir // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 956274955Ssvnmir if (TII->isLdStPairSuppressed(MI)) { 957274955Ssvnmir ++MBBI; 958274955Ssvnmir break; 959274955Ssvnmir } 960274955Ssvnmir // Look ahead up to ScanLimit instructions for a pairable instruction. 961274955Ssvnmir bool MergeForward = false; 962288943Sdim int SExtIdx = -1; 963274955Ssvnmir MachineBasicBlock::iterator Paired = 964288943Sdim findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit); 965274955Ssvnmir if (Paired != E) { 966274955Ssvnmir // Merge the loads into a pair. Keeping the iterator straight is a 967274955Ssvnmir // pain, so we let the merge routine tell us what the next instruction 968274955Ssvnmir // is after it's done mucking about. 969288943Sdim MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx); 970274955Ssvnmir 971274955Ssvnmir Modified = true; 972274955Ssvnmir ++NumPairCreated; 973274955Ssvnmir if (isUnscaledLdst(MI->getOpcode())) 974274955Ssvnmir ++NumUnscaledPairCreated; 975274955Ssvnmir break; 976274955Ssvnmir } 977274955Ssvnmir ++MBBI; 978274955Ssvnmir break; 979274955Ssvnmir } 980274955Ssvnmir // FIXME: Do the other instructions. 981274955Ssvnmir } 982274955Ssvnmir } 983274955Ssvnmir 984274955Ssvnmir for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 985274955Ssvnmir MBBI != E;) { 986274955Ssvnmir MachineInstr *MI = MBBI; 987274955Ssvnmir // Do update merging. It's simpler to keep this separate from the above 988274955Ssvnmir // switch, though not strictly necessary. 989288943Sdim unsigned Opc = MI->getOpcode(); 990274955Ssvnmir switch (Opc) { 991274955Ssvnmir default: 992274955Ssvnmir // Just move on to the next instruction. 993274955Ssvnmir ++MBBI; 994274955Ssvnmir break; 995274955Ssvnmir case AArch64::STRSui: 996274955Ssvnmir case AArch64::STRDui: 997274955Ssvnmir case AArch64::STRQui: 998274955Ssvnmir case AArch64::STRXui: 999274955Ssvnmir case AArch64::STRWui: 1000274955Ssvnmir case AArch64::LDRSui: 1001274955Ssvnmir case AArch64::LDRDui: 1002274955Ssvnmir case AArch64::LDRQui: 1003274955Ssvnmir case AArch64::LDRXui: 1004274955Ssvnmir case AArch64::LDRWui: 1005274955Ssvnmir // do the unscaled versions as well 1006274955Ssvnmir case AArch64::STURSi: 1007274955Ssvnmir case AArch64::STURDi: 1008274955Ssvnmir case AArch64::STURQi: 1009274955Ssvnmir case AArch64::STURWi: 1010274955Ssvnmir case AArch64::STURXi: 1011274955Ssvnmir case AArch64::LDURSi: 1012274955Ssvnmir case AArch64::LDURDi: 1013274955Ssvnmir case AArch64::LDURQi: 1014274955Ssvnmir case AArch64::LDURWi: 1015274955Ssvnmir case AArch64::LDURXi: { 1016274955Ssvnmir // Make sure this is a reg+imm (as opposed to an address reloc). 1017274955Ssvnmir if (!MI->getOperand(2).isImm()) { 1018274955Ssvnmir ++MBBI; 1019274955Ssvnmir break; 1020274955Ssvnmir } 1021274955Ssvnmir // Look ahead up to ScanLimit instructions for a mergable instruction. 1022274955Ssvnmir MachineBasicBlock::iterator Update = 1023274955Ssvnmir findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); 1024274955Ssvnmir if (Update != E) { 1025274955Ssvnmir // Merge the update into the ld/st. 1026274955Ssvnmir MBBI = mergePostIdxUpdateInsn(MBBI, Update); 1027274955Ssvnmir Modified = true; 1028274955Ssvnmir ++NumPostFolded; 1029274955Ssvnmir break; 1030274955Ssvnmir } 1031274955Ssvnmir // Don't know how to handle pre/post-index versions, so move to the next 1032274955Ssvnmir // instruction. 1033274955Ssvnmir if (isUnscaledLdst(Opc)) { 1034274955Ssvnmir ++MBBI; 1035274955Ssvnmir break; 1036274955Ssvnmir } 1037274955Ssvnmir 1038274955Ssvnmir // Look back to try to find a pre-index instruction. For example, 1039274955Ssvnmir // add x0, x0, #8 1040274955Ssvnmir // ldr x1, [x0] 1041274955Ssvnmir // merged into: 1042274955Ssvnmir // ldr x1, [x0, #8]! 1043274955Ssvnmir Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); 1044274955Ssvnmir if (Update != E) { 1045274955Ssvnmir // Merge the update into the ld/st. 1046274955Ssvnmir MBBI = mergePreIdxUpdateInsn(MBBI, Update); 1047274955Ssvnmir Modified = true; 1048274955Ssvnmir ++NumPreFolded; 1049274955Ssvnmir break; 1050274955Ssvnmir } 1051274955Ssvnmir 1052274955Ssvnmir // Look forward to try to find a post-index instruction. For example, 1053274955Ssvnmir // ldr x1, [x0, #64] 1054274955Ssvnmir // add x0, x0, #64 1055274955Ssvnmir // merged into: 1056274955Ssvnmir // ldr x1, [x0, #64]! 1057274955Ssvnmir 1058274955Ssvnmir // The immediate in the load/store is scaled by the size of the register 1059274955Ssvnmir // being loaded. The immediate in the add we're looking for, 1060274955Ssvnmir // however, is not, so adjust here. 1061274955Ssvnmir int Value = MI->getOperand(2).getImm() * 1062274955Ssvnmir TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) 1063274955Ssvnmir ->getSize(); 1064274955Ssvnmir Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); 1065274955Ssvnmir if (Update != E) { 1066274955Ssvnmir // Merge the update into the ld/st. 1067274955Ssvnmir MBBI = mergePreIdxUpdateInsn(MBBI, Update); 1068274955Ssvnmir Modified = true; 1069274955Ssvnmir ++NumPreFolded; 1070274955Ssvnmir break; 1071274955Ssvnmir } 1072274955Ssvnmir 1073274955Ssvnmir // Nothing found. Just move to the next instruction. 1074274955Ssvnmir ++MBBI; 1075274955Ssvnmir break; 1076274955Ssvnmir } 1077274955Ssvnmir // FIXME: Do the other instructions. 1078274955Ssvnmir } 1079274955Ssvnmir } 1080274955Ssvnmir 1081274955Ssvnmir return Modified; 1082274955Ssvnmir} 1083274955Ssvnmir 1084274955Ssvnmirbool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1085288943Sdim TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo()); 1086288943Sdim TRI = Fn.getSubtarget().getRegisterInfo(); 1087274955Ssvnmir 1088274955Ssvnmir bool Modified = false; 1089274955Ssvnmir for (auto &MBB : Fn) 1090274955Ssvnmir Modified |= optimizeBlock(MBB); 1091274955Ssvnmir 1092274955Ssvnmir return Modified; 1093274955Ssvnmir} 1094274955Ssvnmir 1095274955Ssvnmir// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep 1096274955Ssvnmir// loads and stores near one another? 1097274955Ssvnmir 1098274955Ssvnmir/// createARMLoadStoreOptimizationPass - returns an instance of the load / store 1099274955Ssvnmir/// optimization pass. 1100274955SsvnmirFunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { 1101274955Ssvnmir return new AArch64LoadStoreOpt(); 1102274955Ssvnmir} 1103