1274955Ssvnmir//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// 2274955Ssvnmir// 3274955Ssvnmir// The LLVM Compiler Infrastructure 4274955Ssvnmir// 5274955Ssvnmir// This file is distributed under the University of Illinois Open Source 6274955Ssvnmir// License. See LICENSE.TXT for details. 7274955Ssvnmir// 8274955Ssvnmir//===----------------------------------------------------------------------===// 9274955Ssvnmir// 10274955Ssvnmir// This file contains a pass that performs load / store related peephole 11274955Ssvnmir// optimizations. This pass should be run after register allocation. 12274955Ssvnmir// 13274955Ssvnmir//===----------------------------------------------------------------------===// 14274955Ssvnmir 15274955Ssvnmir#include "AArch64InstrInfo.h" 16280031Sdim#include "AArch64Subtarget.h" 17274955Ssvnmir#include "MCTargetDesc/AArch64AddressingModes.h" 18274955Ssvnmir#include "llvm/ADT/BitVector.h" 19288943Sdim#include "llvm/ADT/SmallVector.h" 20280031Sdim#include "llvm/ADT/Statistic.h" 21274955Ssvnmir#include "llvm/CodeGen/MachineBasicBlock.h" 22274955Ssvnmir#include "llvm/CodeGen/MachineFunctionPass.h" 23274955Ssvnmir#include "llvm/CodeGen/MachineInstr.h" 24274955Ssvnmir#include "llvm/CodeGen/MachineInstrBuilder.h" 25274955Ssvnmir#include "llvm/Support/CommandLine.h" 26274955Ssvnmir#include "llvm/Support/Debug.h" 27274955Ssvnmir#include "llvm/Support/ErrorHandling.h" 28274955Ssvnmir#include "llvm/Support/raw_ostream.h" 29280031Sdim#include "llvm/Target/TargetInstrInfo.h" 30280031Sdim#include "llvm/Target/TargetMachine.h" 31280031Sdim#include "llvm/Target/TargetRegisterInfo.h" 32274955Ssvnmirusing namespace llvm; 33274955Ssvnmir 34274955Ssvnmir#define DEBUG_TYPE "aarch64-ldst-opt" 35274955Ssvnmir 36274955Ssvnmir/// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine 37274955Ssvnmir/// load / store instructions to form ldp / stp instructions. 38274955Ssvnmir 39274955SsvnmirSTATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); 40274955SsvnmirSTATISTIC(NumPostFolded, "Number of post-index updates folded"); 41274955SsvnmirSTATISTIC(NumPreFolded, "Number of pre-index updates folded"); 42274955SsvnmirSTATISTIC(NumUnscaledPairCreated, 43274955Ssvnmir "Number of load/store from unscaled generated"); 44296417SdimSTATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted"); 45296417SdimSTATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); 46296417SdimSTATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); 47274955Ssvnmir 48274955Ssvnmirstatic cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", 49274955Ssvnmir cl::init(20), cl::Hidden); 50274955Ssvnmir 51296417Sdimnamespace llvm { 52296417Sdimvoid initializeAArch64LoadStoreOptPass(PassRegistry &); 53296417Sdim} 54274955Ssvnmir 55296417Sdim#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" 56296417Sdim 57274955Ssvnmirnamespace { 58296417Sdim 59296417Sdimtypedef struct LdStPairFlags { 60296417Sdim // If a matching instruction is found, MergeForward is set to true if the 61296417Sdim // merge is to remove the first instruction and replace the second with 62296417Sdim // a pair-wise insn, and false if the reverse is true. 63296417Sdim bool MergeForward; 64296417Sdim 65296417Sdim // SExtIdx gives the index of the result of the load pair that must be 66296417Sdim // extended. The value of SExtIdx assumes that the paired load produces the 67296417Sdim // value in this order: (I, returned iterator), i.e., -1 means no value has 68296417Sdim // to be extended, 0 means I, and 1 means the returned iterator. 69296417Sdim int SExtIdx; 70296417Sdim 71296417Sdim LdStPairFlags() : MergeForward(false), SExtIdx(-1) {} 72296417Sdim 73296417Sdim void setMergeForward(bool V = true) { MergeForward = V; } 74296417Sdim bool getMergeForward() const { return MergeForward; } 75296417Sdim 76296417Sdim void setSExtIdx(int V) { SExtIdx = V; } 77296417Sdim int getSExtIdx() const { return SExtIdx; } 78296417Sdim 79296417Sdim} LdStPairFlags; 80296417Sdim 81274955Ssvnmirstruct AArch64LoadStoreOpt : public MachineFunctionPass { 82274955Ssvnmir static char ID; 83296417Sdim AArch64LoadStoreOpt() : MachineFunctionPass(ID) { 84296417Sdim initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry()); 85296417Sdim } 86274955Ssvnmir 87274955Ssvnmir const AArch64InstrInfo *TII; 88274955Ssvnmir const TargetRegisterInfo *TRI; 89296417Sdim const AArch64Subtarget *Subtarget; 90274955Ssvnmir 91274955Ssvnmir // Scan the instructions looking for a load/store that can be combined 92274955Ssvnmir // with the current instruction into a load/store pair. 93274955Ssvnmir // Return the matching instruction if one is found, else MBB->end(). 94274955Ssvnmir MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, 95296417Sdim LdStPairFlags &Flags, 96274955Ssvnmir unsigned Limit); 97296417Sdim 98296417Sdim // Scan the instructions looking for a store that writes to the address from 99296417Sdim // which the current load instruction reads. Return true if one is found. 100296417Sdim bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, 101296417Sdim MachineBasicBlock::iterator &StoreI); 102296417Sdim 103274955Ssvnmir // Merge the two instructions indicated into a single pair-wise instruction. 104274955Ssvnmir // If MergeForward is true, erase the first instruction and fold its 105274955Ssvnmir // operation into the second. If false, the reverse. Return the instruction 106274955Ssvnmir // following the first instruction (which may change during processing). 107274955Ssvnmir MachineBasicBlock::iterator 108274955Ssvnmir mergePairedInsns(MachineBasicBlock::iterator I, 109296417Sdim MachineBasicBlock::iterator Paired, 110296417Sdim const LdStPairFlags &Flags); 111274955Ssvnmir 112296417Sdim // Promote the load that reads directly from the address stored to. 113296417Sdim MachineBasicBlock::iterator 114296417Sdim promoteLoadFromStore(MachineBasicBlock::iterator LoadI, 115296417Sdim MachineBasicBlock::iterator StoreI); 116296417Sdim 117274955Ssvnmir // Scan the instruction list to find a base register update that can 118274955Ssvnmir // be combined with the current instruction (a load or store) using 119274955Ssvnmir // pre or post indexed addressing with writeback. Scan forwards. 120274955Ssvnmir MachineBasicBlock::iterator 121274955Ssvnmir findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, 122296417Sdim int UnscaledOffset); 123274955Ssvnmir 124274955Ssvnmir // Scan the instruction list to find a base register update that can 125274955Ssvnmir // be combined with the current instruction (a load or store) using 126274955Ssvnmir // pre or post indexed addressing with writeback. Scan backwards. 127274955Ssvnmir MachineBasicBlock::iterator 128274955Ssvnmir findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); 129274955Ssvnmir 130296417Sdim // Find an instruction that updates the base register of the ld/st 131296417Sdim // instruction. 132296417Sdim bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI, 133296417Sdim unsigned BaseReg, int Offset); 134274955Ssvnmir 135296417Sdim // Merge a pre- or post-index base register update into a ld/st instruction. 136274955Ssvnmir MachineBasicBlock::iterator 137296417Sdim mergeUpdateInsn(MachineBasicBlock::iterator I, 138296417Sdim MachineBasicBlock::iterator Update, bool IsPreIdx); 139274955Ssvnmir 140296417Sdim // Find and merge foldable ldr/str instructions. 141296417Sdim bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI); 142274955Ssvnmir 143296417Sdim // Find and promote load instructions which read directly from store. 144296417Sdim bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); 145296417Sdim 146296417Sdim // Check if converting two narrow loads into a single wider load with 147296417Sdim // bitfield extracts could be enabled. 148296417Sdim bool enableNarrowLdMerge(MachineFunction &Fn); 149296417Sdim 150296417Sdim bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt); 151296417Sdim 152274955Ssvnmir bool runOnMachineFunction(MachineFunction &Fn) override; 153274955Ssvnmir 154274955Ssvnmir const char *getPassName() const override { 155296417Sdim return AARCH64_LOAD_STORE_OPT_NAME; 156274955Ssvnmir } 157274955Ssvnmir}; 158274955Ssvnmirchar AArch64LoadStoreOpt::ID = 0; 159280031Sdim} // namespace 160274955Ssvnmir 161296417SdimINITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", 162296417Sdim AARCH64_LOAD_STORE_OPT_NAME, false, false) 163296417Sdim 164296417Sdimstatic bool isUnscaledLdSt(unsigned Opc) { 165274955Ssvnmir switch (Opc) { 166274955Ssvnmir default: 167274955Ssvnmir return false; 168274955Ssvnmir case AArch64::STURSi: 169274955Ssvnmir case AArch64::STURDi: 170274955Ssvnmir case AArch64::STURQi: 171296417Sdim case AArch64::STURBBi: 172296417Sdim case AArch64::STURHHi: 173274955Ssvnmir case AArch64::STURWi: 174274955Ssvnmir case AArch64::STURXi: 175274955Ssvnmir case AArch64::LDURSi: 176274955Ssvnmir case AArch64::LDURDi: 177274955Ssvnmir case AArch64::LDURQi: 178274955Ssvnmir case AArch64::LDURWi: 179274955Ssvnmir case AArch64::LDURXi: 180288943Sdim case AArch64::LDURSWi: 181296417Sdim case AArch64::LDURHHi: 182296417Sdim case AArch64::LDURBBi: 183296417Sdim case AArch64::LDURSBWi: 184296417Sdim case AArch64::LDURSHWi: 185288943Sdim return true; 186274955Ssvnmir } 187274955Ssvnmir} 188274955Ssvnmir 189296417Sdimstatic bool isUnscaledLdSt(MachineInstr *MI) { 190296417Sdim return isUnscaledLdSt(MI->getOpcode()); 191296417Sdim} 192296417Sdim 193296417Sdimstatic unsigned getBitExtrOpcode(MachineInstr *MI) { 194296417Sdim switch (MI->getOpcode()) { 195274955Ssvnmir default: 196296417Sdim llvm_unreachable("Unexpected opcode."); 197296417Sdim case AArch64::LDRBBui: 198296417Sdim case AArch64::LDURBBi: 199296417Sdim case AArch64::LDRHHui: 200296417Sdim case AArch64::LDURHHi: 201296417Sdim return AArch64::UBFMWri; 202296417Sdim case AArch64::LDRSBWui: 203296417Sdim case AArch64::LDURSBWi: 204296417Sdim case AArch64::LDRSHWui: 205296417Sdim case AArch64::LDURSHWi: 206296417Sdim return AArch64::SBFMWri; 207296417Sdim } 208296417Sdim} 209296417Sdim 210296417Sdimstatic bool isNarrowStore(unsigned Opc) { 211296417Sdim switch (Opc) { 212296417Sdim default: 213296417Sdim return false; 214296417Sdim case AArch64::STRBBui: 215296417Sdim case AArch64::STURBBi: 216296417Sdim case AArch64::STRHHui: 217296417Sdim case AArch64::STURHHi: 218296417Sdim return true; 219296417Sdim } 220296417Sdim} 221296417Sdim 222296417Sdimstatic bool isNarrowStore(MachineInstr *MI) { 223296417Sdim return isNarrowStore(MI->getOpcode()); 224296417Sdim} 225296417Sdim 226296417Sdimstatic bool isNarrowLoad(unsigned Opc) { 227296417Sdim switch (Opc) { 228296417Sdim default: 229296417Sdim return false; 230296417Sdim case AArch64::LDRHHui: 231296417Sdim case AArch64::LDURHHi: 232296417Sdim case AArch64::LDRBBui: 233296417Sdim case AArch64::LDURBBi: 234296417Sdim case AArch64::LDRSHWui: 235296417Sdim case AArch64::LDURSHWi: 236296417Sdim case AArch64::LDRSBWui: 237296417Sdim case AArch64::LDURSBWi: 238296417Sdim return true; 239296417Sdim } 240296417Sdim} 241296417Sdim 242296417Sdimstatic bool isNarrowLoad(MachineInstr *MI) { 243296417Sdim return isNarrowLoad(MI->getOpcode()); 244296417Sdim} 245296417Sdim 246296417Sdim// Scaling factor for unscaled load or store. 247296417Sdimstatic int getMemScale(MachineInstr *MI) { 248296417Sdim switch (MI->getOpcode()) { 249296417Sdim default: 250296417Sdim llvm_unreachable("Opcode has unknown scale!"); 251296417Sdim case AArch64::LDRBBui: 252296417Sdim case AArch64::LDURBBi: 253296417Sdim case AArch64::LDRSBWui: 254296417Sdim case AArch64::LDURSBWi: 255296417Sdim case AArch64::STRBBui: 256296417Sdim case AArch64::STURBBi: 257296417Sdim return 1; 258296417Sdim case AArch64::LDRHHui: 259296417Sdim case AArch64::LDURHHi: 260296417Sdim case AArch64::LDRSHWui: 261296417Sdim case AArch64::LDURSHWi: 262296417Sdim case AArch64::STRHHui: 263296417Sdim case AArch64::STURHHi: 264296417Sdim return 2; 265296417Sdim case AArch64::LDRSui: 266296417Sdim case AArch64::LDURSi: 267296417Sdim case AArch64::LDRSWui: 268296417Sdim case AArch64::LDURSWi: 269296417Sdim case AArch64::LDRWui: 270296417Sdim case AArch64::LDURWi: 271274955Ssvnmir case AArch64::STRSui: 272274955Ssvnmir case AArch64::STURSi: 273296417Sdim case AArch64::STRWui: 274296417Sdim case AArch64::STURWi: 275296417Sdim case AArch64::LDPSi: 276296417Sdim case AArch64::LDPSWi: 277296417Sdim case AArch64::LDPWi: 278296417Sdim case AArch64::STPSi: 279296417Sdim case AArch64::STPWi: 280274955Ssvnmir return 4; 281296417Sdim case AArch64::LDRDui: 282296417Sdim case AArch64::LDURDi: 283296417Sdim case AArch64::LDRXui: 284296417Sdim case AArch64::LDURXi: 285274955Ssvnmir case AArch64::STRDui: 286274955Ssvnmir case AArch64::STURDi: 287274955Ssvnmir case AArch64::STRXui: 288274955Ssvnmir case AArch64::STURXi: 289296417Sdim case AArch64::LDPDi: 290296417Sdim case AArch64::LDPXi: 291296417Sdim case AArch64::STPDi: 292296417Sdim case AArch64::STPXi: 293274955Ssvnmir return 8; 294274955Ssvnmir case AArch64::LDRQui: 295274955Ssvnmir case AArch64::LDURQi: 296296417Sdim case AArch64::STRQui: 297296417Sdim case AArch64::STURQi: 298296417Sdim case AArch64::LDPQi: 299296417Sdim case AArch64::STPQi: 300274955Ssvnmir return 16; 301274955Ssvnmir } 302274955Ssvnmir} 303274955Ssvnmir 304288943Sdimstatic unsigned getMatchingNonSExtOpcode(unsigned Opc, 305288943Sdim bool *IsValidLdStrOpc = nullptr) { 306288943Sdim if (IsValidLdStrOpc) 307288943Sdim *IsValidLdStrOpc = true; 308288943Sdim switch (Opc) { 309288943Sdim default: 310288943Sdim if (IsValidLdStrOpc) 311288943Sdim *IsValidLdStrOpc = false; 312288943Sdim return UINT_MAX; 313288943Sdim case AArch64::STRDui: 314288943Sdim case AArch64::STURDi: 315288943Sdim case AArch64::STRQui: 316288943Sdim case AArch64::STURQi: 317296417Sdim case AArch64::STRBBui: 318296417Sdim case AArch64::STURBBi: 319296417Sdim case AArch64::STRHHui: 320296417Sdim case AArch64::STURHHi: 321288943Sdim case AArch64::STRWui: 322288943Sdim case AArch64::STURWi: 323288943Sdim case AArch64::STRXui: 324288943Sdim case AArch64::STURXi: 325288943Sdim case AArch64::LDRDui: 326288943Sdim case AArch64::LDURDi: 327288943Sdim case AArch64::LDRQui: 328288943Sdim case AArch64::LDURQi: 329288943Sdim case AArch64::LDRWui: 330288943Sdim case AArch64::LDURWi: 331288943Sdim case AArch64::LDRXui: 332288943Sdim case AArch64::LDURXi: 333288943Sdim case AArch64::STRSui: 334288943Sdim case AArch64::STURSi: 335288943Sdim case AArch64::LDRSui: 336288943Sdim case AArch64::LDURSi: 337296417Sdim case AArch64::LDRHHui: 338296417Sdim case AArch64::LDURHHi: 339296417Sdim case AArch64::LDRBBui: 340296417Sdim case AArch64::LDURBBi: 341288943Sdim return Opc; 342288943Sdim case AArch64::LDRSWui: 343288943Sdim return AArch64::LDRWui; 344288943Sdim case AArch64::LDURSWi: 345288943Sdim return AArch64::LDURWi; 346296417Sdim case AArch64::LDRSBWui: 347296417Sdim return AArch64::LDRBBui; 348296417Sdim case AArch64::LDRSHWui: 349296417Sdim return AArch64::LDRHHui; 350296417Sdim case AArch64::LDURSBWi: 351296417Sdim return AArch64::LDURBBi; 352296417Sdim case AArch64::LDURSHWi: 353296417Sdim return AArch64::LDURHHi; 354288943Sdim } 355288943Sdim} 356288943Sdim 357274955Ssvnmirstatic unsigned getMatchingPairOpcode(unsigned Opc) { 358274955Ssvnmir switch (Opc) { 359274955Ssvnmir default: 360274955Ssvnmir llvm_unreachable("Opcode has no pairwise equivalent!"); 361274955Ssvnmir case AArch64::STRSui: 362274955Ssvnmir case AArch64::STURSi: 363274955Ssvnmir return AArch64::STPSi; 364274955Ssvnmir case AArch64::STRDui: 365274955Ssvnmir case AArch64::STURDi: 366274955Ssvnmir return AArch64::STPDi; 367274955Ssvnmir case AArch64::STRQui: 368274955Ssvnmir case AArch64::STURQi: 369274955Ssvnmir return AArch64::STPQi; 370296417Sdim case AArch64::STRBBui: 371296417Sdim return AArch64::STRHHui; 372296417Sdim case AArch64::STRHHui: 373296417Sdim return AArch64::STRWui; 374296417Sdim case AArch64::STURBBi: 375296417Sdim return AArch64::STURHHi; 376296417Sdim case AArch64::STURHHi: 377296417Sdim return AArch64::STURWi; 378274955Ssvnmir case AArch64::STRWui: 379274955Ssvnmir case AArch64::STURWi: 380274955Ssvnmir return AArch64::STPWi; 381274955Ssvnmir case AArch64::STRXui: 382274955Ssvnmir case AArch64::STURXi: 383274955Ssvnmir return AArch64::STPXi; 384274955Ssvnmir case AArch64::LDRSui: 385274955Ssvnmir case AArch64::LDURSi: 386274955Ssvnmir return AArch64::LDPSi; 387274955Ssvnmir case AArch64::LDRDui: 388274955Ssvnmir case AArch64::LDURDi: 389274955Ssvnmir return AArch64::LDPDi; 390274955Ssvnmir case AArch64::LDRQui: 391274955Ssvnmir case AArch64::LDURQi: 392274955Ssvnmir return AArch64::LDPQi; 393274955Ssvnmir case AArch64::LDRWui: 394274955Ssvnmir case AArch64::LDURWi: 395274955Ssvnmir return AArch64::LDPWi; 396274955Ssvnmir case AArch64::LDRXui: 397274955Ssvnmir case AArch64::LDURXi: 398274955Ssvnmir return AArch64::LDPXi; 399288943Sdim case AArch64::LDRSWui: 400288943Sdim case AArch64::LDURSWi: 401288943Sdim return AArch64::LDPSWi; 402296417Sdim case AArch64::LDRHHui: 403296417Sdim case AArch64::LDRSHWui: 404296417Sdim return AArch64::LDRWui; 405296417Sdim case AArch64::LDURHHi: 406296417Sdim case AArch64::LDURSHWi: 407296417Sdim return AArch64::LDURWi; 408296417Sdim case AArch64::LDRBBui: 409296417Sdim case AArch64::LDRSBWui: 410296417Sdim return AArch64::LDRHHui; 411296417Sdim case AArch64::LDURBBi: 412296417Sdim case AArch64::LDURSBWi: 413296417Sdim return AArch64::LDURHHi; 414274955Ssvnmir } 415274955Ssvnmir} 416274955Ssvnmir 417296417Sdimstatic unsigned isMatchingStore(MachineInstr *LoadInst, 418296417Sdim MachineInstr *StoreInst) { 419296417Sdim unsigned LdOpc = LoadInst->getOpcode(); 420296417Sdim unsigned StOpc = StoreInst->getOpcode(); 421296417Sdim switch (LdOpc) { 422296417Sdim default: 423296417Sdim llvm_unreachable("Unsupported load instruction!"); 424296417Sdim case AArch64::LDRBBui: 425296417Sdim return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui || 426296417Sdim StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; 427296417Sdim case AArch64::LDURBBi: 428296417Sdim return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi || 429296417Sdim StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; 430296417Sdim case AArch64::LDRHHui: 431296417Sdim return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || 432296417Sdim StOpc == AArch64::STRXui; 433296417Sdim case AArch64::LDURHHi: 434296417Sdim return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || 435296417Sdim StOpc == AArch64::STURXi; 436296417Sdim case AArch64::LDRWui: 437296417Sdim return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; 438296417Sdim case AArch64::LDURWi: 439296417Sdim return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; 440296417Sdim case AArch64::LDRXui: 441296417Sdim return StOpc == AArch64::STRXui; 442296417Sdim case AArch64::LDURXi: 443296417Sdim return StOpc == AArch64::STURXi; 444296417Sdim } 445296417Sdim} 446296417Sdim 447274955Ssvnmirstatic unsigned getPreIndexedOpcode(unsigned Opc) { 448274955Ssvnmir switch (Opc) { 449274955Ssvnmir default: 450274955Ssvnmir llvm_unreachable("Opcode has no pre-indexed equivalent!"); 451274955Ssvnmir case AArch64::STRSui: 452274955Ssvnmir return AArch64::STRSpre; 453274955Ssvnmir case AArch64::STRDui: 454274955Ssvnmir return AArch64::STRDpre; 455274955Ssvnmir case AArch64::STRQui: 456274955Ssvnmir return AArch64::STRQpre; 457296417Sdim case AArch64::STRBBui: 458296417Sdim return AArch64::STRBBpre; 459296417Sdim case AArch64::STRHHui: 460296417Sdim return AArch64::STRHHpre; 461274955Ssvnmir case AArch64::STRWui: 462274955Ssvnmir return AArch64::STRWpre; 463274955Ssvnmir case AArch64::STRXui: 464274955Ssvnmir return AArch64::STRXpre; 465274955Ssvnmir case AArch64::LDRSui: 466274955Ssvnmir return AArch64::LDRSpre; 467274955Ssvnmir case AArch64::LDRDui: 468274955Ssvnmir return AArch64::LDRDpre; 469274955Ssvnmir case AArch64::LDRQui: 470274955Ssvnmir return AArch64::LDRQpre; 471296417Sdim case AArch64::LDRBBui: 472296417Sdim return AArch64::LDRBBpre; 473296417Sdim case AArch64::LDRHHui: 474296417Sdim return AArch64::LDRHHpre; 475274955Ssvnmir case AArch64::LDRWui: 476274955Ssvnmir return AArch64::LDRWpre; 477274955Ssvnmir case AArch64::LDRXui: 478274955Ssvnmir return AArch64::LDRXpre; 479288943Sdim case AArch64::LDRSWui: 480288943Sdim return AArch64::LDRSWpre; 481296417Sdim case AArch64::LDPSi: 482296417Sdim return AArch64::LDPSpre; 483296417Sdim case AArch64::LDPSWi: 484296417Sdim return AArch64::LDPSWpre; 485296417Sdim case AArch64::LDPDi: 486296417Sdim return AArch64::LDPDpre; 487296417Sdim case AArch64::LDPQi: 488296417Sdim return AArch64::LDPQpre; 489296417Sdim case AArch64::LDPWi: 490296417Sdim return AArch64::LDPWpre; 491296417Sdim case AArch64::LDPXi: 492296417Sdim return AArch64::LDPXpre; 493296417Sdim case AArch64::STPSi: 494296417Sdim return AArch64::STPSpre; 495296417Sdim case AArch64::STPDi: 496296417Sdim return AArch64::STPDpre; 497296417Sdim case AArch64::STPQi: 498296417Sdim return AArch64::STPQpre; 499296417Sdim case AArch64::STPWi: 500296417Sdim return AArch64::STPWpre; 501296417Sdim case AArch64::STPXi: 502296417Sdim return AArch64::STPXpre; 503274955Ssvnmir } 504274955Ssvnmir} 505274955Ssvnmir 506274955Ssvnmirstatic unsigned getPostIndexedOpcode(unsigned Opc) { 507274955Ssvnmir switch (Opc) { 508274955Ssvnmir default: 509274955Ssvnmir llvm_unreachable("Opcode has no post-indexed wise equivalent!"); 510274955Ssvnmir case AArch64::STRSui: 511274955Ssvnmir return AArch64::STRSpost; 512274955Ssvnmir case AArch64::STRDui: 513274955Ssvnmir return AArch64::STRDpost; 514274955Ssvnmir case AArch64::STRQui: 515274955Ssvnmir return AArch64::STRQpost; 516296417Sdim case AArch64::STRBBui: 517296417Sdim return AArch64::STRBBpost; 518296417Sdim case AArch64::STRHHui: 519296417Sdim return AArch64::STRHHpost; 520274955Ssvnmir case AArch64::STRWui: 521274955Ssvnmir return AArch64::STRWpost; 522274955Ssvnmir case AArch64::STRXui: 523274955Ssvnmir return AArch64::STRXpost; 524274955Ssvnmir case AArch64::LDRSui: 525274955Ssvnmir return AArch64::LDRSpost; 526274955Ssvnmir case AArch64::LDRDui: 527274955Ssvnmir return AArch64::LDRDpost; 528274955Ssvnmir case AArch64::LDRQui: 529274955Ssvnmir return AArch64::LDRQpost; 530296417Sdim case AArch64::LDRBBui: 531296417Sdim return AArch64::LDRBBpost; 532296417Sdim case AArch64::LDRHHui: 533296417Sdim return AArch64::LDRHHpost; 534274955Ssvnmir case AArch64::LDRWui: 535274955Ssvnmir return AArch64::LDRWpost; 536274955Ssvnmir case AArch64::LDRXui: 537274955Ssvnmir return AArch64::LDRXpost; 538288943Sdim case AArch64::LDRSWui: 539288943Sdim return AArch64::LDRSWpost; 540296417Sdim case AArch64::LDPSi: 541296417Sdim return AArch64::LDPSpost; 542296417Sdim case AArch64::LDPSWi: 543296417Sdim return AArch64::LDPSWpost; 544296417Sdim case AArch64::LDPDi: 545296417Sdim return AArch64::LDPDpost; 546296417Sdim case AArch64::LDPQi: 547296417Sdim return AArch64::LDPQpost; 548296417Sdim case AArch64::LDPWi: 549296417Sdim return AArch64::LDPWpost; 550296417Sdim case AArch64::LDPXi: 551296417Sdim return AArch64::LDPXpost; 552296417Sdim case AArch64::STPSi: 553296417Sdim return AArch64::STPSpost; 554296417Sdim case AArch64::STPDi: 555296417Sdim return AArch64::STPDpost; 556296417Sdim case AArch64::STPQi: 557296417Sdim return AArch64::STPQpost; 558296417Sdim case AArch64::STPWi: 559296417Sdim return AArch64::STPWpost; 560296417Sdim case AArch64::STPXi: 561296417Sdim return AArch64::STPXpost; 562274955Ssvnmir } 563274955Ssvnmir} 564274955Ssvnmir 565296417Sdimstatic bool isPairedLdSt(const MachineInstr *MI) { 566296417Sdim switch (MI->getOpcode()) { 567296417Sdim default: 568296417Sdim return false; 569296417Sdim case AArch64::LDPSi: 570296417Sdim case AArch64::LDPSWi: 571296417Sdim case AArch64::LDPDi: 572296417Sdim case AArch64::LDPQi: 573296417Sdim case AArch64::LDPWi: 574296417Sdim case AArch64::LDPXi: 575296417Sdim case AArch64::STPSi: 576296417Sdim case AArch64::STPDi: 577296417Sdim case AArch64::STPQi: 578296417Sdim case AArch64::STPWi: 579296417Sdim case AArch64::STPXi: 580296417Sdim return true; 581296417Sdim } 582296417Sdim} 583296417Sdim 584296417Sdimstatic const MachineOperand &getLdStRegOp(const MachineInstr *MI, 585296417Sdim unsigned PairedRegOp = 0) { 586296417Sdim assert(PairedRegOp < 2 && "Unexpected register operand idx."); 587296417Sdim unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; 588296417Sdim return MI->getOperand(Idx); 589296417Sdim} 590296417Sdim 591296417Sdimstatic const MachineOperand &getLdStBaseOp(const MachineInstr *MI) { 592296417Sdim unsigned Idx = isPairedLdSt(MI) ? 2 : 1; 593296417Sdim return MI->getOperand(Idx); 594296417Sdim} 595296417Sdim 596296417Sdimstatic const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) { 597296417Sdim unsigned Idx = isPairedLdSt(MI) ? 3 : 2; 598296417Sdim return MI->getOperand(Idx); 599296417Sdim} 600296417Sdim 601296417Sdimstatic bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst, 602296417Sdim MachineInstr *StoreInst) { 603296417Sdim assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); 604296417Sdim int LoadSize = getMemScale(LoadInst); 605296417Sdim int StoreSize = getMemScale(StoreInst); 606296417Sdim int UnscaledStOffset = isUnscaledLdSt(StoreInst) 607296417Sdim ? getLdStOffsetOp(StoreInst).getImm() 608296417Sdim : getLdStOffsetOp(StoreInst).getImm() * StoreSize; 609296417Sdim int UnscaledLdOffset = isUnscaledLdSt(LoadInst) 610296417Sdim ? getLdStOffsetOp(LoadInst).getImm() 611296417Sdim : getLdStOffsetOp(LoadInst).getImm() * LoadSize; 612296417Sdim return (UnscaledStOffset <= UnscaledLdOffset) && 613296417Sdim (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); 614296417Sdim} 615296417Sdim 616274955SsvnmirMachineBasicBlock::iterator 617274955SsvnmirAArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, 618274955Ssvnmir MachineBasicBlock::iterator Paired, 619296417Sdim const LdStPairFlags &Flags) { 620274955Ssvnmir MachineBasicBlock::iterator NextI = I; 621274955Ssvnmir ++NextI; 622274955Ssvnmir // If NextI is the second of the two instructions to be merged, we need 623274955Ssvnmir // to skip one further. Either way we merge will invalidate the iterator, 624274955Ssvnmir // and we don't need to scan the new instruction, as it's a pairwise 625274955Ssvnmir // instruction, which we're not considering for further action anyway. 626274955Ssvnmir if (NextI == Paired) 627274955Ssvnmir ++NextI; 628274955Ssvnmir 629296417Sdim int SExtIdx = Flags.getSExtIdx(); 630288943Sdim unsigned Opc = 631288943Sdim SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); 632296417Sdim bool IsUnscaled = isUnscaledLdSt(Opc); 633296417Sdim int OffsetStride = IsUnscaled ? getMemScale(I) : 1; 634274955Ssvnmir 635296417Sdim bool MergeForward = Flags.getMergeForward(); 636288943Sdim unsigned NewOpc = getMatchingPairOpcode(Opc); 637274955Ssvnmir // Insert our new paired instruction after whichever of the paired 638274955Ssvnmir // instructions MergeForward indicates. 639274955Ssvnmir MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; 640274955Ssvnmir // Also based on MergeForward is from where we copy the base register operand 641274955Ssvnmir // so we get the flags compatible with the input code. 642296417Sdim const MachineOperand &BaseRegOp = 643296417Sdim MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I); 644274955Ssvnmir 645274955Ssvnmir // Which register is Rt and which is Rt2 depends on the offset order. 646274955Ssvnmir MachineInstr *RtMI, *Rt2MI; 647296417Sdim if (getLdStOffsetOp(I).getImm() == 648296417Sdim getLdStOffsetOp(Paired).getImm() + OffsetStride) { 649274955Ssvnmir RtMI = Paired; 650274955Ssvnmir Rt2MI = I; 651288943Sdim // Here we swapped the assumption made for SExtIdx. 652288943Sdim // I.e., we turn ldp I, Paired into ldp Paired, I. 653288943Sdim // Update the index accordingly. 654288943Sdim if (SExtIdx != -1) 655288943Sdim SExtIdx = (SExtIdx + 1) % 2; 656274955Ssvnmir } else { 657274955Ssvnmir RtMI = I; 658274955Ssvnmir Rt2MI = Paired; 659274955Ssvnmir } 660274955Ssvnmir 661296417Sdim int OffsetImm = getLdStOffsetOp(RtMI).getImm(); 662296417Sdim 663296417Sdim if (isNarrowLoad(Opc)) { 664296417Sdim // Change the scaled offset from small to large type. 665296417Sdim if (!IsUnscaled) { 666296417Sdim assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); 667296417Sdim OffsetImm /= 2; 668296417Sdim } 669296417Sdim MachineInstr *RtNewDest = MergeForward ? I : Paired; 670296417Sdim // When merging small (< 32 bit) loads for big-endian targets, the order of 671296417Sdim // the component parts gets swapped. 672296417Sdim if (!Subtarget->isLittleEndian()) 673296417Sdim std::swap(RtMI, Rt2MI); 674296417Sdim // Construct the new load instruction. 675296417Sdim MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2; 676296417Sdim NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 677296417Sdim TII->get(NewOpc)) 678296417Sdim .addOperand(getLdStRegOp(RtNewDest)) 679296417Sdim .addOperand(BaseRegOp) 680296417Sdim .addImm(OffsetImm) 681296417Sdim .setMemRefs(I->mergeMemRefsWith(*Paired)); 682296417Sdim 683296417Sdim DEBUG( 684296417Sdim dbgs() 685296417Sdim << "Creating the new load and extract. Replacing instructions:\n "); 686296417Sdim DEBUG(I->print(dbgs())); 687296417Sdim DEBUG(dbgs() << " "); 688296417Sdim DEBUG(Paired->print(dbgs())); 689296417Sdim DEBUG(dbgs() << " with instructions:\n "); 690296417Sdim DEBUG((NewMemMI)->print(dbgs())); 691296417Sdim 692296417Sdim int Width = getMemScale(I) == 1 ? 8 : 16; 693296417Sdim int LSBLow = 0; 694296417Sdim int LSBHigh = Width; 695296417Sdim int ImmsLow = LSBLow + Width - 1; 696296417Sdim int ImmsHigh = LSBHigh + Width - 1; 697296417Sdim MachineInstr *ExtDestMI = MergeForward ? Paired : I; 698296417Sdim if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) { 699296417Sdim // Create the bitfield extract for high bits. 700296417Sdim BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 701296417Sdim TII->get(getBitExtrOpcode(Rt2MI))) 702296417Sdim .addOperand(getLdStRegOp(Rt2MI)) 703296417Sdim .addReg(getLdStRegOp(RtNewDest).getReg()) 704296417Sdim .addImm(LSBHigh) 705296417Sdim .addImm(ImmsHigh); 706296417Sdim // Create the bitfield extract for low bits. 707296417Sdim if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { 708296417Sdim // For unsigned, prefer to use AND for low bits. 709296417Sdim BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 710296417Sdim TII->get(AArch64::ANDWri)) 711296417Sdim .addOperand(getLdStRegOp(RtMI)) 712296417Sdim .addReg(getLdStRegOp(RtNewDest).getReg()) 713296417Sdim .addImm(ImmsLow); 714296417Sdim } else { 715296417Sdim BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 716296417Sdim TII->get(getBitExtrOpcode(RtMI))) 717296417Sdim .addOperand(getLdStRegOp(RtMI)) 718296417Sdim .addReg(getLdStRegOp(RtNewDest).getReg()) 719296417Sdim .addImm(LSBLow) 720296417Sdim .addImm(ImmsLow); 721296417Sdim } 722296417Sdim } else { 723296417Sdim // Create the bitfield extract for low bits. 724296417Sdim if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { 725296417Sdim // For unsigned, prefer to use AND for low bits. 726296417Sdim BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 727296417Sdim TII->get(AArch64::ANDWri)) 728296417Sdim .addOperand(getLdStRegOp(RtMI)) 729296417Sdim .addReg(getLdStRegOp(RtNewDest).getReg()) 730296417Sdim .addImm(ImmsLow); 731296417Sdim } else { 732296417Sdim BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 733296417Sdim TII->get(getBitExtrOpcode(RtMI))) 734296417Sdim .addOperand(getLdStRegOp(RtMI)) 735296417Sdim .addReg(getLdStRegOp(RtNewDest).getReg()) 736296417Sdim .addImm(LSBLow) 737296417Sdim .addImm(ImmsLow); 738296417Sdim } 739296417Sdim 740296417Sdim // Create the bitfield extract for high bits. 741296417Sdim BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 742296417Sdim TII->get(getBitExtrOpcode(Rt2MI))) 743296417Sdim .addOperand(getLdStRegOp(Rt2MI)) 744296417Sdim .addReg(getLdStRegOp(RtNewDest).getReg()) 745296417Sdim .addImm(LSBHigh) 746296417Sdim .addImm(ImmsHigh); 747296417Sdim } 748296417Sdim DEBUG(dbgs() << " "); 749296417Sdim DEBUG((BitExtMI1)->print(dbgs())); 750296417Sdim DEBUG(dbgs() << " "); 751296417Sdim DEBUG((BitExtMI2)->print(dbgs())); 752296417Sdim DEBUG(dbgs() << "\n"); 753296417Sdim 754296417Sdim // Erase the old instructions. 755296417Sdim I->eraseFromParent(); 756296417Sdim Paired->eraseFromParent(); 757296417Sdim return NextI; 758296417Sdim } 759296417Sdim 760274955Ssvnmir // Construct the new instruction. 761296417Sdim MachineInstrBuilder MIB; 762296417Sdim if (isNarrowStore(Opc)) { 763296417Sdim // Change the scaled offset from small to large type. 764296417Sdim if (!IsUnscaled) { 765296417Sdim assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); 766296417Sdim OffsetImm /= 2; 767296417Sdim } 768296417Sdim MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 769296417Sdim TII->get(NewOpc)) 770296417Sdim .addOperand(getLdStRegOp(I)) 771296417Sdim .addOperand(BaseRegOp) 772296417Sdim .addImm(OffsetImm) 773296417Sdim .setMemRefs(I->mergeMemRefsWith(*Paired)); 774296417Sdim } else { 775296417Sdim // Handle Unscaled 776296417Sdim if (IsUnscaled) 777296417Sdim OffsetImm /= OffsetStride; 778296417Sdim MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 779296417Sdim TII->get(NewOpc)) 780296417Sdim .addOperand(getLdStRegOp(RtMI)) 781296417Sdim .addOperand(getLdStRegOp(Rt2MI)) 782296417Sdim .addOperand(BaseRegOp) 783296417Sdim .addImm(OffsetImm); 784296417Sdim } 785296417Sdim 786274955Ssvnmir (void)MIB; 787274955Ssvnmir 788274955Ssvnmir // FIXME: Do we need/want to copy the mem operands from the source 789274955Ssvnmir // instructions? Probably. What uses them after this? 790274955Ssvnmir 791274955Ssvnmir DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); 792274955Ssvnmir DEBUG(I->print(dbgs())); 793274955Ssvnmir DEBUG(dbgs() << " "); 794274955Ssvnmir DEBUG(Paired->print(dbgs())); 795274955Ssvnmir DEBUG(dbgs() << " with instruction:\n "); 796274955Ssvnmir 797288943Sdim if (SExtIdx != -1) { 798288943Sdim // Generate the sign extension for the proper result of the ldp. 799288943Sdim // I.e., with X1, that would be: 800288943Sdim // %W1<def> = KILL %W1, %X1<imp-def> 801288943Sdim // %X1<def> = SBFMXri %X1<kill>, 0, 31 802288943Sdim MachineOperand &DstMO = MIB->getOperand(SExtIdx); 803288943Sdim // Right now, DstMO has the extended register, since it comes from an 804288943Sdim // extended opcode. 805288943Sdim unsigned DstRegX = DstMO.getReg(); 806288943Sdim // Get the W variant of that register. 807288943Sdim unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); 808288943Sdim // Update the result of LDP to use the W instead of the X variant. 809288943Sdim DstMO.setReg(DstRegW); 810288943Sdim DEBUG(((MachineInstr *)MIB)->print(dbgs())); 811288943Sdim DEBUG(dbgs() << "\n"); 812288943Sdim // Make the machine verifier happy by providing a definition for 813288943Sdim // the X register. 814288943Sdim // Insert this definition right after the generated LDP, i.e., before 815288943Sdim // InsertionPoint. 816288943Sdim MachineInstrBuilder MIBKill = 817288943Sdim BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 818288943Sdim TII->get(TargetOpcode::KILL), DstRegW) 819288943Sdim .addReg(DstRegW) 820288943Sdim .addReg(DstRegX, RegState::Define); 821288943Sdim MIBKill->getOperand(2).setImplicit(); 822288943Sdim // Create the sign extension. 823288943Sdim MachineInstrBuilder MIBSXTW = 824288943Sdim BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 825288943Sdim TII->get(AArch64::SBFMXri), DstRegX) 826288943Sdim .addReg(DstRegX) 827288943Sdim .addImm(0) 828288943Sdim .addImm(31); 829288943Sdim (void)MIBSXTW; 830288943Sdim DEBUG(dbgs() << " Extend operand:\n "); 831288943Sdim DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); 832288943Sdim DEBUG(dbgs() << "\n"); 833288943Sdim } else { 834288943Sdim DEBUG(((MachineInstr *)MIB)->print(dbgs())); 835288943Sdim DEBUG(dbgs() << "\n"); 836288943Sdim } 837288943Sdim 838274955Ssvnmir // Erase the old instructions. 839274955Ssvnmir I->eraseFromParent(); 840274955Ssvnmir Paired->eraseFromParent(); 841274955Ssvnmir 842274955Ssvnmir return NextI; 843274955Ssvnmir} 844274955Ssvnmir 845296417SdimMachineBasicBlock::iterator 846296417SdimAArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, 847296417Sdim MachineBasicBlock::iterator StoreI) { 848296417Sdim MachineBasicBlock::iterator NextI = LoadI; 849296417Sdim ++NextI; 850296417Sdim 851296417Sdim int LoadSize = getMemScale(LoadI); 852296417Sdim int StoreSize = getMemScale(StoreI); 853296417Sdim unsigned LdRt = getLdStRegOp(LoadI).getReg(); 854296417Sdim unsigned StRt = getLdStRegOp(StoreI).getReg(); 855296417Sdim bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); 856296417Sdim 857296417Sdim assert((IsStoreXReg || 858296417Sdim TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) && 859296417Sdim "Unexpected RegClass"); 860296417Sdim 861296417Sdim MachineInstr *BitExtMI; 862296417Sdim if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) { 863296417Sdim // Remove the load, if the destination register of the loads is the same 864296417Sdim // register for stored value. 865296417Sdim if (StRt == LdRt && LoadSize == 8) { 866296417Sdim DEBUG(dbgs() << "Remove load instruction:\n "); 867296417Sdim DEBUG(LoadI->print(dbgs())); 868296417Sdim DEBUG(dbgs() << "\n"); 869296417Sdim LoadI->eraseFromParent(); 870296417Sdim return NextI; 871296417Sdim } 872296417Sdim // Replace the load with a mov if the load and store are in the same size. 873296417Sdim BitExtMI = 874296417Sdim BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), 875296417Sdim TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) 876296417Sdim .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) 877296417Sdim .addReg(StRt) 878296417Sdim .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 879296417Sdim } else { 880296417Sdim // FIXME: Currently we disable this transformation in big-endian targets as 881296417Sdim // performance and correctness are verified only in little-endian. 882296417Sdim if (!Subtarget->isLittleEndian()) 883296417Sdim return NextI; 884296417Sdim bool IsUnscaled = isUnscaledLdSt(LoadI); 885296417Sdim assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match"); 886296417Sdim assert(LoadSize <= StoreSize && "Invalid load size"); 887296417Sdim int UnscaledLdOffset = IsUnscaled 888296417Sdim ? getLdStOffsetOp(LoadI).getImm() 889296417Sdim : getLdStOffsetOp(LoadI).getImm() * LoadSize; 890296417Sdim int UnscaledStOffset = IsUnscaled 891296417Sdim ? getLdStOffsetOp(StoreI).getImm() 892296417Sdim : getLdStOffsetOp(StoreI).getImm() * StoreSize; 893296417Sdim int Width = LoadSize * 8; 894296417Sdim int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); 895296417Sdim int Imms = Immr + Width - 1; 896296417Sdim unsigned DestReg = IsStoreXReg 897296417Sdim ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, 898296417Sdim &AArch64::GPR64RegClass) 899296417Sdim : LdRt; 900296417Sdim 901296417Sdim assert((UnscaledLdOffset >= UnscaledStOffset && 902296417Sdim (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && 903296417Sdim "Invalid offset"); 904296417Sdim 905296417Sdim Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); 906296417Sdim Imms = Immr + Width - 1; 907296417Sdim if (UnscaledLdOffset == UnscaledStOffset) { 908296417Sdim uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N 909296417Sdim | ((Immr) << 6) // immr 910296417Sdim | ((Imms) << 0) // imms 911296417Sdim ; 912296417Sdim 913296417Sdim BitExtMI = 914296417Sdim BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), 915296417Sdim TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), 916296417Sdim DestReg) 917296417Sdim .addReg(StRt) 918296417Sdim .addImm(AndMaskEncoded); 919296417Sdim } else { 920296417Sdim BitExtMI = 921296417Sdim BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), 922296417Sdim TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri), 923296417Sdim DestReg) 924296417Sdim .addReg(StRt) 925296417Sdim .addImm(Immr) 926296417Sdim .addImm(Imms); 927296417Sdim } 928296417Sdim } 929296417Sdim 930296417Sdim DEBUG(dbgs() << "Promoting load by replacing :\n "); 931296417Sdim DEBUG(StoreI->print(dbgs())); 932296417Sdim DEBUG(dbgs() << " "); 933296417Sdim DEBUG(LoadI->print(dbgs())); 934296417Sdim DEBUG(dbgs() << " with instructions:\n "); 935296417Sdim DEBUG(StoreI->print(dbgs())); 936296417Sdim DEBUG(dbgs() << " "); 937296417Sdim DEBUG((BitExtMI)->print(dbgs())); 938296417Sdim DEBUG(dbgs() << "\n"); 939296417Sdim 940296417Sdim // Erase the old instructions. 941296417Sdim LoadI->eraseFromParent(); 942296417Sdim return NextI; 943296417Sdim} 944296417Sdim 945274955Ssvnmir/// trackRegDefsUses - Remember what registers the specified instruction uses 946274955Ssvnmir/// and modifies. 947296417Sdimstatic void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs, 948274955Ssvnmir BitVector &UsedRegs, 949274955Ssvnmir const TargetRegisterInfo *TRI) { 950296417Sdim for (const MachineOperand &MO : MI->operands()) { 951274955Ssvnmir if (MO.isRegMask()) 952274955Ssvnmir ModifiedRegs.setBitsNotInMask(MO.getRegMask()); 953274955Ssvnmir 954274955Ssvnmir if (!MO.isReg()) 955274955Ssvnmir continue; 956274955Ssvnmir unsigned Reg = MO.getReg(); 957274955Ssvnmir if (MO.isDef()) { 958274955Ssvnmir for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 959274955Ssvnmir ModifiedRegs.set(*AI); 960274955Ssvnmir } else { 961274955Ssvnmir assert(MO.isUse() && "Reg operand not a def and not a use?!?"); 962274955Ssvnmir for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 963274955Ssvnmir UsedRegs.set(*AI); 964274955Ssvnmir } 965274955Ssvnmir } 966274955Ssvnmir} 967274955Ssvnmir 968274955Ssvnmirstatic bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { 969296417Sdim // Convert the byte-offset used by unscaled into an "element" offset used 970296417Sdim // by the scaled pair load/store instructions. 971296417Sdim if (IsUnscaled) 972296417Sdim Offset /= OffsetStride; 973296417Sdim 974296417Sdim return Offset <= 63 && Offset >= -64; 975274955Ssvnmir} 976274955Ssvnmir 977274955Ssvnmir// Do alignment, specialized to power of 2 and for signed ints, 978274955Ssvnmir// avoiding having to do a C-style cast from uint_64t to int when 979274955Ssvnmir// using RoundUpToAlignment from include/llvm/Support/MathExtras.h. 980274955Ssvnmir// FIXME: Move this function to include/MathExtras.h? 981274955Ssvnmirstatic int alignTo(int Num, int PowOf2) { 982274955Ssvnmir return (Num + PowOf2 - 1) & ~(PowOf2 - 1); 983274955Ssvnmir} 984274955Ssvnmir 985288943Sdimstatic bool mayAlias(MachineInstr *MIa, MachineInstr *MIb, 986288943Sdim const AArch64InstrInfo *TII) { 987288943Sdim // One of the instructions must modify memory. 988288943Sdim if (!MIa->mayStore() && !MIb->mayStore()) 989288943Sdim return false; 990288943Sdim 991288943Sdim // Both instructions must be memory operations. 992288943Sdim if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore()) 993288943Sdim return false; 994288943Sdim 995288943Sdim return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb); 996288943Sdim} 997288943Sdim 998288943Sdimstatic bool mayAlias(MachineInstr *MIa, 999288943Sdim SmallVectorImpl<MachineInstr *> &MemInsns, 1000288943Sdim const AArch64InstrInfo *TII) { 1001288943Sdim for (auto &MIb : MemInsns) 1002288943Sdim if (mayAlias(MIa, MIb, TII)) 1003288943Sdim return true; 1004288943Sdim 1005288943Sdim return false; 1006288943Sdim} 1007288943Sdim 1008296417Sdimbool AArch64LoadStoreOpt::findMatchingStore( 1009296417Sdim MachineBasicBlock::iterator I, unsigned Limit, 1010296417Sdim MachineBasicBlock::iterator &StoreI) { 1011296417Sdim MachineBasicBlock::iterator E = I->getParent()->begin(); 1012296417Sdim MachineBasicBlock::iterator MBBI = I; 1013296417Sdim MachineInstr *FirstMI = I; 1014296417Sdim unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); 1015296417Sdim 1016296417Sdim // Track which registers have been modified and used between the first insn 1017296417Sdim // and the second insn. 1018296417Sdim BitVector ModifiedRegs, UsedRegs; 1019296417Sdim ModifiedRegs.resize(TRI->getNumRegs()); 1020296417Sdim UsedRegs.resize(TRI->getNumRegs()); 1021296417Sdim 1022296417Sdim for (unsigned Count = 0; MBBI != E && Count < Limit;) { 1023296417Sdim --MBBI; 1024296417Sdim MachineInstr *MI = MBBI; 1025296417Sdim // Skip DBG_VALUE instructions. Otherwise debug info can affect the 1026296417Sdim // optimization by changing how far we scan. 1027296417Sdim if (MI->isDebugValue()) 1028296417Sdim continue; 1029296417Sdim // Now that we know this is a real instruction, count it. 1030296417Sdim ++Count; 1031296417Sdim 1032296417Sdim // If the load instruction reads directly from the address to which the 1033296417Sdim // store instruction writes and the stored value is not modified, we can 1034296417Sdim // promote the load. Since we do not handle stores with pre-/post-index, 1035296417Sdim // it's unnecessary to check if BaseReg is modified by the store itself. 1036296417Sdim if (MI->mayStore() && isMatchingStore(FirstMI, MI) && 1037296417Sdim BaseReg == getLdStBaseOp(MI).getReg() && 1038296417Sdim isLdOffsetInRangeOfSt(FirstMI, MI) && 1039296417Sdim !ModifiedRegs[getLdStRegOp(MI).getReg()]) { 1040296417Sdim StoreI = MBBI; 1041296417Sdim return true; 1042296417Sdim } 1043296417Sdim 1044296417Sdim if (MI->isCall()) 1045296417Sdim return false; 1046296417Sdim 1047296417Sdim // Update modified / uses register lists. 1048296417Sdim trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 1049296417Sdim 1050296417Sdim // Otherwise, if the base register is modified, we have no match, so 1051296417Sdim // return early. 1052296417Sdim if (ModifiedRegs[BaseReg]) 1053296417Sdim return false; 1054296417Sdim 1055296417Sdim // If we encounter a store aliased with the load, return early. 1056296417Sdim if (MI->mayStore() && mayAlias(FirstMI, MI, TII)) 1057296417Sdim return false; 1058296417Sdim } 1059296417Sdim return false; 1060296417Sdim} 1061296417Sdim 1062274955Ssvnmir/// findMatchingInsn - Scan the instructions looking for a load/store that can 1063274955Ssvnmir/// be combined with the current instruction into a load/store pair. 1064274955SsvnmirMachineBasicBlock::iterator 1065274955SsvnmirAArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, 1066296417Sdim LdStPairFlags &Flags, unsigned Limit) { 1067274955Ssvnmir MachineBasicBlock::iterator E = I->getParent()->end(); 1068274955Ssvnmir MachineBasicBlock::iterator MBBI = I; 1069274955Ssvnmir MachineInstr *FirstMI = I; 1070274955Ssvnmir ++MBBI; 1071274955Ssvnmir 1072288943Sdim unsigned Opc = FirstMI->getOpcode(); 1073274955Ssvnmir bool MayLoad = FirstMI->mayLoad(); 1074296417Sdim bool IsUnscaled = isUnscaledLdSt(FirstMI); 1075296417Sdim unsigned Reg = getLdStRegOp(FirstMI).getReg(); 1076296417Sdim unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); 1077296417Sdim int Offset = getLdStOffsetOp(FirstMI).getImm(); 1078296417Sdim bool IsNarrowStore = isNarrowStore(Opc); 1079274955Ssvnmir 1080296417Sdim // For narrow stores, find only the case where the stored value is WZR. 1081296417Sdim if (IsNarrowStore && Reg != AArch64::WZR) 1082296417Sdim return E; 1083296417Sdim 1084274955Ssvnmir // Early exit if the first instruction modifies the base register. 1085274955Ssvnmir // e.g., ldr x0, [x0] 1086296417Sdim if (FirstMI->modifiesRegister(BaseReg, TRI)) 1087296417Sdim return E; 1088296417Sdim 1089274955Ssvnmir // Early exit if the offset if not possible to match. (6 bits of positive 1090274955Ssvnmir // range, plus allow an extra one in case we find a later insn that matches 1091296417Sdim // with Offset-1) 1092296417Sdim int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; 1093296417Sdim if (!(isNarrowLoad(Opc) || IsNarrowStore) && 1094296417Sdim !inBoundsForPair(IsUnscaled, Offset, OffsetStride)) 1095274955Ssvnmir return E; 1096274955Ssvnmir 1097274955Ssvnmir // Track which registers have been modified and used between the first insn 1098274955Ssvnmir // (inclusive) and the second insn. 1099274955Ssvnmir BitVector ModifiedRegs, UsedRegs; 1100274955Ssvnmir ModifiedRegs.resize(TRI->getNumRegs()); 1101274955Ssvnmir UsedRegs.resize(TRI->getNumRegs()); 1102288943Sdim 1103288943Sdim // Remember any instructions that read/write memory between FirstMI and MI. 1104288943Sdim SmallVector<MachineInstr *, 4> MemInsns; 1105288943Sdim 1106274955Ssvnmir for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { 1107274955Ssvnmir MachineInstr *MI = MBBI; 1108274955Ssvnmir // Skip DBG_VALUE instructions. Otherwise debug info can affect the 1109274955Ssvnmir // optimization by changing how far we scan. 1110274955Ssvnmir if (MI->isDebugValue()) 1111274955Ssvnmir continue; 1112274955Ssvnmir 1113274955Ssvnmir // Now that we know this is a real instruction, count it. 1114274955Ssvnmir ++Count; 1115274955Ssvnmir 1116288943Sdim bool CanMergeOpc = Opc == MI->getOpcode(); 1117296417Sdim Flags.setSExtIdx(-1); 1118288943Sdim if (!CanMergeOpc) { 1119288943Sdim bool IsValidLdStrOpc; 1120288943Sdim unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); 1121296417Sdim assert(IsValidLdStrOpc && 1122296417Sdim "Given Opc should be a Load or Store with an immediate"); 1123288943Sdim // Opc will be the first instruction in the pair. 1124296417Sdim Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0); 1125288943Sdim CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); 1126288943Sdim } 1127288943Sdim 1128296417Sdim if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) { 1129296417Sdim assert(MI->mayLoadOrStore() && "Expected memory operation."); 1130274955Ssvnmir // If we've found another instruction with the same opcode, check to see 1131274955Ssvnmir // if the base and offset are compatible with our starting instruction. 1132274955Ssvnmir // These instructions all have scaled immediate operands, so we just 1133274955Ssvnmir // check for +1/-1. Make sure to check the new instruction offset is 1134274955Ssvnmir // actually an immediate and not a symbolic reference destined for 1135274955Ssvnmir // a relocation. 1136274955Ssvnmir // 1137274955Ssvnmir // Pairwise instructions have a 7-bit signed offset field. Single insns 1138274955Ssvnmir // have a 12-bit unsigned offset field. To be a valid combine, the 1139274955Ssvnmir // final offset must be in range. 1140296417Sdim unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); 1141296417Sdim int MIOffset = getLdStOffsetOp(MI).getImm(); 1142274955Ssvnmir if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || 1143274955Ssvnmir (Offset + OffsetStride == MIOffset))) { 1144274955Ssvnmir int MinOffset = Offset < MIOffset ? Offset : MIOffset; 1145274955Ssvnmir // If this is a volatile load/store that otherwise matched, stop looking 1146274955Ssvnmir // as something is going on that we don't have enough information to 1147274955Ssvnmir // safely transform. Similarly, stop if we see a hint to avoid pairs. 1148274955Ssvnmir if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) 1149274955Ssvnmir return E; 1150274955Ssvnmir // If the resultant immediate offset of merging these instructions 1151274955Ssvnmir // is out of range for a pairwise instruction, bail and keep looking. 1152296417Sdim bool MIIsUnscaled = isUnscaledLdSt(MI); 1153296417Sdim bool IsNarrowLoad = isNarrowLoad(MI->getOpcode()); 1154296417Sdim if (!IsNarrowLoad && 1155296417Sdim !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { 1156274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 1157296417Sdim MemInsns.push_back(MI); 1158274955Ssvnmir continue; 1159274955Ssvnmir } 1160296417Sdim 1161296417Sdim if (IsNarrowLoad || IsNarrowStore) { 1162296417Sdim // If the alignment requirements of the scaled wide load/store 1163296417Sdim // instruction can't express the offset of the scaled narrow 1164296417Sdim // input, bail and keep looking. 1165296417Sdim if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) { 1166296417Sdim trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 1167288943Sdim MemInsns.push_back(MI); 1168296417Sdim continue; 1169296417Sdim } 1170296417Sdim } else { 1171296417Sdim // If the alignment requirements of the paired (scaled) instruction 1172296417Sdim // can't express the offset of the unscaled input, bail and keep 1173296417Sdim // looking. 1174296417Sdim if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { 1175296417Sdim trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 1176296417Sdim MemInsns.push_back(MI); 1177296417Sdim continue; 1178296417Sdim } 1179274955Ssvnmir } 1180274955Ssvnmir // If the destination register of the loads is the same register, bail 1181274955Ssvnmir // and keep looking. A load-pair instruction with both destination 1182274955Ssvnmir // registers the same is UNPREDICTABLE and will result in an exception. 1183296417Sdim // For narrow stores, allow only when the stored value is the same 1184296417Sdim // (i.e., WZR). 1185296417Sdim if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) || 1186296417Sdim (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) { 1187274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 1188296417Sdim MemInsns.push_back(MI); 1189274955Ssvnmir continue; 1190274955Ssvnmir } 1191274955Ssvnmir 1192274955Ssvnmir // If the Rt of the second instruction was not modified or used between 1193288943Sdim // the two instructions and none of the instructions between the second 1194288943Sdim // and first alias with the second, we can combine the second into the 1195288943Sdim // first. 1196296417Sdim if (!ModifiedRegs[getLdStRegOp(MI).getReg()] && 1197296417Sdim !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) && 1198288943Sdim !mayAlias(MI, MemInsns, TII)) { 1199296417Sdim Flags.setMergeForward(false); 1200274955Ssvnmir return MBBI; 1201274955Ssvnmir } 1202274955Ssvnmir 1203274955Ssvnmir // Likewise, if the Rt of the first instruction is not modified or used 1204288943Sdim // between the two instructions and none of the instructions between the 1205288943Sdim // first and the second alias with the first, we can combine the first 1206288943Sdim // into the second. 1207296417Sdim if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] && 1208296417Sdim !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) && 1209288943Sdim !mayAlias(FirstMI, MemInsns, TII)) { 1210296417Sdim Flags.setMergeForward(true); 1211274955Ssvnmir return MBBI; 1212274955Ssvnmir } 1213274955Ssvnmir // Unable to combine these instructions due to interference in between. 1214274955Ssvnmir // Keep looking. 1215274955Ssvnmir } 1216274955Ssvnmir } 1217274955Ssvnmir 1218288943Sdim // If the instruction wasn't a matching load or store. Stop searching if we 1219288943Sdim // encounter a call instruction that might modify memory. 1220288943Sdim if (MI->isCall()) 1221274955Ssvnmir return E; 1222274955Ssvnmir 1223274955Ssvnmir // Update modified / uses register lists. 1224274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 1225274955Ssvnmir 1226274955Ssvnmir // Otherwise, if the base register is modified, we have no match, so 1227274955Ssvnmir // return early. 1228274955Ssvnmir if (ModifiedRegs[BaseReg]) 1229274955Ssvnmir return E; 1230288943Sdim 1231288943Sdim // Update list of instructions that read/write memory. 1232288943Sdim if (MI->mayLoadOrStore()) 1233288943Sdim MemInsns.push_back(MI); 1234274955Ssvnmir } 1235274955Ssvnmir return E; 1236274955Ssvnmir} 1237274955Ssvnmir 1238274955SsvnmirMachineBasicBlock::iterator 1239296417SdimAArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, 1240296417Sdim MachineBasicBlock::iterator Update, 1241296417Sdim bool IsPreIdx) { 1242274955Ssvnmir assert((Update->getOpcode() == AArch64::ADDXri || 1243274955Ssvnmir Update->getOpcode() == AArch64::SUBXri) && 1244274955Ssvnmir "Unexpected base register update instruction to merge!"); 1245274955Ssvnmir MachineBasicBlock::iterator NextI = I; 1246274955Ssvnmir // Return the instruction following the merged instruction, which is 1247274955Ssvnmir // the instruction following our unmerged load. Unless that's the add/sub 1248274955Ssvnmir // instruction we're merging, in which case it's the one after that. 1249274955Ssvnmir if (++NextI == Update) 1250274955Ssvnmir ++NextI; 1251274955Ssvnmir 1252274955Ssvnmir int Value = Update->getOperand(2).getImm(); 1253274955Ssvnmir assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && 1254296417Sdim "Can't merge 1 << 12 offset into pre-/post-indexed load / store"); 1255274955Ssvnmir if (Update->getOpcode() == AArch64::SUBXri) 1256274955Ssvnmir Value = -Value; 1257274955Ssvnmir 1258296417Sdim unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) 1259296417Sdim : getPostIndexedOpcode(I->getOpcode()); 1260296417Sdim MachineInstrBuilder MIB; 1261296417Sdim if (!isPairedLdSt(I)) { 1262296417Sdim // Non-paired instruction. 1263296417Sdim MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 1264296417Sdim .addOperand(getLdStRegOp(Update)) 1265296417Sdim .addOperand(getLdStRegOp(I)) 1266296417Sdim .addOperand(getLdStBaseOp(I)) 1267296417Sdim .addImm(Value); 1268296417Sdim } else { 1269296417Sdim // Paired instruction. 1270296417Sdim int Scale = getMemScale(I); 1271296417Sdim MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 1272296417Sdim .addOperand(getLdStRegOp(Update)) 1273296417Sdim .addOperand(getLdStRegOp(I, 0)) 1274296417Sdim .addOperand(getLdStRegOp(I, 1)) 1275296417Sdim .addOperand(getLdStBaseOp(I)) 1276296417Sdim .addImm(Value / Scale); 1277296417Sdim } 1278274955Ssvnmir (void)MIB; 1279274955Ssvnmir 1280296417Sdim if (IsPreIdx) 1281296417Sdim DEBUG(dbgs() << "Creating pre-indexed load/store."); 1282296417Sdim else 1283296417Sdim DEBUG(dbgs() << "Creating post-indexed load/store."); 1284274955Ssvnmir DEBUG(dbgs() << " Replacing instructions:\n "); 1285274955Ssvnmir DEBUG(I->print(dbgs())); 1286274955Ssvnmir DEBUG(dbgs() << " "); 1287274955Ssvnmir DEBUG(Update->print(dbgs())); 1288274955Ssvnmir DEBUG(dbgs() << " with instruction:\n "); 1289274955Ssvnmir DEBUG(((MachineInstr *)MIB)->print(dbgs())); 1290274955Ssvnmir DEBUG(dbgs() << "\n"); 1291274955Ssvnmir 1292274955Ssvnmir // Erase the old instructions for the block. 1293274955Ssvnmir I->eraseFromParent(); 1294274955Ssvnmir Update->eraseFromParent(); 1295274955Ssvnmir 1296274955Ssvnmir return NextI; 1297274955Ssvnmir} 1298274955Ssvnmir 1299296417Sdimbool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI, 1300296417Sdim MachineInstr *MI, 1301296417Sdim unsigned BaseReg, int Offset) { 1302274955Ssvnmir switch (MI->getOpcode()) { 1303274955Ssvnmir default: 1304274955Ssvnmir break; 1305274955Ssvnmir case AArch64::SUBXri: 1306274955Ssvnmir // Negate the offset for a SUB instruction. 1307274955Ssvnmir Offset *= -1; 1308274955Ssvnmir // FALLTHROUGH 1309274955Ssvnmir case AArch64::ADDXri: 1310274955Ssvnmir // Make sure it's a vanilla immediate operand, not a relocation or 1311274955Ssvnmir // anything else we can't handle. 1312274955Ssvnmir if (!MI->getOperand(2).isImm()) 1313274955Ssvnmir break; 1314274955Ssvnmir // Watch out for 1 << 12 shifted value. 1315274955Ssvnmir if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) 1316274955Ssvnmir break; 1317296417Sdim 1318296417Sdim // The update instruction source and destination register must be the 1319296417Sdim // same as the load/store base register. 1320296417Sdim if (MI->getOperand(0).getReg() != BaseReg || 1321296417Sdim MI->getOperand(1).getReg() != BaseReg) 1322296417Sdim break; 1323296417Sdim 1324296417Sdim bool IsPairedInsn = isPairedLdSt(MemMI); 1325296417Sdim int UpdateOffset = MI->getOperand(2).getImm(); 1326296417Sdim // For non-paired load/store instructions, the immediate must fit in a 1327296417Sdim // signed 9-bit integer. 1328296417Sdim if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) 1329296417Sdim break; 1330296417Sdim 1331296417Sdim // For paired load/store instructions, the immediate must be a multiple of 1332296417Sdim // the scaling factor. The scaled offset must also fit into a signed 7-bit 1333296417Sdim // integer. 1334296417Sdim if (IsPairedInsn) { 1335296417Sdim int Scale = getMemScale(MemMI); 1336296417Sdim if (UpdateOffset % Scale != 0) 1337296417Sdim break; 1338296417Sdim 1339296417Sdim int ScaledOffset = UpdateOffset / Scale; 1340296417Sdim if (ScaledOffset > 64 || ScaledOffset < -64) 1341296417Sdim break; 1342274955Ssvnmir } 1343296417Sdim 1344296417Sdim // If we have a non-zero Offset, we check that it matches the amount 1345296417Sdim // we're adding to the register. 1346296417Sdim if (!Offset || Offset == MI->getOperand(2).getImm()) 1347296417Sdim return true; 1348274955Ssvnmir break; 1349274955Ssvnmir } 1350274955Ssvnmir return false; 1351274955Ssvnmir} 1352274955Ssvnmir 1353274955SsvnmirMachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( 1354296417Sdim MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) { 1355274955Ssvnmir MachineBasicBlock::iterator E = I->getParent()->end(); 1356274955Ssvnmir MachineInstr *MemMI = I; 1357274955Ssvnmir MachineBasicBlock::iterator MBBI = I; 1358274955Ssvnmir 1359296417Sdim unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); 1360296417Sdim int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); 1361274955Ssvnmir 1362296417Sdim // Scan forward looking for post-index opportunities. Updating instructions 1363296417Sdim // can't be formed if the memory instruction doesn't have the offset we're 1364296417Sdim // looking for. 1365296417Sdim if (MIUnscaledOffset != UnscaledOffset) 1366274955Ssvnmir return E; 1367274955Ssvnmir 1368296417Sdim // If the base register overlaps a destination register, we can't 1369296417Sdim // merge the update. 1370296417Sdim bool IsPairedInsn = isPairedLdSt(MemMI); 1371296417Sdim for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { 1372296417Sdim unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); 1373296417Sdim if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 1374296417Sdim return E; 1375296417Sdim } 1376274955Ssvnmir 1377274955Ssvnmir // Track which registers have been modified and used between the first insn 1378274955Ssvnmir // (inclusive) and the second insn. 1379274955Ssvnmir BitVector ModifiedRegs, UsedRegs; 1380274955Ssvnmir ModifiedRegs.resize(TRI->getNumRegs()); 1381274955Ssvnmir UsedRegs.resize(TRI->getNumRegs()); 1382274955Ssvnmir ++MBBI; 1383274955Ssvnmir for (unsigned Count = 0; MBBI != E; ++MBBI) { 1384274955Ssvnmir MachineInstr *MI = MBBI; 1385274955Ssvnmir // Skip DBG_VALUE instructions. Otherwise debug info can affect the 1386274955Ssvnmir // optimization by changing how far we scan. 1387274955Ssvnmir if (MI->isDebugValue()) 1388274955Ssvnmir continue; 1389274955Ssvnmir 1390274955Ssvnmir // Now that we know this is a real instruction, count it. 1391274955Ssvnmir ++Count; 1392274955Ssvnmir 1393274955Ssvnmir // If we found a match, return it. 1394296417Sdim if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset)) 1395274955Ssvnmir return MBBI; 1396274955Ssvnmir 1397274955Ssvnmir // Update the status of what the instruction clobbered and used. 1398274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 1399274955Ssvnmir 1400274955Ssvnmir // Otherwise, if the base register is used or modified, we have no match, so 1401274955Ssvnmir // return early. 1402274955Ssvnmir if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) 1403274955Ssvnmir return E; 1404274955Ssvnmir } 1405274955Ssvnmir return E; 1406274955Ssvnmir} 1407274955Ssvnmir 1408274955SsvnmirMachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( 1409274955Ssvnmir MachineBasicBlock::iterator I, unsigned Limit) { 1410274955Ssvnmir MachineBasicBlock::iterator B = I->getParent()->begin(); 1411274955Ssvnmir MachineBasicBlock::iterator E = I->getParent()->end(); 1412274955Ssvnmir MachineInstr *MemMI = I; 1413274955Ssvnmir MachineBasicBlock::iterator MBBI = I; 1414274955Ssvnmir 1415296417Sdim unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); 1416296417Sdim int Offset = getLdStOffsetOp(MemMI).getImm(); 1417274955Ssvnmir 1418274955Ssvnmir // If the load/store is the first instruction in the block, there's obviously 1419274955Ssvnmir // not any matching update. Ditto if the memory offset isn't zero. 1420274955Ssvnmir if (MBBI == B || Offset != 0) 1421274955Ssvnmir return E; 1422296417Sdim // If the base register overlaps a destination register, we can't 1423274955Ssvnmir // merge the update. 1424296417Sdim bool IsPairedInsn = isPairedLdSt(MemMI); 1425296417Sdim for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { 1426296417Sdim unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); 1427296417Sdim if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 1428296417Sdim return E; 1429296417Sdim } 1430274955Ssvnmir 1431274955Ssvnmir // Track which registers have been modified and used between the first insn 1432274955Ssvnmir // (inclusive) and the second insn. 1433274955Ssvnmir BitVector ModifiedRegs, UsedRegs; 1434274955Ssvnmir ModifiedRegs.resize(TRI->getNumRegs()); 1435274955Ssvnmir UsedRegs.resize(TRI->getNumRegs()); 1436274955Ssvnmir --MBBI; 1437274955Ssvnmir for (unsigned Count = 0; MBBI != B; --MBBI) { 1438274955Ssvnmir MachineInstr *MI = MBBI; 1439274955Ssvnmir // Skip DBG_VALUE instructions. Otherwise debug info can affect the 1440274955Ssvnmir // optimization by changing how far we scan. 1441274955Ssvnmir if (MI->isDebugValue()) 1442274955Ssvnmir continue; 1443274955Ssvnmir 1444274955Ssvnmir // Now that we know this is a real instruction, count it. 1445274955Ssvnmir ++Count; 1446274955Ssvnmir 1447274955Ssvnmir // If we found a match, return it. 1448296417Sdim if (isMatchingUpdateInsn(I, MI, BaseReg, Offset)) 1449274955Ssvnmir return MBBI; 1450274955Ssvnmir 1451274955Ssvnmir // Update the status of what the instruction clobbered and used. 1452274955Ssvnmir trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 1453274955Ssvnmir 1454274955Ssvnmir // Otherwise, if the base register is used or modified, we have no match, so 1455274955Ssvnmir // return early. 1456274955Ssvnmir if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) 1457274955Ssvnmir return E; 1458274955Ssvnmir } 1459274955Ssvnmir return E; 1460274955Ssvnmir} 1461274955Ssvnmir 1462296417Sdimbool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( 1463296417Sdim MachineBasicBlock::iterator &MBBI) { 1464296417Sdim MachineInstr *MI = MBBI; 1465296417Sdim // If this is a volatile load, don't mess with it. 1466296417Sdim if (MI->hasOrderedMemoryRef()) 1467296417Sdim return false; 1468296417Sdim 1469296417Sdim // Make sure this is a reg+imm. 1470296417Sdim // FIXME: It is possible to extend it to handle reg+reg cases. 1471296417Sdim if (!getLdStOffsetOp(MI).isImm()) 1472296417Sdim return false; 1473296417Sdim 1474296417Sdim // Look backward up to ScanLimit instructions. 1475296417Sdim MachineBasicBlock::iterator StoreI; 1476296417Sdim if (findMatchingStore(MBBI, ScanLimit, StoreI)) { 1477296417Sdim ++NumLoadsFromStoresPromoted; 1478296417Sdim // Promote the load. Keeping the iterator straight is a 1479296417Sdim // pain, so we let the merge routine tell us what the next instruction 1480296417Sdim // is after it's done mucking about. 1481296417Sdim MBBI = promoteLoadFromStore(MBBI, StoreI); 1482296417Sdim return true; 1483296417Sdim } 1484296417Sdim return false; 1485296417Sdim} 1486296417Sdim 1487296417Sdimbool AArch64LoadStoreOpt::tryToMergeLdStInst( 1488296417Sdim MachineBasicBlock::iterator &MBBI) { 1489296417Sdim MachineInstr *MI = MBBI; 1490296417Sdim MachineBasicBlock::iterator E = MI->getParent()->end(); 1491296417Sdim // If this is a volatile load/store, don't mess with it. 1492296417Sdim if (MI->hasOrderedMemoryRef()) 1493296417Sdim return false; 1494296417Sdim 1495296417Sdim // Make sure this is a reg+imm (as opposed to an address reloc). 1496296417Sdim if (!getLdStOffsetOp(MI).isImm()) 1497296417Sdim return false; 1498296417Sdim 1499296417Sdim // Check if this load/store has a hint to avoid pair formation. 1500296417Sdim // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 1501296417Sdim if (TII->isLdStPairSuppressed(MI)) 1502296417Sdim return false; 1503296417Sdim 1504296417Sdim // Look ahead up to ScanLimit instructions for a pairable instruction. 1505296417Sdim LdStPairFlags Flags; 1506296417Sdim MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit); 1507296417Sdim if (Paired != E) { 1508296417Sdim if (isNarrowLoad(MI)) { 1509296417Sdim ++NumNarrowLoadsPromoted; 1510296417Sdim } else if (isNarrowStore(MI)) { 1511296417Sdim ++NumZeroStoresPromoted; 1512296417Sdim } else { 1513296417Sdim ++NumPairCreated; 1514296417Sdim if (isUnscaledLdSt(MI)) 1515296417Sdim ++NumUnscaledPairCreated; 1516296417Sdim } 1517296417Sdim 1518296417Sdim // Merge the loads into a pair. Keeping the iterator straight is a 1519296417Sdim // pain, so we let the merge routine tell us what the next instruction 1520296417Sdim // is after it's done mucking about. 1521296417Sdim MBBI = mergePairedInsns(MBBI, Paired, Flags); 1522296417Sdim return true; 1523296417Sdim } 1524296417Sdim return false; 1525296417Sdim} 1526296417Sdim 1527296417Sdimbool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, 1528296417Sdim bool enableNarrowLdOpt) { 1529274955Ssvnmir bool Modified = false; 1530296417Sdim // Three tranformations to do here: 1531296417Sdim // 1) Find loads that directly read from stores and promote them by 1532296417Sdim // replacing with mov instructions. If the store is wider than the load, 1533296417Sdim // the load will be replaced with a bitfield extract. 1534296417Sdim // e.g., 1535296417Sdim // str w1, [x0, #4] 1536296417Sdim // ldrh w2, [x0, #6] 1537296417Sdim // ; becomes 1538296417Sdim // str w1, [x0, #4] 1539296417Sdim // lsr w2, w1, #16 1540296417Sdim // 2) Find narrow loads that can be converted into a single wider load 1541296417Sdim // with bitfield extract instructions. 1542296417Sdim // e.g., 1543296417Sdim // ldrh w0, [x2] 1544296417Sdim // ldrh w1, [x2, #2] 1545296417Sdim // ; becomes 1546296417Sdim // ldr w0, [x2] 1547296417Sdim // ubfx w1, w0, #16, #16 1548296417Sdim // and w0, w0, #ffff 1549296417Sdim // 3) Find loads and stores that can be merged into a single load or store 1550274955Ssvnmir // pair instruction. 1551274955Ssvnmir // e.g., 1552274955Ssvnmir // ldr x0, [x2] 1553274955Ssvnmir // ldr x1, [x2, #8] 1554274955Ssvnmir // ; becomes 1555274955Ssvnmir // ldp x0, x1, [x2] 1556296417Sdim // 4) Find base register updates that can be merged into the load or store 1557274955Ssvnmir // as a base-reg writeback. 1558274955Ssvnmir // e.g., 1559274955Ssvnmir // ldr x0, [x2] 1560274955Ssvnmir // add x2, x2, #4 1561274955Ssvnmir // ; becomes 1562274955Ssvnmir // ldr x0, [x2], #4 1563274955Ssvnmir 1564274955Ssvnmir for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1565274955Ssvnmir MBBI != E;) { 1566274955Ssvnmir MachineInstr *MI = MBBI; 1567274955Ssvnmir switch (MI->getOpcode()) { 1568274955Ssvnmir default: 1569274955Ssvnmir // Just move on to the next instruction. 1570274955Ssvnmir ++MBBI; 1571274955Ssvnmir break; 1572296417Sdim // Scaled instructions. 1573296417Sdim case AArch64::LDRBBui: 1574296417Sdim case AArch64::LDRHHui: 1575296417Sdim case AArch64::LDRWui: 1576296417Sdim case AArch64::LDRXui: 1577296417Sdim // Unscaled instructions. 1578296417Sdim case AArch64::LDURBBi: 1579296417Sdim case AArch64::LDURHHi: 1580296417Sdim case AArch64::LDURWi: 1581296417Sdim case AArch64::LDURXi: { 1582296417Sdim if (tryToPromoteLoadFromStore(MBBI)) { 1583296417Sdim Modified = true; 1584296417Sdim break; 1585296417Sdim } 1586296417Sdim ++MBBI; 1587296417Sdim break; 1588296417Sdim } 1589296417Sdim // FIXME: Do the other instructions. 1590296417Sdim } 1591296417Sdim } 1592296417Sdim 1593296417Sdim for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1594296417Sdim enableNarrowLdOpt && MBBI != E;) { 1595296417Sdim MachineInstr *MI = MBBI; 1596296417Sdim switch (MI->getOpcode()) { 1597296417Sdim default: 1598296417Sdim // Just move on to the next instruction. 1599296417Sdim ++MBBI; 1600296417Sdim break; 1601296417Sdim // Scaled instructions. 1602296417Sdim case AArch64::LDRBBui: 1603296417Sdim case AArch64::LDRHHui: 1604296417Sdim case AArch64::LDRSBWui: 1605296417Sdim case AArch64::LDRSHWui: 1606296417Sdim case AArch64::STRBBui: 1607296417Sdim case AArch64::STRHHui: 1608296417Sdim // Unscaled instructions. 1609296417Sdim case AArch64::LDURBBi: 1610296417Sdim case AArch64::LDURHHi: 1611296417Sdim case AArch64::LDURSBWi: 1612296417Sdim case AArch64::LDURSHWi: 1613296417Sdim case AArch64::STURBBi: 1614296417Sdim case AArch64::STURHHi: { 1615296417Sdim if (tryToMergeLdStInst(MBBI)) { 1616296417Sdim Modified = true; 1617296417Sdim break; 1618296417Sdim } 1619296417Sdim ++MBBI; 1620296417Sdim break; 1621296417Sdim } 1622296417Sdim // FIXME: Do the other instructions. 1623296417Sdim } 1624296417Sdim } 1625296417Sdim 1626296417Sdim for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1627296417Sdim MBBI != E;) { 1628296417Sdim MachineInstr *MI = MBBI; 1629296417Sdim switch (MI->getOpcode()) { 1630296417Sdim default: 1631296417Sdim // Just move on to the next instruction. 1632296417Sdim ++MBBI; 1633296417Sdim break; 1634296417Sdim // Scaled instructions. 1635274955Ssvnmir case AArch64::STRSui: 1636274955Ssvnmir case AArch64::STRDui: 1637274955Ssvnmir case AArch64::STRQui: 1638274955Ssvnmir case AArch64::STRXui: 1639274955Ssvnmir case AArch64::STRWui: 1640274955Ssvnmir case AArch64::LDRSui: 1641274955Ssvnmir case AArch64::LDRDui: 1642274955Ssvnmir case AArch64::LDRQui: 1643274955Ssvnmir case AArch64::LDRXui: 1644274955Ssvnmir case AArch64::LDRWui: 1645288943Sdim case AArch64::LDRSWui: 1646296417Sdim // Unscaled instructions. 1647274955Ssvnmir case AArch64::STURSi: 1648274955Ssvnmir case AArch64::STURDi: 1649274955Ssvnmir case AArch64::STURQi: 1650274955Ssvnmir case AArch64::STURWi: 1651274955Ssvnmir case AArch64::STURXi: 1652274955Ssvnmir case AArch64::LDURSi: 1653274955Ssvnmir case AArch64::LDURDi: 1654274955Ssvnmir case AArch64::LDURQi: 1655274955Ssvnmir case AArch64::LDURWi: 1656288943Sdim case AArch64::LDURXi: 1657288943Sdim case AArch64::LDURSWi: { 1658296417Sdim if (tryToMergeLdStInst(MBBI)) { 1659274955Ssvnmir Modified = true; 1660274955Ssvnmir break; 1661274955Ssvnmir } 1662274955Ssvnmir ++MBBI; 1663274955Ssvnmir break; 1664274955Ssvnmir } 1665274955Ssvnmir // FIXME: Do the other instructions. 1666274955Ssvnmir } 1667274955Ssvnmir } 1668274955Ssvnmir 1669274955Ssvnmir for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1670274955Ssvnmir MBBI != E;) { 1671274955Ssvnmir MachineInstr *MI = MBBI; 1672274955Ssvnmir // Do update merging. It's simpler to keep this separate from the above 1673274955Ssvnmir // switch, though not strictly necessary. 1674288943Sdim unsigned Opc = MI->getOpcode(); 1675274955Ssvnmir switch (Opc) { 1676274955Ssvnmir default: 1677274955Ssvnmir // Just move on to the next instruction. 1678274955Ssvnmir ++MBBI; 1679274955Ssvnmir break; 1680296417Sdim // Scaled instructions. 1681274955Ssvnmir case AArch64::STRSui: 1682274955Ssvnmir case AArch64::STRDui: 1683274955Ssvnmir case AArch64::STRQui: 1684274955Ssvnmir case AArch64::STRXui: 1685274955Ssvnmir case AArch64::STRWui: 1686296417Sdim case AArch64::STRHHui: 1687296417Sdim case AArch64::STRBBui: 1688274955Ssvnmir case AArch64::LDRSui: 1689274955Ssvnmir case AArch64::LDRDui: 1690274955Ssvnmir case AArch64::LDRQui: 1691274955Ssvnmir case AArch64::LDRXui: 1692274955Ssvnmir case AArch64::LDRWui: 1693296417Sdim case AArch64::LDRHHui: 1694296417Sdim case AArch64::LDRBBui: 1695296417Sdim // Unscaled instructions. 1696274955Ssvnmir case AArch64::STURSi: 1697274955Ssvnmir case AArch64::STURDi: 1698274955Ssvnmir case AArch64::STURQi: 1699274955Ssvnmir case AArch64::STURWi: 1700274955Ssvnmir case AArch64::STURXi: 1701274955Ssvnmir case AArch64::LDURSi: 1702274955Ssvnmir case AArch64::LDURDi: 1703274955Ssvnmir case AArch64::LDURQi: 1704274955Ssvnmir case AArch64::LDURWi: 1705296417Sdim case AArch64::LDURXi: 1706296417Sdim // Paired instructions. 1707296417Sdim case AArch64::LDPSi: 1708296417Sdim case AArch64::LDPSWi: 1709296417Sdim case AArch64::LDPDi: 1710296417Sdim case AArch64::LDPQi: 1711296417Sdim case AArch64::LDPWi: 1712296417Sdim case AArch64::LDPXi: 1713296417Sdim case AArch64::STPSi: 1714296417Sdim case AArch64::STPDi: 1715296417Sdim case AArch64::STPQi: 1716296417Sdim case AArch64::STPWi: 1717296417Sdim case AArch64::STPXi: { 1718274955Ssvnmir // Make sure this is a reg+imm (as opposed to an address reloc). 1719296417Sdim if (!getLdStOffsetOp(MI).isImm()) { 1720274955Ssvnmir ++MBBI; 1721274955Ssvnmir break; 1722274955Ssvnmir } 1723296417Sdim // Look forward to try to form a post-index instruction. For example, 1724296417Sdim // ldr x0, [x20] 1725296417Sdim // add x20, x20, #32 1726296417Sdim // merged into: 1727296417Sdim // ldr x0, [x20], #32 1728274955Ssvnmir MachineBasicBlock::iterator Update = 1729274955Ssvnmir findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); 1730274955Ssvnmir if (Update != E) { 1731274955Ssvnmir // Merge the update into the ld/st. 1732296417Sdim MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false); 1733274955Ssvnmir Modified = true; 1734274955Ssvnmir ++NumPostFolded; 1735274955Ssvnmir break; 1736274955Ssvnmir } 1737274955Ssvnmir // Don't know how to handle pre/post-index versions, so move to the next 1738274955Ssvnmir // instruction. 1739296417Sdim if (isUnscaledLdSt(Opc)) { 1740274955Ssvnmir ++MBBI; 1741274955Ssvnmir break; 1742274955Ssvnmir } 1743274955Ssvnmir 1744274955Ssvnmir // Look back to try to find a pre-index instruction. For example, 1745274955Ssvnmir // add x0, x0, #8 1746274955Ssvnmir // ldr x1, [x0] 1747274955Ssvnmir // merged into: 1748274955Ssvnmir // ldr x1, [x0, #8]! 1749274955Ssvnmir Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); 1750274955Ssvnmir if (Update != E) { 1751274955Ssvnmir // Merge the update into the ld/st. 1752296417Sdim MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); 1753274955Ssvnmir Modified = true; 1754274955Ssvnmir ++NumPreFolded; 1755274955Ssvnmir break; 1756274955Ssvnmir } 1757296417Sdim // The immediate in the load/store is scaled by the size of the memory 1758296417Sdim // operation. The immediate in the add we're looking for, 1759296417Sdim // however, is not, so adjust here. 1760296417Sdim int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI); 1761274955Ssvnmir 1762274955Ssvnmir // Look forward to try to find a post-index instruction. For example, 1763274955Ssvnmir // ldr x1, [x0, #64] 1764274955Ssvnmir // add x0, x0, #64 1765274955Ssvnmir // merged into: 1766274955Ssvnmir // ldr x1, [x0, #64]! 1767296417Sdim Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset); 1768274955Ssvnmir if (Update != E) { 1769274955Ssvnmir // Merge the update into the ld/st. 1770296417Sdim MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); 1771274955Ssvnmir Modified = true; 1772274955Ssvnmir ++NumPreFolded; 1773274955Ssvnmir break; 1774274955Ssvnmir } 1775274955Ssvnmir 1776274955Ssvnmir // Nothing found. Just move to the next instruction. 1777274955Ssvnmir ++MBBI; 1778274955Ssvnmir break; 1779274955Ssvnmir } 1780274955Ssvnmir // FIXME: Do the other instructions. 1781274955Ssvnmir } 1782274955Ssvnmir } 1783274955Ssvnmir 1784274955Ssvnmir return Modified; 1785274955Ssvnmir} 1786274955Ssvnmir 1787296417Sdimbool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) { 1788296417Sdim bool ProfitableArch = Subtarget->isCortexA57(); 1789296417Sdim // FIXME: The benefit from converting narrow loads into a wider load could be 1790296417Sdim // microarchitectural as it assumes that a single load with two bitfield 1791296417Sdim // extracts is cheaper than two narrow loads. Currently, this conversion is 1792296417Sdim // enabled only in cortex-a57 on which performance benefits were verified. 1793296417Sdim return ProfitableArch && !Subtarget->requiresStrictAlign(); 1794296417Sdim} 1795296417Sdim 1796274955Ssvnmirbool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1797296417Sdim Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget()); 1798296417Sdim TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo()); 1799296417Sdim TRI = Subtarget->getRegisterInfo(); 1800274955Ssvnmir 1801274955Ssvnmir bool Modified = false; 1802296417Sdim bool enableNarrowLdOpt = enableNarrowLdMerge(Fn); 1803274955Ssvnmir for (auto &MBB : Fn) 1804296417Sdim Modified |= optimizeBlock(MBB, enableNarrowLdOpt); 1805274955Ssvnmir 1806274955Ssvnmir return Modified; 1807274955Ssvnmir} 1808274955Ssvnmir 1809274955Ssvnmir// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep 1810274955Ssvnmir// loads and stores near one another? 1811274955Ssvnmir 1812296417Sdim/// createAArch64LoadStoreOptimizationPass - returns an instance of the 1813296417Sdim/// load / store optimization pass. 1814274955SsvnmirFunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { 1815274955Ssvnmir return new AArch64LoadStoreOpt(); 1816274955Ssvnmir} 1817