1235633Sdim//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// 2198090Srdivacky// 3198090Srdivacky// The LLVM Compiler Infrastructure 4198090Srdivacky// 5198090Srdivacky// This file is distributed under the University of Illinois Open Source 6198090Srdivacky// License. See LICENSE.TXT for details. 7198090Srdivacky// 8198090Srdivacky//===----------------------------------------------------------------------===// 9198090Srdivacky// 10198090Srdivacky// This file contains the Base ARM implementation of the TargetInstrInfo class. 11198090Srdivacky// 12198090Srdivacky//===----------------------------------------------------------------------===// 13198090Srdivacky 14263509Sdim#include "ARM.h" 15198090Srdivacky#include "ARMBaseInstrInfo.h" 16235633Sdim#include "ARMBaseRegisterInfo.h" 17199481Srdivacky#include "ARMConstantPoolValue.h" 18263509Sdim#include "ARMFeatures.h" 19218893Sdim#include "ARMHazardRecognizer.h" 20198090Srdivacky#include "ARMMachineFunctionInfo.h" 21226890Sdim#include "MCTargetDesc/ARMAddressingModes.h" 22252723Sdim#include "llvm/ADT/STLExtras.h" 23198090Srdivacky#include "llvm/CodeGen/LiveVariables.h" 24199481Srdivacky#include "llvm/CodeGen/MachineConstantPool.h" 25198090Srdivacky#include "llvm/CodeGen/MachineFrameInfo.h" 26198090Srdivacky#include "llvm/CodeGen/MachineInstrBuilder.h" 27198090Srdivacky#include "llvm/CodeGen/MachineJumpTableInfo.h" 28198090Srdivacky#include "llvm/CodeGen/MachineMemOperand.h" 29208599Srdivacky#include "llvm/CodeGen/MachineRegisterInfo.h" 30226890Sdim#include "llvm/CodeGen/SelectionDAGNodes.h" 31252723Sdim#include "llvm/IR/Constants.h" 32252723Sdim#include "llvm/IR/Function.h" 33252723Sdim#include "llvm/IR/GlobalValue.h" 34198090Srdivacky#include "llvm/MC/MCAsmInfo.h" 35224145Sdim#include "llvm/Support/BranchProbability.h" 36198090Srdivacky#include "llvm/Support/CommandLine.h" 37198892Srdivacky#include "llvm/Support/Debug.h" 38198090Srdivacky#include "llvm/Support/ErrorHandling.h" 39224145Sdim 
40263509Sdim#define GET_INSTRINFO_CTOR_DTOR 41224145Sdim#include "ARMGenInstrInfo.inc" 42224145Sdim 43198090Srdivackyusing namespace llvm; 44198090Srdivacky 45198090Srdivackystatic cl::opt<bool> 46198090SrdivackyEnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, 47198090Srdivacky cl::desc("Enable ARM 2-addr to 3-addr conv")); 48198090Srdivacky 49226890Sdimstatic cl::opt<bool> 50235633SdimWidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), 51226890Sdim cl::desc("Widen ARM vmovs to vmovd when possible")); 52226890Sdim 53245431Sdimstatic cl::opt<unsigned> 54245431SdimSwiftPartialUpdateClearance("swift-partial-update-clearance", 55245431Sdim cl::Hidden, cl::init(12), 56245431Sdim cl::desc("Clearance before partial register updates")); 57245431Sdim 58218893Sdim/// ARM_MLxEntry - Record information about MLA / MLS instructions. 59218893Sdimstruct ARM_MLxEntry { 60245431Sdim uint16_t MLxOpc; // MLA / MLS opcode 61245431Sdim uint16_t MulOpc; // Expanded multiplication opcode 62245431Sdim uint16_t AddSubOpc; // Expanded add / sub opcode 63218893Sdim bool NegAcc; // True if the acc is negated before the add / sub. 64218893Sdim bool HasLane; // True if instruction has an extra "lane" operand. 
65218893Sdim}; 66218893Sdim 67218893Sdimstatic const ARM_MLxEntry ARM_MLxTable[] = { 68218893Sdim // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane 69218893Sdim // fp scalar ops 70218893Sdim { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, 71218893Sdim { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, 72218893Sdim { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, 73218893Sdim { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, 74218893Sdim { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, 75218893Sdim { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, 76218893Sdim { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, 77218893Sdim { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, 78218893Sdim 79218893Sdim // fp SIMD ops 80218893Sdim { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, 81218893Sdim { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, 82218893Sdim { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, 83218893Sdim { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, 84218893Sdim { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, 85218893Sdim { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, 86218893Sdim { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, 87218893Sdim { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, 88218893Sdim}; 89218893Sdim 90198892SrdivackyARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) 91224145Sdim : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), 92198892Srdivacky Subtarget(STI) { 93218893Sdim for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { 94218893Sdim if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) 95218893Sdim assert(false && "Duplicated entries?"); 96218893Sdim MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); 97218893Sdim MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); 98218893Sdim } 99198090Srdivacky} 100198090Srdivacky 101218893Sdim// Use a ScoreboardHazardRecognizer for 
prepass ARM scheduling. TargetInstrImpl 102218893Sdim// currently defaults to no prepass hazard recognizer. 103218893SdimScheduleHazardRecognizer *ARMBaseInstrInfo:: 104218893SdimCreateTargetHazardRecognizer(const TargetMachine *TM, 105218893Sdim const ScheduleDAG *DAG) const { 106218893Sdim if (usePreRAHazardRecognizer()) { 107218893Sdim const InstrItineraryData *II = TM->getInstrItineraryData(); 108218893Sdim return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); 109218893Sdim } 110252723Sdim return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); 111218893Sdim} 112218893Sdim 113218893SdimScheduleHazardRecognizer *ARMBaseInstrInfo:: 114218893SdimCreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 115218893Sdim const ScheduleDAG *DAG) const { 116218893Sdim if (Subtarget.isThumb2() || Subtarget.hasVFP2()) 117263509Sdim return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG); 118252723Sdim return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); 119218893Sdim} 120218893Sdim 121198090SrdivackyMachineInstr * 122198090SrdivackyARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 123198090Srdivacky MachineBasicBlock::iterator &MBBI, 124198090Srdivacky LiveVariables *LV) const { 125198090Srdivacky // FIXME: Thumb2 support. 
126198090Srdivacky 127198090Srdivacky if (!EnableARM3Addr) 128198090Srdivacky return NULL; 129198090Srdivacky 130198090Srdivacky MachineInstr *MI = MBBI; 131198090Srdivacky MachineFunction &MF = *MI->getParent()->getParent(); 132210299Sed uint64_t TSFlags = MI->getDesc().TSFlags; 133198090Srdivacky bool isPre = false; 134198090Srdivacky switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { 135198090Srdivacky default: return NULL; 136198090Srdivacky case ARMII::IndexModePre: 137198090Srdivacky isPre = true; 138198090Srdivacky break; 139198090Srdivacky case ARMII::IndexModePost: 140198090Srdivacky break; 141198090Srdivacky } 142198090Srdivacky 143198090Srdivacky // Try splitting an indexed load/store to an un-indexed one plus an add/sub 144198090Srdivacky // operation. 145198090Srdivacky unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); 146198090Srdivacky if (MemOpc == 0) 147198090Srdivacky return NULL; 148198090Srdivacky 149198090Srdivacky MachineInstr *UpdateMI = NULL; 150198090Srdivacky MachineInstr *MemMI = NULL; 151198090Srdivacky unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); 152224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 153224145Sdim unsigned NumOps = MCID.getNumOperands(); 154235633Sdim bool isLoad = !MI->mayStore(); 155198090Srdivacky const MachineOperand &WB = isLoad ? 
MI->getOperand(1) : MI->getOperand(0); 156198090Srdivacky const MachineOperand &Base = MI->getOperand(2); 157198090Srdivacky const MachineOperand &Offset = MI->getOperand(NumOps-3); 158198090Srdivacky unsigned WBReg = WB.getReg(); 159198090Srdivacky unsigned BaseReg = Base.getReg(); 160198090Srdivacky unsigned OffReg = Offset.getReg(); 161198090Srdivacky unsigned OffImm = MI->getOperand(NumOps-2).getImm(); 162198090Srdivacky ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); 163198090Srdivacky switch (AddrMode) { 164235633Sdim default: llvm_unreachable("Unknown indexed op!"); 165198090Srdivacky case ARMII::AddrMode2: { 166198090Srdivacky bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 167198090Srdivacky unsigned Amt = ARM_AM::getAM2Offset(OffImm); 168198090Srdivacky if (OffReg == 0) { 169198090Srdivacky if (ARM_AM::getSOImmVal(Amt) == -1) 170198090Srdivacky // Can't encode it in a so_imm operand. This transformation will 171198090Srdivacky // add more than 1 instruction. Abandon! 172198090Srdivacky return NULL; 173198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 174198090Srdivacky get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 175198090Srdivacky .addReg(BaseReg).addImm(Amt) 176198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 177198090Srdivacky } else if (Amt != 0) { 178198090Srdivacky ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); 179198090Srdivacky unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); 180198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 181226890Sdim get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) 182198090Srdivacky .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) 183198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 184198090Srdivacky } else 185198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 186198090Srdivacky get(isSub ? 
ARM::SUBrr : ARM::ADDrr), WBReg) 187198090Srdivacky .addReg(BaseReg).addReg(OffReg) 188198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 189198090Srdivacky break; 190198090Srdivacky } 191198090Srdivacky case ARMII::AddrMode3 : { 192198090Srdivacky bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; 193198090Srdivacky unsigned Amt = ARM_AM::getAM3Offset(OffImm); 194198090Srdivacky if (OffReg == 0) 195198090Srdivacky // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. 196198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 197198090Srdivacky get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 198198090Srdivacky .addReg(BaseReg).addImm(Amt) 199198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 200198090Srdivacky else 201198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 202198090Srdivacky get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 203198090Srdivacky .addReg(BaseReg).addReg(OffReg) 204198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 205198090Srdivacky break; 206198090Srdivacky } 207198090Srdivacky } 208198090Srdivacky 209198090Srdivacky std::vector<MachineInstr*> NewMIs; 210198090Srdivacky if (isPre) { 211198090Srdivacky if (isLoad) 212198090Srdivacky MemMI = BuildMI(MF, MI->getDebugLoc(), 213198090Srdivacky get(MemOpc), MI->getOperand(0).getReg()) 214218893Sdim .addReg(WBReg).addImm(0).addImm(Pred); 215198090Srdivacky else 216198090Srdivacky MemMI = BuildMI(MF, MI->getDebugLoc(), 217198090Srdivacky get(MemOpc)).addReg(MI->getOperand(1).getReg()) 218198090Srdivacky .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); 219198090Srdivacky NewMIs.push_back(MemMI); 220198090Srdivacky NewMIs.push_back(UpdateMI); 221198090Srdivacky } else { 222198090Srdivacky if (isLoad) 223198090Srdivacky MemMI = BuildMI(MF, MI->getDebugLoc(), 224198090Srdivacky get(MemOpc), MI->getOperand(0).getReg()) 225218893Sdim .addReg(BaseReg).addImm(0).addImm(Pred); 226198090Srdivacky else 227198090Srdivacky MemMI = BuildMI(MF, MI->getDebugLoc(), 228198090Srdivacky 
get(MemOpc)).addReg(MI->getOperand(1).getReg()) 229198090Srdivacky .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); 230198090Srdivacky if (WB.isDead()) 231198090Srdivacky UpdateMI->getOperand(0).setIsDead(); 232198090Srdivacky NewMIs.push_back(UpdateMI); 233198090Srdivacky NewMIs.push_back(MemMI); 234198090Srdivacky } 235198090Srdivacky 236198090Srdivacky // Transfer LiveVariables states, kill / dead info. 237198090Srdivacky if (LV) { 238198090Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 239198090Srdivacky MachineOperand &MO = MI->getOperand(i); 240218893Sdim if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 241198090Srdivacky unsigned Reg = MO.getReg(); 242198090Srdivacky 243198090Srdivacky LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 244198090Srdivacky if (MO.isDef()) { 245198090Srdivacky MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; 246198090Srdivacky if (MO.isDead()) 247198090Srdivacky LV->addVirtualRegisterDead(Reg, NewMI); 248198090Srdivacky } 249198090Srdivacky if (MO.isUse() && MO.isKill()) { 250198090Srdivacky for (unsigned j = 0; j < 2; ++j) { 251198090Srdivacky // Look at the two new MI's in reverse order. 252198090Srdivacky MachineInstr *NewMI = NewMIs[j]; 253198090Srdivacky if (!NewMI->readsRegister(Reg)) 254198090Srdivacky continue; 255198090Srdivacky LV->addVirtualRegisterKilled(Reg, NewMI); 256198090Srdivacky if (VI.removeKill(MI)) 257198090Srdivacky VI.Kills.push_back(NewMI); 258198090Srdivacky break; 259198090Srdivacky } 260198090Srdivacky } 261198090Srdivacky } 262198090Srdivacky } 263198090Srdivacky } 264198090Srdivacky 265198090Srdivacky MFI->insert(MBBI, NewMIs[1]); 266198090Srdivacky MFI->insert(MBBI, NewMIs[0]); 267198090Srdivacky return NewMIs[0]; 268198090Srdivacky} 269198090Srdivacky 270198090Srdivacky// Branch analysis. 
271198090Srdivackybool 272198090SrdivackyARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, 273198090Srdivacky MachineBasicBlock *&FBB, 274198090Srdivacky SmallVectorImpl<MachineOperand> &Cond, 275198090Srdivacky bool AllowModify) const { 276263509Sdim TBB = 0; 277263509Sdim FBB = 0; 278263509Sdim 279198090Srdivacky MachineBasicBlock::iterator I = MBB.end(); 280206083Srdivacky if (I == MBB.begin()) 281263509Sdim return false; // Empty blocks are easy. 282206083Srdivacky --I; 283198090Srdivacky 284263509Sdim // Walk backwards from the end of the basic block until the branch is 285263509Sdim // analyzed or we give up. 286263509Sdim while (isPredicated(I) || I->isTerminator()) { 287198090Srdivacky 288263509Sdim // Flag to be raised on unanalyzeable instructions. This is useful in cases 289263509Sdim // where we want to clean up on the end of the basic block before we bail 290263509Sdim // out. 291263509Sdim bool CantAnalyze = false; 292252723Sdim 293263509Sdim // Skip over DEBUG values and predicated nonterminators. 294263509Sdim while (I->isDebugValue() || !I->isTerminator()) { 295263509Sdim if (I == MBB.begin()) 296263509Sdim return false; 297263509Sdim --I; 298263509Sdim } 299252723Sdim 300263509Sdim if (isIndirectBranchOpcode(I->getOpcode()) || 301263509Sdim isJumpTableBranchOpcode(I->getOpcode())) { 302263509Sdim // Indirect branches and jump tables can't be analyzed, but we still want 303263509Sdim // to clean up any instructions at the tail of the basic block. 304263509Sdim CantAnalyze = true; 305263509Sdim } else if (isUncondBranchOpcode(I->getOpcode())) { 306263509Sdim TBB = I->getOperand(0).getMBB(); 307263509Sdim } else if (isCondBranchOpcode(I->getOpcode())) { 308263509Sdim // Bail out if we encounter multiple conditional branches. 
309263509Sdim if (!Cond.empty()) 310263509Sdim return true; 311263509Sdim 312263509Sdim assert(!FBB && "FBB should have been null."); 313263509Sdim FBB = TBB; 314263509Sdim TBB = I->getOperand(0).getMBB(); 315263509Sdim Cond.push_back(I->getOperand(1)); 316263509Sdim Cond.push_back(I->getOperand(2)); 317263509Sdim } else if (I->isReturn()) { 318263509Sdim // Returns can't be analyzed, but we should run cleanup. 319263509Sdim CantAnalyze = !isPredicated(I); 320263509Sdim } else { 321263509Sdim // We encountered other unrecognized terminator. Bail out immediately. 322263509Sdim return true; 323198090Srdivacky } 324198090Srdivacky 325263509Sdim // Cleanup code - to be run for unpredicated unconditional branches and 326263509Sdim // returns. 327263509Sdim if (!isPredicated(I) && 328263509Sdim (isUncondBranchOpcode(I->getOpcode()) || 329263509Sdim isIndirectBranchOpcode(I->getOpcode()) || 330263509Sdim isJumpTableBranchOpcode(I->getOpcode()) || 331263509Sdim I->isReturn())) { 332263509Sdim // Forget any previous condition branch information - it no longer applies. 333263509Sdim Cond.clear(); 334263509Sdim FBB = 0; 335198090Srdivacky 336263509Sdim // If we can modify the function, delete everything below this 337263509Sdim // unconditional branch. 338263509Sdim if (AllowModify) { 339263509Sdim MachineBasicBlock::iterator DI = llvm::next(I); 340263509Sdim while (DI != MBB.end()) { 341263509Sdim MachineInstr *InstToDelete = DI; 342263509Sdim ++DI; 343263509Sdim InstToDelete->eraseFromParent(); 344263509Sdim } 345218893Sdim } 346218893Sdim } 347218893Sdim 348263509Sdim if (CantAnalyze) 349263509Sdim return true; 350198090Srdivacky 351263509Sdim if (I == MBB.begin()) 352263509Sdim return false; 353198090Srdivacky 354263509Sdim --I; 355198090Srdivacky } 356198090Srdivacky 357263509Sdim // We made it past the terminators without bailing out - we must have 358263509Sdim // analyzed this branch successfully. 
359263509Sdim return false; 360198090Srdivacky} 361198090Srdivacky 362198090Srdivacky 363198090Srdivackyunsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 364198090Srdivacky MachineBasicBlock::iterator I = MBB.end(); 365198090Srdivacky if (I == MBB.begin()) return 0; 366198090Srdivacky --I; 367206083Srdivacky while (I->isDebugValue()) { 368206083Srdivacky if (I == MBB.begin()) 369206083Srdivacky return 0; 370206083Srdivacky --I; 371206083Srdivacky } 372198090Srdivacky if (!isUncondBranchOpcode(I->getOpcode()) && 373198090Srdivacky !isCondBranchOpcode(I->getOpcode())) 374198090Srdivacky return 0; 375198090Srdivacky 376198090Srdivacky // Remove the branch. 377198090Srdivacky I->eraseFromParent(); 378198090Srdivacky 379198090Srdivacky I = MBB.end(); 380198090Srdivacky 381198090Srdivacky if (I == MBB.begin()) return 1; 382198090Srdivacky --I; 383198090Srdivacky if (!isCondBranchOpcode(I->getOpcode())) 384198090Srdivacky return 1; 385198090Srdivacky 386198090Srdivacky // Remove the branch. 387198090Srdivacky I->eraseFromParent(); 388198090Srdivacky return 2; 389198090Srdivacky} 390198090Srdivacky 391198090Srdivackyunsigned 392198090SrdivackyARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 393198090Srdivacky MachineBasicBlock *FBB, 394210299Sed const SmallVectorImpl<MachineOperand> &Cond, 395210299Sed DebugLoc DL) const { 396198090Srdivacky ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); 397198090Srdivacky int BOpc = !AFI->isThumbFunction() 398198090Srdivacky ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); 399198090Srdivacky int BccOpc = !AFI->isThumbFunction() 400198090Srdivacky ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); 401226890Sdim bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); 402198090Srdivacky 403198090Srdivacky // Shouldn't be a fall through. 
404198090Srdivacky assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 405198090Srdivacky assert((Cond.size() == 2 || Cond.size() == 0) && 406198090Srdivacky "ARM branch conditions have two components!"); 407198090Srdivacky 408198090Srdivacky if (FBB == 0) { 409226890Sdim if (Cond.empty()) { // Unconditional branch? 410226890Sdim if (isThumb) 411226890Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); 412226890Sdim else 413226890Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); 414226890Sdim } else 415210299Sed BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 416198090Srdivacky .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 417198090Srdivacky return 1; 418198090Srdivacky } 419198090Srdivacky 420198090Srdivacky // Two-way conditional branch. 421210299Sed BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 422198090Srdivacky .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 423226890Sdim if (isThumb) 424226890Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); 425226890Sdim else 426226890Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); 427198090Srdivacky return 2; 428198090Srdivacky} 429198090Srdivacky 430198090Srdivackybool ARMBaseInstrInfo:: 431198090SrdivackyReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 432198090Srdivacky ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); 433198090Srdivacky Cond[0].setImm(ARMCC::getOppositeCondition(CC)); 434198090Srdivacky return false; 435198090Srdivacky} 436198090Srdivacky 437235633Sdimbool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { 438235633Sdim if (MI->isBundle()) { 439235633Sdim MachineBasicBlock::const_instr_iterator I = MI; 440235633Sdim MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 441235633Sdim while (++I != E && I->isInsideBundle()) { 442235633Sdim int PIdx = I->findFirstPredOperandIdx(); 443235633Sdim if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) 444235633Sdim return 
true; 445235633Sdim } 446235633Sdim return false; 447235633Sdim } 448235633Sdim 449235633Sdim int PIdx = MI->findFirstPredOperandIdx(); 450235633Sdim return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; 451235633Sdim} 452235633Sdim 453198090Srdivackybool ARMBaseInstrInfo:: 454198090SrdivackyPredicateInstruction(MachineInstr *MI, 455198090Srdivacky const SmallVectorImpl<MachineOperand> &Pred) const { 456198090Srdivacky unsigned Opc = MI->getOpcode(); 457198090Srdivacky if (isUncondBranchOpcode(Opc)) { 458198090Srdivacky MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); 459252723Sdim MachineInstrBuilder(*MI->getParent()->getParent(), MI) 460252723Sdim .addImm(Pred[0].getImm()) 461252723Sdim .addReg(Pred[1].getReg()); 462198090Srdivacky return true; 463198090Srdivacky } 464198090Srdivacky 465198090Srdivacky int PIdx = MI->findFirstPredOperandIdx(); 466198090Srdivacky if (PIdx != -1) { 467198090Srdivacky MachineOperand &PMO = MI->getOperand(PIdx); 468198090Srdivacky PMO.setImm(Pred[0].getImm()); 469198090Srdivacky MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); 470198090Srdivacky return true; 471198090Srdivacky } 472198090Srdivacky return false; 473198090Srdivacky} 474198090Srdivacky 475198090Srdivackybool ARMBaseInstrInfo:: 476198090SrdivackySubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 477198090Srdivacky const SmallVectorImpl<MachineOperand> &Pred2) const { 478198090Srdivacky if (Pred1.size() > 2 || Pred2.size() > 2) 479198090Srdivacky return false; 480198090Srdivacky 481198090Srdivacky ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); 482198090Srdivacky ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); 483198090Srdivacky if (CC1 == CC2) 484198090Srdivacky return true; 485198090Srdivacky 486198090Srdivacky switch (CC1) { 487198090Srdivacky default: 488198090Srdivacky return false; 489198090Srdivacky case ARMCC::AL: 490198090Srdivacky return true; 491198090Srdivacky case ARMCC::HS: 492198090Srdivacky return CC2 == 
ARMCC::HI; 493198090Srdivacky case ARMCC::LS: 494198090Srdivacky return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; 495198090Srdivacky case ARMCC::GE: 496198090Srdivacky return CC2 == ARMCC::GT; 497198090Srdivacky case ARMCC::LE: 498198090Srdivacky return CC2 == ARMCC::LT; 499198090Srdivacky } 500198090Srdivacky} 501198090Srdivacky 502198090Srdivackybool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, 503198090Srdivacky std::vector<MachineOperand> &Pred) const { 504198090Srdivacky bool Found = false; 505198090Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 506198090Srdivacky const MachineOperand &MO = MI->getOperand(i); 507235633Sdim if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || 508235633Sdim (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { 509198090Srdivacky Pred.push_back(MO); 510198090Srdivacky Found = true; 511198090Srdivacky } 512198090Srdivacky } 513198090Srdivacky 514198090Srdivacky return Found; 515198090Srdivacky} 516198090Srdivacky 517199989Srdivacky/// isPredicable - Return true if the specified instruction can be predicated. 518199989Srdivacky/// By default, this returns true for every instruction with a 519199989Srdivacky/// PredicateOperand. 520199989Srdivackybool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { 521235633Sdim if (!MI->isPredicable()) 522199989Srdivacky return false; 523198090Srdivacky 524263509Sdim ARMFunctionInfo *AFI = 525263509Sdim MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); 526263509Sdim 527263509Sdim if (AFI->isThumb2Function()) { 528263509Sdim if (getSubtarget().restrictIT()) 529263509Sdim return isV8EligibleForIT(MI); 530263509Sdim } else { // non-Thumb 531263509Sdim if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) 532263509Sdim return false; 533199989Srdivacky } 534263509Sdim 535199989Srdivacky return true; 536199989Srdivacky} 537199989Srdivacky 538200581Srdivacky/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. 
539218893SdimLLVM_ATTRIBUTE_NOINLINE 540198090Srdivackystatic unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 541200581Srdivacky unsigned JTI); 542198090Srdivackystatic unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 543198090Srdivacky unsigned JTI) { 544200581Srdivacky assert(JTI < JT.size()); 545198090Srdivacky return JT[JTI].MBBs.size(); 546198090Srdivacky} 547198090Srdivacky 548198090Srdivacky/// GetInstSize - Return the size of the specified MachineInstr. 549198090Srdivacky/// 550198090Srdivackyunsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { 551198090Srdivacky const MachineBasicBlock &MBB = *MI->getParent(); 552198090Srdivacky const MachineFunction *MF = MBB.getParent(); 553198090Srdivacky const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 554198090Srdivacky 555224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 556224145Sdim if (MCID.getSize()) 557224145Sdim return MCID.getSize(); 558198090Srdivacky 559235633Sdim // If this machine instr is an inline asm, measure it. 
560235633Sdim if (MI->getOpcode() == ARM::INLINEASM) 561235633Sdim return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); 562235633Sdim if (MI->isLabel()) 563235633Sdim return 0; 564224145Sdim unsigned Opc = MI->getOpcode(); 565235633Sdim switch (Opc) { 566235633Sdim case TargetOpcode::IMPLICIT_DEF: 567235633Sdim case TargetOpcode::KILL: 568235633Sdim case TargetOpcode::PROLOG_LABEL: 569235633Sdim case TargetOpcode::EH_LABEL: 570235633Sdim case TargetOpcode::DBG_VALUE: 571235633Sdim return 0; 572235633Sdim case TargetOpcode::BUNDLE: 573235633Sdim return getInstBundleLength(MI); 574235633Sdim case ARM::MOVi16_ga_pcrel: 575235633Sdim case ARM::MOVTi16_ga_pcrel: 576235633Sdim case ARM::t2MOVi16_ga_pcrel: 577235633Sdim case ARM::t2MOVTi16_ga_pcrel: 578235633Sdim return 4; 579235633Sdim case ARM::MOVi32imm: 580235633Sdim case ARM::t2MOVi32imm: 581235633Sdim return 8; 582235633Sdim case ARM::CONSTPOOL_ENTRY: 583235633Sdim // If this machine instr is a constant pool entry, its size is recorded as 584235633Sdim // operand #2. 585235633Sdim return MI->getOperand(2).getImm(); 586235633Sdim case ARM::Int_eh_sjlj_longjmp: 587235633Sdim return 16; 588235633Sdim case ARM::tInt_eh_sjlj_longjmp: 589235633Sdim return 10; 590235633Sdim case ARM::Int_eh_sjlj_setjmp: 591235633Sdim case ARM::Int_eh_sjlj_setjmp_nofp: 592235633Sdim return 20; 593235633Sdim case ARM::tInt_eh_sjlj_setjmp: 594235633Sdim case ARM::t2Int_eh_sjlj_setjmp: 595235633Sdim case ARM::t2Int_eh_sjlj_setjmp_nofp: 596235633Sdim return 12; 597235633Sdim case ARM::BR_JTr: 598235633Sdim case ARM::BR_JTm: 599235633Sdim case ARM::BR_JTadd: 600235633Sdim case ARM::tBR_JTr: 601235633Sdim case ARM::t2BR_JT: 602235633Sdim case ARM::t2TBB_JT: 603235633Sdim case ARM::t2TBH_JT: { 604235633Sdim // These are jumptable branches, i.e. a branch followed by an inlined 605235633Sdim // jumptable. The size is 4 + 4 * number of entries. For TBB, each 606235633Sdim // entry is one byte; TBH two byte each. 
607235633Sdim unsigned EntrySize = (Opc == ARM::t2TBB_JT) 608235633Sdim ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); 609235633Sdim unsigned NumOps = MCID.getNumOperands(); 610235633Sdim MachineOperand JTOP = 611235633Sdim MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2)); 612235633Sdim unsigned JTI = JTOP.getIndex(); 613235633Sdim const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); 614235633Sdim assert(MJTI != 0); 615235633Sdim const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); 616235633Sdim assert(JTI < JT.size()); 617235633Sdim // Thumb instructions are 2 byte aligned, but JT entries are 4 byte 618235633Sdim // 4 aligned. The assembler / linker may add 2 byte padding just before 619235633Sdim // the JT entries. The size does not include this padding; the 620235633Sdim // constant islands pass does separate bookkeeping for it. 621235633Sdim // FIXME: If we know the size of the function is less than (1 << 16) *2 622235633Sdim // bytes, we can use 16-bit entries instead. Then there won't be an 623235633Sdim // alignment issue. 624235633Sdim unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; 625235633Sdim unsigned NumEntries = getNumJTEntries(JT, JTI); 626235633Sdim if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) 627235633Sdim // Make sure the instruction that follows TBB is 2-byte aligned. 628235633Sdim // FIXME: Constant island pass should insert an "ALIGN" instruction 629235633Sdim // instead. 630235633Sdim ++NumEntries; 631235633Sdim return NumEntries * EntrySize + InstSize; 632235633Sdim } 633235633Sdim default: 634235633Sdim // Otherwise, pseudo-instruction sizes are zero. 
    return 0;
  }
}

/// getInstBundleLength - Sum the byte sizes of all instructions inside the
/// bundle headed by MI. Iteration starts at ++MI, so the bundle header itself
/// is not counted; nested bundles are rejected by the assert.
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += GetInstSizeInBytes(&*I);
  }
  return Size;
}

/// copyPhysReg - Emit instructions to copy SrcReg into DestReg at insertion
/// point I. Single-instruction cases (MOVr for GPR<->GPR, VMOVS/VMOVRS/VMOVSR
/// for SPR combinations, VMOVD for DPR, VORRq for QPR) are handled first;
/// larger register tuples (QQ, QQQQ, D-pair/triple/quad, GPR pairs, spaced
/// D tuples) are decomposed into one instruction per sub-register below.
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    // Plain core-register move; predicable and (optionally) flag-setting, so
    // it carries both a default predicate and a default CC operand.
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    // VORRq encodes "dst = src | src", so the source appears twice.
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPred(MIB);
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    // Core-register pairs use the mode-appropriate MOV.
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    // "Spc" classes use every other D register, hence Spacing = 2.
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  }

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    // Sanity check: no sub-copy may read a register a previous sub-copy wrote.
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = AddDefaultPred(Mov);
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = AddDefaultCC(Mov);
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}

/// AddDReg - Append Reg (or, for a physical register, its SubIdx
/// sub-register) to MIB with the given register State flags. When SubIdx is 0
/// the register is added as-is; a virtual register keeps SubIdx as an operand
/// sub-register index instead.
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

/// storeRegToStackSlot - Emit the instruction(s) that spill SrcReg to frame
/// index FI before insertion point I. The opcode is chosen by the register
/// class's spill size (4/8/16/24/32/64 bytes); 16-byte-and-up NEON spills use
/// the aligned VST1 forms only when the slot is at least 16-byte aligned and
/// the stack can be realigned, otherwise VSTM multi-register stores.
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
    case 4:
      if (ARM::GPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 8:
      if (ARM::DPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
        if (Subtarget.hasV5TEOps()) {
          // STRD stores both halves of the pair in one instruction.
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
          MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

          AddDefaultPred(MIB);
        } else {
          // Fallback to STM instruction, which has existed since the dawn of
          // time.
          MachineInstrBuilder MIB =
            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
                             .addFrameIndex(FI).addMemOperand(MMO));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 16:
      if (ARM::DPairRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 24:
      if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 32:
      if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          // FIXME: It's possible to only store part of the QQ register if the
          // spilled def has a sub-register index.
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
                AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 64:
      if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                         .addFrameIndex(FI))
                         .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
              AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    default:
      llvm_unreachable("Unknown reg class!");
  }
}

/// isStoreToStackSlot - If MI is a store of a register directly to a stack
/// slot (frame index with zero offset / no index register), set FrameIndex
/// and return the stored register; otherwise return 0.
unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    // NEON stores place the frame index first and the value in operand 2.
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

/// isStoreToStackSlotPostFE - Post-frame-elimination variant: recognize
/// spills via the instruction's memory operands rather than its shape.
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}

/// loadRegFromStackSlot - Emit the instruction(s) that reload DestReg from
/// frame index FI before insertion point I. Mirrors storeRegToStackSlot:
/// opcode selection is by spill size, with aligned VLD1 forms used only when
/// the slot alignment and stack realignment allow it.
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
                    MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));

    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        // LDRD loads both halves of the pair in one instruction.
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

        AddDefaultPred(MIB);
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
        MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
                               .addFrameIndex(FI).addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      // A physical pair register also needs an implicit def of the full pair.
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                       .addFrameIndex(FI)
                       .addMemOperand(MMO));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                         .addFrameIndex(FI)
                         .addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
   case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                     .addFrameIndex(FI))
                     .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

/// isLoadFromStackSlot - If MI is a load of a register directly from a stack
/// slot (frame index with zero offset / no index register), set FrameIndex
/// and return the loaded register; otherwise return 0.
unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

/// isLoadFromStackSlotPostFE - Post-frame-elimination variant: recognize
/// reloads via the instruction's memory operands rather than its shape.
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                     int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}

bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
  // widened to VMOVD. We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
  if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
    return false;

  // Look for a copy between even S-registers.  That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  unsigned DstRegS = MI->getOperand(0).getReg();
  unsigned SrcRegS = MI->getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // Only even S-registers have a D super-register at ssub_0, so this also
  // filters out odd S-registers.
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI->getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  DEBUG(dbgs() << "widening:    " << *MI);
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);

  // Get rid of the old <imp-def> of DstRegD.  Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI->RemoveOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI->setDesc(get(ARM::VMOVD));
  MI->getOperand(0).setReg(DstRegD);
  MI->getOperand(1).setReg(SrcRegD);
  AddDefaultPred(MIB);

  // We are now reading SrcRegD instead of SrcRegS.  This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI->getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
  if (MI->getOperand(1).isKill()) {
    MI->getOperand(1).setIsKill(false);
    MI->addRegisterKilled(SrcRegS, TRI, true);
  }

  DEBUG(dbgs() << "replaced by: " << *MI);
  return true;
}

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = 0;

  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  // Re-create the entry with the fresh PC label, dispatching on its kind.
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
             ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
             ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  // Note: CPI is an in/out parameter; callers see the duplicated entry's index.
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

/// reMaterialize - Re-emit Orig at insertion point I, defining DestReg
/// (optionally through SubIdx). PIC constant-pool loads cannot simply be
/// cloned: their constant-pool entry embeds a PC label, so a duplicated
/// entry with a fresh label is created via duplicateCPV().
void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
              MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SubIdx,
              const MachineInstr *Orig,
              const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig->getOpcode();
  switch (Opcode) {
  default: {
    // Ordinary instructions are just cloned with the def rewritten.
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                      DestReg)
      .addConstantPoolIndex(CPI).addImm(PCLabelId);
    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
    break;
  }
  }
}

/// duplicate - Clone Orig into MF. For PIC constant-pool loads the CLONED
/// instruction (note: the operands of Orig's clone returned by the base
/// implementation) is pointed at a duplicated constant-pool entry with a
/// fresh PC label.
MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
  switch(Orig->getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    Orig->getOperand(1).setIndex(CPI);
    Orig->getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

/// produceSameValue - Return true if MI0 and MI1 are guaranteed to define the
/// same value. Handles constant-pool loads and global-address moves specially
/// (comparing the underlying constant/global while ignoring PC labels), and
/// PICLDR by tracing both address operands back through SSA defs; everything
/// else falls through to an operand-identity check.
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                        const MachineInstr *MI1,
                                        const MachineRegisterInfo *MRI) const {
  int Opcode = MI0->getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::MOV_ga_dyn ||
      Opcode == ARM::MOV_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_dyn ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0->getOperand(1);
    const MachineOperand &MO1 = MI1->getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::MOV_ga_dyn ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_dyn ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    // Constant-pool loads: compare the pool entries themselves, since two
    // distinct indices may hold the same value.
    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    // One entry is target-specific and the other is not: not comparable.
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    unsigned Addr0 = MI0->getOperand(1).getReg();
    unsigned Addr1 = MI1->getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded value, e.g. a constantpool of a global address, are
      // the same.
      if (!produceSameValue(Def0, Def1, MRI))
        return false;
    }

    // Compare the remaining operands (offset and predicate), skipping the
    // def (0) and the already-handled address (1-2).
    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0->getOperand(i);
      const MachineOperand &MO1 = MI1->getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  // Both loads must come from this whitelist of simple base+offset forms.
  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRBi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRBi12:
  case ARM::t2LDRSHi12:
    break;
  }

  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRBi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRBi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
1498210299Sed if (Subtarget.isThumb1Only()) return false; 1499210299Sed 1500210299Sed assert(Offset2 > Offset1); 1501210299Sed 1502210299Sed if ((Offset2 - Offset1) / 8 > 64) 1503210299Sed return false; 1504210299Sed 1505263509Sdim // Check if the machine opcodes are different. If they are different 1506263509Sdim // then we consider them to not be of the same base address, 1507263509Sdim // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. 1508263509Sdim // In this case, they are considered to be the same because they are different 1509263509Sdim // encoding forms of the same basic instruction. 1510263509Sdim if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && 1511263509Sdim !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && 1512263509Sdim Load2->getMachineOpcode() == ARM::t2LDRBi12) || 1513263509Sdim (Load1->getMachineOpcode() == ARM::t2LDRBi12 && 1514263509Sdim Load2->getMachineOpcode() == ARM::t2LDRBi8))) 1515210299Sed return false; // FIXME: overly conservative? 1516210299Sed 1517210299Sed // Four loads in a row should be sufficient. 1518210299Sed if (NumLoads >= 3) 1519210299Sed return false; 1520210299Sed 1521210299Sed return true; 1522210299Sed} 1523210299Sed 1524210299Sedbool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, 1525210299Sed const MachineBasicBlock *MBB, 1526210299Sed const MachineFunction &MF) const { 1527210299Sed // Debug info is never a scheduling boundary. It's necessary to be explicit 1528210299Sed // due to the special treatment of IT instructions below, otherwise a 1529210299Sed // dbg_value followed by an IT will result in the IT instruction being 1530210299Sed // considered a scheduling hazard, which is wrong. It should be the actual 1531210299Sed // instruction preceding the dbg_value instruction(s), just like it is 1532210299Sed // when debug info is not present. 
1533210299Sed if (MI->isDebugValue()) 1534210299Sed return false; 1535210299Sed 1536210299Sed // Terminators and labels can't be scheduled around. 1537235633Sdim if (MI->isTerminator() || MI->isLabel()) 1538210299Sed return true; 1539210299Sed 1540210299Sed // Treat the start of the IT block as a scheduling boundary, but schedule 1541210299Sed // t2IT along with all instructions following it. 1542210299Sed // FIXME: This is a big hammer. But the alternative is to add all potential 1543210299Sed // true and anti dependencies to IT block instructions as implicit operands 1544210299Sed // to the t2IT instruction. The added compile time and complexity does not 1545210299Sed // seem worth it. 1546210299Sed MachineBasicBlock::const_iterator I = MI; 1547210299Sed // Make sure to skip any dbg_value instructions 1548210299Sed while (++I != MBB->end() && I->isDebugValue()) 1549210299Sed ; 1550210299Sed if (I != MBB->end() && I->getOpcode() == ARM::t2IT) 1551210299Sed return true; 1552210299Sed 1553210299Sed // Don't attempt to schedule around any instruction that defines 1554210299Sed // a stack-oriented pointer, as it's unlikely to be profitable. This 1555210299Sed // saves compile time, because it doesn't require every single 1556210299Sed // stack slot reference to depend on the instruction that does the 1557210299Sed // modification. 1558235633Sdim // Calls don't actually change the stack pointer, even if they have imp-defs. 1559235633Sdim // No ARM calling conventions change the stack pointer. (X86 calling 1560235633Sdim // conventions sometimes do). 
1561235633Sdim if (!MI->isCall() && MI->definesRegister(ARM::SP)) 1562210299Sed return true; 1563210299Sed 1564210299Sed return false; 1565210299Sed} 1566210299Sed 1567224145Sdimbool ARMBaseInstrInfo:: 1568224145SdimisProfitableToIfCvt(MachineBasicBlock &MBB, 1569224145Sdim unsigned NumCycles, unsigned ExtraPredCycles, 1570224145Sdim const BranchProbability &Probability) const { 1571221345Sdim if (!NumCycles) 1572210299Sed return false; 1573218893Sdim 1574218893Sdim // Attempt to estimate the relative costs of predication versus branching. 1575224145Sdim unsigned UnpredCost = Probability.getNumerator() * NumCycles; 1576224145Sdim UnpredCost /= Probability.getDenominator(); 1577224145Sdim UnpredCost += 1; // The branch itself 1578224145Sdim UnpredCost += Subtarget.getMispredictionPenalty() / 10; 1579218893Sdim 1580224145Sdim return (NumCycles + ExtraPredCycles) <= UnpredCost; 1581210299Sed} 1582218893Sdim 1583210299Sedbool ARMBaseInstrInfo:: 1584218893SdimisProfitableToIfCvt(MachineBasicBlock &TMBB, 1585218893Sdim unsigned TCycles, unsigned TExtra, 1586218893Sdim MachineBasicBlock &FMBB, 1587218893Sdim unsigned FCycles, unsigned FExtra, 1588224145Sdim const BranchProbability &Probability) const { 1589218893Sdim if (!TCycles || !FCycles) 1590218893Sdim return false; 1591218893Sdim 1592218893Sdim // Attempt to estimate the relative costs of predication versus branching. 
1593224145Sdim unsigned TUnpredCost = Probability.getNumerator() * TCycles; 1594224145Sdim TUnpredCost /= Probability.getDenominator(); 1595226890Sdim 1596224145Sdim uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); 1597224145Sdim unsigned FUnpredCost = Comp * FCycles; 1598224145Sdim FUnpredCost /= Probability.getDenominator(); 1599218893Sdim 1600224145Sdim unsigned UnpredCost = TUnpredCost + FUnpredCost; 1601224145Sdim UnpredCost += 1; // The branch itself 1602224145Sdim UnpredCost += Subtarget.getMispredictionPenalty() / 10; 1603224145Sdim 1604224145Sdim return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; 1605210299Sed} 1606210299Sed 1607245431Sdimbool 1608245431SdimARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 1609245431Sdim MachineBasicBlock &FMBB) const { 1610245431Sdim // Reduce false anti-dependencies to let Swift's out-of-order execution 1611245431Sdim // engine do its thing. 1612245431Sdim return Subtarget.isSwift(); 1613245431Sdim} 1614245431Sdim 1615198090Srdivacky/// getInstrPredicate - If instruction is predicated, returns its predicate 1616198090Srdivacky/// condition, otherwise returns AL. It also returns the condition code 1617198090Srdivacky/// register by reference. 
1618198090SrdivackyARMCC::CondCodes 1619198090Srdivackyllvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { 1620198090Srdivacky int PIdx = MI->findFirstPredOperandIdx(); 1621198090Srdivacky if (PIdx == -1) { 1622198090Srdivacky PredReg = 0; 1623198090Srdivacky return ARMCC::AL; 1624198090Srdivacky } 1625198090Srdivacky 1626198090Srdivacky PredReg = MI->getOperand(PIdx+1).getReg(); 1627198090Srdivacky return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); 1628198090Srdivacky} 1629198090Srdivacky 1630198090Srdivacky 1631198090Srdivackyint llvm::getMatchingCondBranchOpcode(int Opc) { 1632198090Srdivacky if (Opc == ARM::B) 1633198090Srdivacky return ARM::Bcc; 1634235633Sdim if (Opc == ARM::tB) 1635198090Srdivacky return ARM::tBcc; 1636235633Sdim if (Opc == ARM::t2B) 1637235633Sdim return ARM::t2Bcc; 1638198090Srdivacky 1639198090Srdivacky llvm_unreachable("Unknown unconditional branch opcode!"); 1640198090Srdivacky} 1641198090Srdivacky 1642235633Sdim/// commuteInstruction - Handle commutable instructions. 1643235633SdimMachineInstr * 1644235633SdimARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { 1645235633Sdim switch (MI->getOpcode()) { 1646235633Sdim case ARM::MOVCCr: 1647235633Sdim case ARM::t2MOVCCr: { 1648235633Sdim // MOVCC can be commuted by inverting the condition. 1649235633Sdim unsigned PredReg = 0; 1650235633Sdim ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); 1651235633Sdim // MOVCC AL can't be inverted. Shouldn't happen. 1652235633Sdim if (CC == ARMCC::AL || PredReg != ARM::CPSR) 1653235633Sdim return NULL; 1654252723Sdim MI = TargetInstrInfo::commuteInstruction(MI, NewMI); 1655235633Sdim if (!MI) 1656235633Sdim return NULL; 1657235633Sdim // After swapping the MOVCC operands, also invert the condition. 
1658235633Sdim MI->getOperand(MI->findFirstPredOperandIdx()) 1659235633Sdim .setImm(ARMCC::getOppositeCondition(CC)); 1660235633Sdim return MI; 1661235633Sdim } 1662235633Sdim } 1663252723Sdim return TargetInstrInfo::commuteInstruction(MI, NewMI); 1664235633Sdim} 1665198090Srdivacky 1666245431Sdim/// Identify instructions that can be folded into a MOVCC instruction, and 1667245431Sdim/// return the defining instruction. 1668245431Sdimstatic MachineInstr *canFoldIntoMOVCC(unsigned Reg, 1669245431Sdim const MachineRegisterInfo &MRI, 1670245431Sdim const TargetInstrInfo *TII) { 1671245431Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1672245431Sdim return 0; 1673245431Sdim if (!MRI.hasOneNonDBGUse(Reg)) 1674245431Sdim return 0; 1675245431Sdim MachineInstr *MI = MRI.getVRegDef(Reg); 1676245431Sdim if (!MI) 1677245431Sdim return 0; 1678245431Sdim // MI is folded into the MOVCC by predicating it. 1679245431Sdim if (!MI->isPredicable()) 1680245431Sdim return 0; 1681245431Sdim // Check if MI has any non-dead defs or physreg uses. This also detects 1682245431Sdim // predicated instructions which will be reading CPSR. 1683245431Sdim for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 1684245431Sdim const MachineOperand &MO = MI->getOperand(i); 1685245431Sdim // Reject frame index operands, PEI can't handle the predicated pseudos. 1686245431Sdim if (MO.isFI() || MO.isCPI() || MO.isJTI()) 1687245431Sdim return 0; 1688245431Sdim if (!MO.isReg()) 1689245431Sdim continue; 1690245431Sdim // MI can't have any tied operands, that would conflict with predication. 
1691245431Sdim if (MO.isTied()) 1692245431Sdim return 0; 1693245431Sdim if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 1694245431Sdim return 0; 1695245431Sdim if (MO.isDef() && !MO.isDead()) 1696245431Sdim return 0; 1697245431Sdim } 1698245431Sdim bool DontMoveAcrossStores = true; 1699245431Sdim if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) 1700245431Sdim return 0; 1701245431Sdim return MI; 1702245431Sdim} 1703245431Sdim 1704245431Sdimbool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, 1705245431Sdim SmallVectorImpl<MachineOperand> &Cond, 1706245431Sdim unsigned &TrueOp, unsigned &FalseOp, 1707245431Sdim bool &Optimizable) const { 1708245431Sdim assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && 1709245431Sdim "Unknown select instruction"); 1710245431Sdim // MOVCC operands: 1711245431Sdim // 0: Def. 1712245431Sdim // 1: True use. 1713245431Sdim // 2: False use. 1714245431Sdim // 3: Condition code. 1715245431Sdim // 4: CPSR use. 1716245431Sdim TrueOp = 1; 1717245431Sdim FalseOp = 2; 1718245431Sdim Cond.push_back(MI->getOperand(3)); 1719245431Sdim Cond.push_back(MI->getOperand(4)); 1720245431Sdim // We can always fold a def. 
1721245431Sdim Optimizable = true; 1722245431Sdim return false; 1723245431Sdim} 1724245431Sdim 1725245431SdimMachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, 1726245431Sdim bool PreferFalse) const { 1727245431Sdim assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && 1728245431Sdim "Unknown select instruction"); 1729263509Sdim MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1730245431Sdim MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); 1731245431Sdim bool Invert = !DefMI; 1732245431Sdim if (!DefMI) 1733245431Sdim DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); 1734245431Sdim if (!DefMI) 1735245431Sdim return 0; 1736245431Sdim 1737263509Sdim // Find new register class to use. 1738263509Sdim MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); 1739263509Sdim unsigned DestReg = MI->getOperand(0).getReg(); 1740263509Sdim const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); 1741263509Sdim if (!MRI.constrainRegClass(DestReg, PreviousClass)) 1742263509Sdim return 0; 1743263509Sdim 1744245431Sdim // Create a new predicated version of DefMI. 1745245431Sdim // Rfalse is the first use. 1746245431Sdim MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 1747263509Sdim DefMI->getDesc(), DestReg); 1748245431Sdim 1749245431Sdim // Copy all the DefMI operands, excluding its (null) predicate. 
1750245431Sdim const MCInstrDesc &DefDesc = DefMI->getDesc(); 1751245431Sdim for (unsigned i = 1, e = DefDesc.getNumOperands(); 1752245431Sdim i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) 1753245431Sdim NewMI.addOperand(DefMI->getOperand(i)); 1754245431Sdim 1755245431Sdim unsigned CondCode = MI->getOperand(3).getImm(); 1756245431Sdim if (Invert) 1757245431Sdim NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); 1758245431Sdim else 1759245431Sdim NewMI.addImm(CondCode); 1760245431Sdim NewMI.addOperand(MI->getOperand(4)); 1761245431Sdim 1762245431Sdim // DefMI is not the -S version that sets CPSR, so add an optional %noreg. 1763245431Sdim if (NewMI->hasOptionalDef()) 1764245431Sdim AddDefaultCC(NewMI); 1765245431Sdim 1766245431Sdim // The output register value when the predicate is false is an implicit 1767245431Sdim // register operand tied to the first def. 1768245431Sdim // The tie makes the register allocator ensure the FalseReg is allocated the 1769245431Sdim // same register as operand 0. 1770245431Sdim FalseReg.setImplicit(); 1771252723Sdim NewMI.addOperand(FalseReg); 1772245431Sdim NewMI->tieOperands(0, NewMI->getNumOperands() - 1); 1773245431Sdim 1774245431Sdim // The caller will erase MI, but not DefMI. 1775245431Sdim DefMI->eraseFromParent(); 1776245431Sdim return NewMI; 1777245431Sdim} 1778245431Sdim 1779226890Sdim/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the 1780226890Sdim/// instruction is encoded with an 'S' bit is determined by the optional CPSR 1781226890Sdim/// def operand. 1782226890Sdim/// 1783226890Sdim/// This will go away once we can teach tblgen how to set the optional CPSR def 1784226890Sdim/// operand itself. 
1785226890Sdimstruct AddSubFlagsOpcodePair { 1786245431Sdim uint16_t PseudoOpc; 1787245431Sdim uint16_t MachineOpc; 1788226890Sdim}; 1789226890Sdim 1790245431Sdimstatic const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { 1791226890Sdim {ARM::ADDSri, ARM::ADDri}, 1792226890Sdim {ARM::ADDSrr, ARM::ADDrr}, 1793226890Sdim {ARM::ADDSrsi, ARM::ADDrsi}, 1794226890Sdim {ARM::ADDSrsr, ARM::ADDrsr}, 1795226890Sdim 1796226890Sdim {ARM::SUBSri, ARM::SUBri}, 1797226890Sdim {ARM::SUBSrr, ARM::SUBrr}, 1798226890Sdim {ARM::SUBSrsi, ARM::SUBrsi}, 1799226890Sdim {ARM::SUBSrsr, ARM::SUBrsr}, 1800226890Sdim 1801226890Sdim {ARM::RSBSri, ARM::RSBri}, 1802226890Sdim {ARM::RSBSrsi, ARM::RSBrsi}, 1803226890Sdim {ARM::RSBSrsr, ARM::RSBrsr}, 1804226890Sdim 1805226890Sdim {ARM::t2ADDSri, ARM::t2ADDri}, 1806226890Sdim {ARM::t2ADDSrr, ARM::t2ADDrr}, 1807226890Sdim {ARM::t2ADDSrs, ARM::t2ADDrs}, 1808226890Sdim 1809226890Sdim {ARM::t2SUBSri, ARM::t2SUBri}, 1810226890Sdim {ARM::t2SUBSrr, ARM::t2SUBrr}, 1811226890Sdim {ARM::t2SUBSrs, ARM::t2SUBrs}, 1812226890Sdim 1813226890Sdim {ARM::t2RSBSri, ARM::t2RSBri}, 1814226890Sdim {ARM::t2RSBSrs, ARM::t2RSBrs}, 1815226890Sdim}; 1816226890Sdim 1817226890Sdimunsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { 1818245431Sdim for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i) 1819245431Sdim if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc) 1820245431Sdim return AddSubFlagsOpcodeMap[i].MachineOpc; 1821226890Sdim return 0; 1822226890Sdim} 1823226890Sdim 1824198090Srdivackyvoid llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, 1825198090Srdivacky MachineBasicBlock::iterator &MBBI, DebugLoc dl, 1826198090Srdivacky unsigned DestReg, unsigned BaseReg, int NumBytes, 1827198090Srdivacky ARMCC::CondCodes Pred, unsigned PredReg, 1828221345Sdim const ARMBaseInstrInfo &TII, unsigned MIFlags) { 1829263509Sdim if (NumBytes == 0 && DestReg != BaseReg) { 1830263509Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) 1831263509Sdim 
.addReg(BaseReg, RegState::Kill) 1832263509Sdim .addImm((unsigned)Pred).addReg(PredReg).addReg(0) 1833263509Sdim .setMIFlags(MIFlags); 1834263509Sdim return; 1835263509Sdim } 1836263509Sdim 1837198090Srdivacky bool isSub = NumBytes < 0; 1838198090Srdivacky if (isSub) NumBytes = -NumBytes; 1839198090Srdivacky 1840198090Srdivacky while (NumBytes) { 1841198090Srdivacky unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); 1842198090Srdivacky unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); 1843198090Srdivacky assert(ThisVal && "Didn't extract field correctly"); 1844198090Srdivacky 1845198090Srdivacky // We will handle these bits from offset, clear them. 1846198090Srdivacky NumBytes &= ~ThisVal; 1847198090Srdivacky 1848198090Srdivacky assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); 1849198090Srdivacky 1850198090Srdivacky // Build the new ADD / SUB. 1851198090Srdivacky unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; 1852198090Srdivacky BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) 1853198090Srdivacky .addReg(BaseReg, RegState::Kill).addImm(ThisVal) 1854221345Sdim .addImm((unsigned)Pred).addReg(PredReg).addReg(0) 1855221345Sdim .setMIFlags(MIFlags); 1856198090Srdivacky BaseReg = DestReg; 1857198090Srdivacky } 1858198090Srdivacky} 1859198090Srdivacky 1860263509Sdimbool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF, 1861263509Sdim MachineInstr *MI, 1862263509Sdim unsigned NumBytes) { 1863263509Sdim // This optimisation potentially adds lots of load and store 1864263509Sdim // micro-operations, it's only really a great benefit to code-size. 1865263509Sdim if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize)) 1866263509Sdim return false; 1867263509Sdim 1868263509Sdim // If only one register is pushed/popped, LLVM can use an LDR/STR 1869263509Sdim // instead. We can't modify those so make sure we're dealing with an 1870263509Sdim // instruction we understand. 
1871263509Sdim bool IsPop = isPopOpcode(MI->getOpcode()); 1872263509Sdim bool IsPush = isPushOpcode(MI->getOpcode()); 1873263509Sdim if (!IsPush && !IsPop) 1874263509Sdim return false; 1875263509Sdim 1876263509Sdim bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || 1877263509Sdim MI->getOpcode() == ARM::VLDMDIA_UPD; 1878263509Sdim bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || 1879263509Sdim MI->getOpcode() == ARM::tPOP || 1880263509Sdim MI->getOpcode() == ARM::tPOP_RET; 1881263509Sdim 1882263509Sdim assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && 1883263509Sdim MI->getOperand(1).getReg() == ARM::SP)) && 1884263509Sdim "trying to fold sp update into non-sp-updating push/pop"); 1885263509Sdim 1886263509Sdim // The VFP push & pop act on D-registers, so we can only fold an adjustment 1887263509Sdim // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try 1888263509Sdim // if this is violated. 1889263509Sdim if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) 1890263509Sdim return false; 1891263509Sdim 1892263509Sdim // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ 1893263509Sdim // pred) so the list starts at 4. Thumb1 starts after the predicate. 1894263509Sdim int RegListIdx = IsT1PushPop ? 2 : 4; 1895263509Sdim 1896263509Sdim // Calculate the space we'll need in terms of registers. 1897263509Sdim unsigned FirstReg = MI->getOperand(RegListIdx).getReg(); 1898263509Sdim unsigned RD0Reg, RegsNeeded; 1899263509Sdim if (IsVFPPushPop) { 1900263509Sdim RD0Reg = ARM::D0; 1901263509Sdim RegsNeeded = NumBytes / 8; 1902263509Sdim } else { 1903263509Sdim RD0Reg = ARM::R0; 1904263509Sdim RegsNeeded = NumBytes / 4; 1905263509Sdim } 1906263509Sdim 1907263509Sdim // We're going to have to strip all list operands off before 1908263509Sdim // re-adding them since the order matters, so save the existing ones 1909263509Sdim // for later. 
1910263509Sdim SmallVector<MachineOperand, 4> RegList; 1911263509Sdim for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) 1912263509Sdim RegList.push_back(MI->getOperand(i)); 1913263509Sdim 1914263509Sdim MachineBasicBlock *MBB = MI->getParent(); 1915263509Sdim const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); 1916263509Sdim const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); 1917263509Sdim 1918263509Sdim // Now try to find enough space in the reglist to allocate NumBytes. 1919263509Sdim for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded; 1920263509Sdim --CurReg) { 1921263509Sdim if (!IsPop) { 1922263509Sdim // Pushing any register is completely harmless, mark the 1923263509Sdim // register involved as undef since we don't care about it in 1924263509Sdim // the slightest. 1925263509Sdim RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, 1926263509Sdim false, false, true)); 1927263509Sdim --RegsNeeded; 1928263509Sdim continue; 1929263509Sdim } 1930263509Sdim 1931263509Sdim // However, we can only pop an extra register if it's not live. For 1932263509Sdim // registers live within the function we might clobber a return value 1933263509Sdim // register; the other way a register can be live here is if it's 1934263509Sdim // callee-saved. 1935263509Sdim if (isCalleeSavedRegister(CurReg, CSRegs) || 1936263509Sdim MBB->computeRegisterLiveness(TRI, CurReg, MI) != 1937263509Sdim MachineBasicBlock::LQR_Dead) { 1938263509Sdim // VFP pops don't allow holes in the register list, so any skip is fatal 1939263509Sdim // for our transformation. GPR pops do, so we should just keep looking. 1940263509Sdim if (IsVFPPushPop) 1941263509Sdim return false; 1942263509Sdim else 1943263509Sdim continue; 1944263509Sdim } 1945263509Sdim 1946263509Sdim // Mark the unimportant registers as <def,dead> in the POP. 
1947263509Sdim RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false, 1948263509Sdim true)); 1949263509Sdim --RegsNeeded; 1950263509Sdim } 1951263509Sdim 1952263509Sdim if (RegsNeeded > 0) 1953263509Sdim return false; 1954263509Sdim 1955263509Sdim // Finally we know we can profitably perform the optimisation so go 1956263509Sdim // ahead: strip all existing registers off and add them back again 1957263509Sdim // in the right order. 1958263509Sdim for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) 1959263509Sdim MI->RemoveOperand(i); 1960263509Sdim 1961263509Sdim // Add the complete list back in. 1962263509Sdim MachineInstrBuilder MIB(MF, &*MI); 1963263509Sdim for (int i = RegList.size() - 1; i >= 0; --i) 1964263509Sdim MIB.addOperand(RegList[i]); 1965263509Sdim 1966263509Sdim return true; 1967263509Sdim} 1968263509Sdim 1969198090Srdivackybool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 1970198090Srdivacky unsigned FrameReg, int &Offset, 1971198090Srdivacky const ARMBaseInstrInfo &TII) { 1972198090Srdivacky unsigned Opcode = MI.getOpcode(); 1973224145Sdim const MCInstrDesc &Desc = MI.getDesc(); 1974198090Srdivacky unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); 1975198090Srdivacky bool isSub = false; 1976198090Srdivacky 1977198090Srdivacky // Memory operands in inline assembly always use AddrMode2. 1978198090Srdivacky if (Opcode == ARM::INLINEASM) 1979198090Srdivacky AddrMode = ARMII::AddrMode2; 1980198090Srdivacky 1981198090Srdivacky if (Opcode == ARM::ADDri) { 1982198090Srdivacky Offset += MI.getOperand(FrameRegIdx+1).getImm(); 1983198090Srdivacky if (Offset == 0) { 1984198090Srdivacky // Turn it into a move. 
1985198090Srdivacky MI.setDesc(TII.get(ARM::MOVr)); 1986198090Srdivacky MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 1987198090Srdivacky MI.RemoveOperand(FrameRegIdx+1); 1988198090Srdivacky Offset = 0; 1989198090Srdivacky return true; 1990198090Srdivacky } else if (Offset < 0) { 1991198090Srdivacky Offset = -Offset; 1992198090Srdivacky isSub = true; 1993198090Srdivacky MI.setDesc(TII.get(ARM::SUBri)); 1994198090Srdivacky } 1995198090Srdivacky 1996198090Srdivacky // Common case: small offset, fits into instruction. 1997198090Srdivacky if (ARM_AM::getSOImmVal(Offset) != -1) { 1998198090Srdivacky // Replace the FrameIndex with sp / fp 1999198090Srdivacky MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 2000198090Srdivacky MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); 2001198090Srdivacky Offset = 0; 2002198090Srdivacky return true; 2003198090Srdivacky } 2004198090Srdivacky 2005198090Srdivacky // Otherwise, pull as much of the immedidate into this ADDri/SUBri 2006198090Srdivacky // as possible. 2007198090Srdivacky unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); 2008198090Srdivacky unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); 2009198090Srdivacky 2010198090Srdivacky // We will handle these bits from offset, clear them. 2011198090Srdivacky Offset &= ~ThisImmVal; 2012198090Srdivacky 2013198090Srdivacky // Get the properly encoded SOImmVal field. 
2014198090Srdivacky assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && 2015198090Srdivacky "Bit extraction didn't work?"); 2016198090Srdivacky MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); 2017198090Srdivacky } else { 2018198090Srdivacky unsigned ImmIdx = 0; 2019198090Srdivacky int InstrOffs = 0; 2020198090Srdivacky unsigned NumBits = 0; 2021198090Srdivacky unsigned Scale = 1; 2022198090Srdivacky switch (AddrMode) { 2023218893Sdim case ARMII::AddrMode_i12: { 2024218893Sdim ImmIdx = FrameRegIdx + 1; 2025218893Sdim InstrOffs = MI.getOperand(ImmIdx).getImm(); 2026218893Sdim NumBits = 12; 2027218893Sdim break; 2028218893Sdim } 2029198090Srdivacky case ARMII::AddrMode2: { 2030198090Srdivacky ImmIdx = FrameRegIdx+2; 2031198090Srdivacky InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); 2032198090Srdivacky if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 2033198090Srdivacky InstrOffs *= -1; 2034198090Srdivacky NumBits = 12; 2035198090Srdivacky break; 2036198090Srdivacky } 2037198090Srdivacky case ARMII::AddrMode3: { 2038198090Srdivacky ImmIdx = FrameRegIdx+2; 2039198090Srdivacky InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); 2040198090Srdivacky if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 2041198090Srdivacky InstrOffs *= -1; 2042198090Srdivacky NumBits = 8; 2043198090Srdivacky break; 2044198090Srdivacky } 2045198090Srdivacky case ARMII::AddrMode4: 2046199481Srdivacky case ARMII::AddrMode6: 2047198090Srdivacky // Can't fold any offset even if it's zero. 
2048198090Srdivacky return false; 2049198090Srdivacky case ARMII::AddrMode5: { 2050198090Srdivacky ImmIdx = FrameRegIdx+1; 2051198090Srdivacky InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); 2052198090Srdivacky if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 2053198090Srdivacky InstrOffs *= -1; 2054198090Srdivacky NumBits = 8; 2055198090Srdivacky Scale = 4; 2056198090Srdivacky break; 2057198090Srdivacky } 2058198090Srdivacky default: 2059198090Srdivacky llvm_unreachable("Unsupported addressing mode!"); 2060198090Srdivacky } 2061198090Srdivacky 2062198090Srdivacky Offset += InstrOffs * Scale; 2063198090Srdivacky assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); 2064198090Srdivacky if (Offset < 0) { 2065198090Srdivacky Offset = -Offset; 2066198090Srdivacky isSub = true; 2067198090Srdivacky } 2068198090Srdivacky 2069198090Srdivacky // Attempt to fold address comp. if opcode has offset bits 2070198090Srdivacky if (NumBits > 0) { 2071198090Srdivacky // Common case: small offset, fits into instruction. 2072198090Srdivacky MachineOperand &ImmOp = MI.getOperand(ImmIdx); 2073198090Srdivacky int ImmedOffset = Offset / Scale; 2074198090Srdivacky unsigned Mask = (1 << NumBits) - 1; 2075198090Srdivacky if ((unsigned)Offset <= Mask * Scale) { 2076198090Srdivacky // Replace the FrameIndex with sp 2077198090Srdivacky MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 2078218893Sdim // FIXME: When addrmode2 goes away, this will simplify (like the 2079218893Sdim // T2 version), as the LDR.i12 versions don't need the encoding 2080218893Sdim // tricks for the offset value. 
2081218893Sdim if (isSub) { 2082218893Sdim if (AddrMode == ARMII::AddrMode_i12) 2083218893Sdim ImmedOffset = -ImmedOffset; 2084218893Sdim else 2085218893Sdim ImmedOffset |= 1 << NumBits; 2086218893Sdim } 2087198090Srdivacky ImmOp.ChangeToImmediate(ImmedOffset); 2088198090Srdivacky Offset = 0; 2089198090Srdivacky return true; 2090198090Srdivacky } 2091198090Srdivacky 2092198090Srdivacky // Otherwise, it didn't fit. Pull in what we can to simplify the immed. 2093198090Srdivacky ImmedOffset = ImmedOffset & Mask; 2094218893Sdim if (isSub) { 2095218893Sdim if (AddrMode == ARMII::AddrMode_i12) 2096218893Sdim ImmedOffset = -ImmedOffset; 2097218893Sdim else 2098218893Sdim ImmedOffset |= 1 << NumBits; 2099218893Sdim } 2100198090Srdivacky ImmOp.ChangeToImmediate(ImmedOffset); 2101198090Srdivacky Offset &= ~(Mask*Scale); 2102198090Srdivacky } 2103198090Srdivacky } 2104198090Srdivacky 2105198090Srdivacky Offset = (isSub) ? -Offset : Offset; 2106198090Srdivacky return Offset == 0; 2107198090Srdivacky} 2108212904Sdim 2109245431Sdim/// analyzeCompare - For a comparison instruction, return the source registers 2110245431Sdim/// in SrcReg and SrcReg2 if having two register operands, and the value it 2111245431Sdim/// compares against in CmpValue. Return true if the comparison instruction 2112245431Sdim/// can be analyzed. 
bool ARMBaseInstrInfo::
analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
               int &CmpMask, int &CmpValue) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::CMPri:
  case ARM::t2CMPri:
    // Register-immediate compare: one source register, immediate in CmpValue,
    // no mask (~0 means "compare the whole register").
    SrcReg = MI->getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI->getOperand(1).getImm();
    return true;
  case ARM::CMPrr:
  case ARM::t2CMPrr:
    // Register-register compare: both source registers reported, no mask.
    SrcReg = MI->getOperand(0).getReg();
    SrcReg2 = MI->getOperand(1).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case ARM::TSTri:
  case ARM::t2TSTri:
    // TST is modeled as a masked compare against zero: the AND immediate is
    // returned in CmpMask so callers can match it against a real 'and'.
    SrcReg = MI->getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = MI->getOperand(1).getImm();
    CmpValue = 0;
    return true;
  }

  // Any other opcode is not a compare we know how to analyze.
  return false;
}

/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
    case ARM::ANDri:
    case ARM::t2ANDri:
      // The AND must use the exact same immediate mask as the TST.
      if (CmpMask != MI->getOperand(2).getImm())
        return false;
      // With CommonUse the TST reads the AND's input (operand 1); otherwise
      // it reads the AND's result (operand 0).
      if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
        return true;
      break;
    case ARM::COPY: {
      // Walk down one instruction which is potentially an 'and'.
      // NOTE(review): this looks only at the next instruction in layout
      // order after the COPY, not at the actual def/use chain.
      const MachineInstr &Copy = *MI;
      MachineBasicBlock::iterator AND(
        llvm::next(MachineBasicBlock::iterator(MI)));
      if (AND == MI->getParent()->end()) return false;
      // Report the candidate AND back to the caller through the MI reference.
      MI = AND;
      return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
                               CmpMask, true);
    }
  }

  return false;
}

/// getSwappedCondition - assume the flags are set by MI(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by MI(b,a).
inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
  switch (CC) {
  // AL is used as the "cannot swap" sentinel by callers.
  default: return ARMCC::AL;
  case ARMCC::EQ: return ARMCC::EQ;
  case ARMCC::NE: return ARMCC::NE;
  case ARMCC::HS: return ARMCC::LS;
  case ARMCC::LO: return ARMCC::HI;
  case ARMCC::HI: return ARMCC::LO;
  case ARMCC::LS: return ARMCC::HS;
  case ARMCC::GE: return ARMCC::LE;
  case ARMCC::LT: return ARMCC::GT;
  case ARMCC::GT: return ARMCC::LT;
  case ARMCC::LE: return ARMCC::GE;
  }
}

/// isRedundantFlagInstr - check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// CMPri can be made redundant by SUBri if the operands are the same.
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
                                        unsigned SrcReg2, int ImmValue,
                                        MachineInstr *OI) {
  // CMPrr(r1,r2) is redundant with SUBrr(r1,r2) or SUBrr(r2,r1); the swapped
  // form is accepted here and handled later by patching condition codes.
  if ((CmpI->getOpcode() == ARM::CMPrr ||
       CmpI->getOpcode() == ARM::t2CMPrr) &&
      (OI->getOpcode() == ARM::SUBrr ||
       OI->getOpcode() == ARM::t2SUBrr) &&
      ((OI->getOperand(1).getReg() == SrcReg &&
        OI->getOperand(2).getReg() == SrcReg2) ||
       (OI->getOperand(1).getReg() == SrcReg2 &&
        OI->getOperand(2).getReg() == SrcReg)))
    return true;

  // CMPri(r1,imm) is redundant with SUBri(r1,imm) — operands must match
  // exactly; there is no swapped form for the immediate case.
  if ((CmpI->getOpcode() == ARM::CMPri ||
       CmpI->getOpcode() == ARM::t2CMPri) &&
      (OI->getOpcode() == ARM::SUBri ||
       OI->getOpcode() == ARM::t2SUBri) &&
      OI->getOperand(1).getReg() == SrcReg &&
      OI->getOperand(2).getImm() == ImmValue)
    return true;
  return false;
}

/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
/// flags in the same way as Compare.
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::
optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
                     int CmpMask, int CmpValue,
                     const MachineRegisterInfo *MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  // Masked compares sometimes use the same register as the corresponding 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
      MI = 0;
      // The def itself wasn't a matching 'and'; scan SrcReg's uses within the
      // compare's block for one that applies the same mask.
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
           UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr->getParent()) continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
            isPredicated(PotentialAND))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  // There are two possible candidates which can be changed to set CPSR:
  // One is MI, the other is a SUB instruction.
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
  MachineInstr *Sub = NULL;
  if (SrcReg2 != 0)
    // MI is not a candidate for CMPrr.
    MI = NULL;
  else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
    // Conservatively refuse to convert an instruction which isn't in the same
    // BB as the comparison.
    // For CMPri, we need to check Sub, thus we can't return here.
    if (CmpInstr->getOpcode() == ARM::CMPri ||
        CmpInstr->getOpcode() == ARM::t2CMPri)
      MI = NULL;
    else
      return false;
  }

  // Check that CPSR isn't set between the comparison instruction and the one we
  // want to change. At the same time, search for Sub.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  --I;
  for (; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
        Instr.readsRegister(ARM::CPSR, TRI))
      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      return false;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
      Sub = &*I;
      break;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Return false if no candidates exist.
  if (!MI && !Sub)
    return false;

  // The single candidate is called MI.
  if (!MI) MI = Sub;

  // We can't use a predicated instruction - it doesn't always write the flags.
  if (isPredicated(MI))
    return false;

  switch (MI->getOpcode()) {
  default: break;
  case ARM::RSBrr:
  case ARM::RSBri:
  case ARM::RSCrr:
  case ARM::RSCri:
  case ARM::ADDrr:
  case ARM::ADDri:
  case ARM::ADCrr:
  case ARM::ADCri:
  case ARM::SUBrr:
  case ARM::SUBri:
  case ARM::SBCrr:
  case ARM::SBCri:
  case ARM::t2RSBri:
  case ARM::t2ADDrr:
  case ARM::t2ADDri:
  case ARM::t2ADCrr:
  case ARM::t2ADCri:
  case ARM::t2SUBrr:
  case ARM::t2SUBri:
  case ARM::t2SBCrr:
  case ARM::t2SBCri:
  case ARM::ANDrr:
  case ARM::ANDri:
  case ARM::t2ANDrr:
  case ARM::t2ANDri:
  case ARM::ORRrr:
  case ARM::ORRri:
  case ARM::t2ORRrr:
  case ARM::t2ORRri:
  case ARM::EORrr:
  case ARM::EORri:
  case ARM::t2EORrr:
  case ARM::t2EORri: {
    // Scan forward for the use of CPSR
    // When checking against MI: if it's a conditional code requires
    // checking of V bit, then this is not safe to do.
    // It is safe to remove CmpInstr if CPSR is redefined or killed.
    // If we are done with the basic block, we need to check whether CPSR is
    // live-out.
    SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
        OperandsToUpdate;
    bool isSafe = false;
    I = CmpInstr;
    E = CmpInstr->getParent()->end();
    while (!isSafe && ++I != E) {
      const MachineInstr &Instr = *I;
      for (unsigned IO = 0, EO = Instr.getNumOperands();
           !isSafe && IO != EO; ++IO) {
        const MachineOperand &MO = Instr.getOperand(IO);
        // A regmask that clobbers CPSR (e.g. a call) kills the flags, so
        // nothing after this point can observe them.
        if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
          isSafe = true;
          break;
        }
        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
          continue;
        if (MO.isDef()) {
          isSafe = true;
          break;
        }
        // Condition code is after the operand before CPSR except for VSELs.
        ARMCC::CondCodes CC;
        bool IsInstrVSel = true;
        switch (Instr.getOpcode()) {
        default:
          IsInstrVSel = false;
          CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
          break;
        case ARM::VSELEQD:
        case ARM::VSELEQS:
          CC = ARMCC::EQ;
          break;
        case ARM::VSELGTD:
        case ARM::VSELGTS:
          CC = ARMCC::GT;
          break;
        case ARM::VSELGED:
        case ARM::VSELGES:
          CC = ARMCC::GE;
          break;
        case ARM::VSELVSS:
        case ARM::VSELVSD:
          CC = ARMCC::VS;
          break;
        }

        if (Sub) {
          ARMCC::CondCodes NewCC = getSwappedCondition(CC);
          if (NewCC == ARMCC::AL)
            return false;
          // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
          // on CMP needs to be updated to be based on SUB.
          // Push the condition code operands to OperandsToUpdate.
          // If it is safe to remove CmpInstr, the condition code of these
          // operands will be modified.
          if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
              Sub->getOperand(2).getReg() == SrcReg) {
            // VSel doesn't support condition code update.
            if (IsInstrVSel)
              return false;
            OperandsToUpdate.push_back(
                std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
          }
        } else
          switch (CC) {
          default:
            // CPSR can be used multiple times, we should continue.
            break;
          case ARMCC::VS:
          case ARMCC::VC:
          case ARMCC::GE:
          case ARMCC::LT:
          case ARMCC::GT:
          case ARMCC::LE:
            // These condition codes read the V (overflow) flag, which the
            // S-suffixed candidate may not set the same way as CMP.
            return false;
          }
      }
    }

    // If CPSR is not killed nor re-defined, we should check whether it is
    // live-out. If it is live-out, do not optimize.
    if (!isSafe) {
      MachineBasicBlock *MBB = CmpInstr->getParent();
      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
           SE = MBB->succ_end(); SI != SE; ++SI)
        if ((*SI)->isLiveIn(ARM::CPSR))
          return false;
    }

    // Toggle the optional operand to CPSR.
    // NOTE(review): assumes operand 5 is the optional cc_out operand for every
    // opcode listed above — confirm against the .td instruction definitions.
    MI->getOperand(5).setReg(ARM::CPSR);
    MI->getOperand(5).setIsDef(true);
    assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
    CmpInstr->eraseFromParent();

    // Modify the condition code of operands in OperandsToUpdate.
    // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
    // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
    for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
      OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
    return true;
  }
  }

  return false;
}

bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
                                     MachineInstr *DefMI, unsigned Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
  unsigned DefOpc = DefMI->getOpcode();
  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
    return false;
  if (!DefMI->getOperand(1).isImm())
    // Could be t2MOVi32imm <ga:xx>
    return false;

  // Only fold when the immediate def has a single real (non-debug) use, since
  // DefMI is deleted on success.
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  const MCInstrDesc &DefMCID = DefMI->getDesc();
  if (DefMCID.hasOptionalDef()) {
    unsigned NumOps = DefMCID.getNumOperands();
    const MachineOperand &MO = DefMI->getOperand(NumOps-1);
    if (MO.getReg() == ARM::CPSR && !MO.isDead())
      // If DefMI defines CPSR and it is not dead, it's obviously not safe
      // to delete DefMI.
      return false;
  }

  const MCInstrDesc &UseMCID = UseMI->getDesc();
  if (UseMCID.hasOptionalDef()) {
    unsigned NumOps = UseMCID.getNumOperands();
    if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
      // If the instruction sets the flag, do not attempt this optimization
      // since it may change the semantics of the code.
      return false;
  }

  unsigned UseOpc = UseMI->getOpcode();
  unsigned NewUseOpc = 0;
  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
  bool Commute = false;
  switch (UseOpc) {
  default: return false;
  case ARM::SUBrr:
  case ARM::ADDrr:
  case ARM::ORRrr:
  case ARM::EORrr:
  case ARM::t2SUBrr:
  case ARM::t2ADDrr:
  case ARM::t2ORRrr:
  case ARM::t2EORrr: {
    // If the immediate feeds operand 1 rather than operand 2, the operation
    // must be commuted (impossible for SUB, which bails below).
    Commute = UseMI->getOperand(2).getReg() != Reg;
    switch (UseOpc) {
    default: break;
    case ARM::SUBrr: {
      if (Commute)
        return false;
      // SUB x, imm is rewritten as ADD-style split of -imm via SUBri.
      ImmVal = -ImmVal;
      NewUseOpc = ARM::SUBri;
      // Fallthrough
    }
    case ARM::ADDrr:
    case ARM::ORRrr:
    case ARM::EORrr: {
      // The 32-bit immediate must split into two ARM SO-immediates; the use
      // becomes two register-immediate instructions (one built, one rewritten).
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
      }
      break;
    }
    case ARM::t2SUBrr: {
      if (Commute)
        return false;
      ImmVal = -ImmVal;
      NewUseOpc = ARM::t2SUBri;
      // Fallthrough
    }
    case ARM::t2ADDrr:
    case ARM::t2ORRrr:
    case ARM::t2EORrr: {
      // Same two-part split, using the Thumb2 modified-immediate encoding.
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
      }
      break;
    }
    }
  }
  }

  // Build "NewReg = <op>ri Reg1, SOImmValV1" before UseMI, then rewrite UseMI
  // in place as "<op>ri NewReg, SOImmValV2" and delete the now-dead mov.
  unsigned OpIdx = Commute ? 2 : 1;
  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
  bool isKill = UseMI->getOperand(OpIdx).isKill();
  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
                                      UseMI, UseMI->getDebugLoc(),
                                      get(NewUseOpc), NewReg)
                              .addReg(Reg1, getKillRegState(isKill))
                              .addImm(SOImmValV1)));
  UseMI->setDesc(get(NewUseOpc));
  UseMI->getOperand(1).setReg(NewReg);
  UseMI->getOperand(1).setIsKill();
  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
  DefMI->eraseFromParent();
  return true;
}

/// getNumMicroOpsSwiftLdSt - Return the number of micro-ops a load/store
/// consumes; called from getNumMicroOps for mayLoad/mayStore instructions
/// when the subtarget is Swift. The counts depend on the addressing mode:
/// negative offsets, non-trivial shifts, pre/post-increment forms, and
/// base/destination register overlap (Rt == Rm/Rn) all cost extra uops.
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
                                        const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: {
    // Not special-cased: fall back to the itinerary's micro-op count.
    const MCInstrDesc &Desc = MI->getDesc();
    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
    assert(UOps >= 0 && "bad # UOps");
    return UOps;
  }

  case ARM::LDRrs:
  case ARM::LDRBrs:
  case ARM::STRrs:
  case ARM::STRBrs: {
    // 1 uop only for an add with lsl shift of 0-3; anything else costs 2.
    unsigned ShOpVal = MI->getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRH:
  case ARM::STRH: {
    // No offset register: single uop.
    if (!MI->getOperand(2).getReg())
      return 1;

    unsigned ShOpVal = MI->getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRSB:
  case ARM::LDRSH:
    return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;

  case ARM::LDRSB_POST:
  case ARM::LDRSH_POST: {
    // Destination aliasing the updated base costs one extra uop.
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    return (Rt == Rm) ? 4 : 3;
  }

  case ARM::LDR_PRE_REG:
  case ARM::LDRB_PRE_REG: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rt == Rm)
      return 3;
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::STR_PRE_REG:
  case ARM::STRB_PRE_REG: {
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::LDRH_PRE:
  case ARM::STRH_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    if (!Rm)
      return 2;
    if (Rt == Rm)
      return 3;
    return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
      ? 3 : 2;
  }

  case ARM::LDR_POST_REG:
  case ARM::LDRB_POST_REG:
  case ARM::LDRH_POST: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    return (Rt == Rm) ? 3 : 2;
  }

  case ARM::LDR_PRE_IMM:
  case ARM::LDRB_PRE_IMM:
  case ARM::LDR_POST_IMM:
  case ARM::LDRB_POST_IMM:
  case ARM::STRB_POST_IMM:
  case ARM::STRB_POST_REG:
  case ARM::STRB_PRE_IMM:
  case ARM::STRH_POST:
  case ARM::STR_POST_IMM:
  case ARM::STR_POST_REG:
  case ARM::STR_PRE_IMM:
    return 2;

  case ARM::LDRSB_PRE:
  case ARM::LDRSH_PRE: {
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm == 0)
      return 3;
    unsigned Rt = MI->getOperand(0).getReg();
    if (Rt == Rm)
      return 4;
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 3;
    return 4;
  }

  case ARM::LDRD: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(2).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::STRD: {
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
    return 2;
  }

  case ARM::LDRD_POST:
  case ARM::t2LDRD_POST:
    return 3;

  case ARM::STRD_POST:
  case ARM::t2STRD_POST:
    return 4;

  case ARM::LDRD_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(3).getReg();
    unsigned Rm = MI->getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::t2LDRD_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(3).getReg();
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::STRD_PRE: {
    unsigned Rm = MI->getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
    return 3;
  }

  case ARM::t2STRD_PRE:
    return 3;

  case ARM::t2LDR_POST:
  case ARM::t2LDRB_POST:
  case ARM::t2LDRB_PRE:
  case ARM::t2LDRSBi12:
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBpci:
  case ARM::t2LDRSBs:
  case ARM::t2LDRH_POST:
  case ARM::t2LDRH_PRE:
  case ARM::t2LDRSBT:
  case ARM::t2LDRSB_POST:
  case ARM::t2LDRSB_PRE:
  case ARM::t2LDRSH_POST:
  case ARM::t2LDRSH_PRE:
  case ARM::t2LDRSHi12:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHpci:
  case ARM::t2LDRSHs:
    return 2;

  case ARM::t2LDRDi8: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(2).getReg();
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::t2STRB_POST:
  case ARM::t2STRB_PRE:
  case ARM::t2STRBs:
  case ARM::t2STRDi8:
  case ARM::t2STRH_POST:
  case ARM::t2STRH_PRE:
  case ARM::t2STRHs:
  case ARM::t2STR_POST:
  case ARM::t2STR_PRE:
  case ARM::t2STRs:
    return 2;
  }
}

// Return the number of 32-bit words loaded by LDM or stored by STM. If this
// can't be easily determined return 0 (missing MachineMemOperand).
//
// FIXME: The current MachineInstr design does not support relying on machine
// mem operands to determine the width of a memory access. Instead, we expect
// the target to provide this information based on the instruction opcode and
// operands. However, using MachineMemOperand is the best solution now for
// two reasons:
//
// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
// operands. This is much more dangerous than using the MachineMemOperand
// sizes because CodeGen passes can insert/remove optional machine operands. In
// fact, it's totally incorrect for preRA passes and appears to be wrong for
// postRA passes as well.
//
// 2) getNumLDMAddresses is only used by the scheduling machine model and any
// machine model that calls this should handle the unknown (zero size) case.
//
// Long term, we should require a target hook that verifies MachineMemOperand
// sizes during MC lowering. That target hook should be local to MC lowering
// because we can't ensure that it is aware of other MI forms. Doing this will
// ensure that MachineMemOperands are correctly propagated through all passes.
/// Sum the byte sizes of this instruction's attached MachineMemOperands and
/// report them as a count of 32-bit (4-byte) addresses. Returns 0 when no mem
/// operands are present; machine models calling this must handle the
/// unknown/zero-size case.
unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
  unsigned Size = 0;
  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
         E = MI->memoperands_end(); I != E; ++I) {
    Size += (*I)->getSize();
  }
  // Each LDM address covers one 32-bit word.
  return Size / 4;
}

/// Return the number of micro-ops this instruction decodes to. Uses the
/// itinerary's count when it is known (>= 0); otherwise the instruction must
/// be a load/store-multiple and the count is derived from the number of
/// register operands, with per-subtarget formulas.
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  // Without itinerary data, assume a single micro-op.
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    // Swift load/store uop counts are refined by a dedicated helper.
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);

    return ItinUOps;
  }

  // Itinerary says "variable": only the ld/st-multiple family is expected.
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple are determined by the number
  // of registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    // Register list operands are the variable tail beyond the fixed operands.
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isSwift()) {
      int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
      switch (Opc) {
      default: break;
      case ARM::VLDMDIA_UPD:
      case ARM::VLDMDDB_UPD:
      case ARM::VLDMSIA_UPD:
      case ARM::VLDMSDB_UPD:
      case ARM::VSTMDIA_UPD:
      case ARM::VSTMDDB_UPD:
      case ARM::VSTMSIA_UPD:
      case ARM::VSTMSDB_UPD:
      case ARM::LDMIA_UPD:
      case ARM::LDMDA_UPD:
      case ARM::LDMDB_UPD:
      case ARM::LDMIB_UPD:
      case ARM::STMIA_UPD:
      case ARM::STMDA_UPD:
      case ARM::STMDB_UPD:
      case ARM::STMIB_UPD:
      case ARM::tLDMIA_UPD:
      case ARM::tSTMIA_UPD:
      case ARM::t2LDMIA_UPD:
      case ARM::t2LDMDB_UPD:
      case ARM::t2STMIA_UPD:
      case ARM::t2STMDB_UPD:
        ++UOps; // One for base register writeback.
        break;
      case ARM::LDMIA_RET:
      case ARM::tPOP_RET:
      case ARM::t2LDMIA_RET:
        UOps += 2; // One for base reg wb, one for write to pc.
        break;
      }
      return UOps;
    } else if (Subtarget.isCortexA8()) {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      int A8UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++A8UOps;
      return A8UOps;
    } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
      // NOTE(review): the isSwift() test here is unreachable — Swift already
      // returned above. Kept for safety; confirm before removing.
      int A9UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
        ++A9UOps;
      return A9UOps;
    } else {
      // Assume the worst.
      return NumRegs;
    }
  }
  }
}

/// Compute the cycle at which a VLDM's DefIdx-th result becomes available,
/// given the operand's position in the register list and the address
/// alignment. Returns the itinerary cycle for the writeback operand.
int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &DefMCID,
                                  unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
  // Position of this def within the variable register list (1-based).
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
    DefCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++DefCycle;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    DefCycle = RegNo;
    bool isSLoad = false;

    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLDMSIA:
    case ARM::VLDMSIA_UPD:
    case ARM::VLDMSDB_UPD:
      isSLoad = true;
      break;
    }

    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    // then it takes an extra cycle.
    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
      ++DefCycle;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}
3011218893Sdim DefCycle = RegNo + 2; 3012218893Sdim } 3013218893Sdim 3014218893Sdim return DefCycle; 3015218893Sdim} 3016218893Sdim 3017218893Sdimint 3018218893SdimARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, 3019224145Sdim const MCInstrDesc &DefMCID, 3020218893Sdim unsigned DefClass, 3021218893Sdim unsigned DefIdx, unsigned DefAlign) const { 3022224145Sdim int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 3023218893Sdim if (RegNo <= 0) 3024218893Sdim // Def is the address writeback. 3025218893Sdim return ItinData->getOperandCycle(DefClass, DefIdx); 3026218893Sdim 3027218893Sdim int DefCycle; 3028218893Sdim if (Subtarget.isCortexA8()) { 3029218893Sdim // 4 registers would be issued: 1, 2, 1. 3030218893Sdim // 5 registers would be issued: 1, 2, 2. 3031218893Sdim DefCycle = RegNo / 2; 3032218893Sdim if (DefCycle < 1) 3033218893Sdim DefCycle = 1; 3034218893Sdim // Result latency is issue cycle + 2: E2. 3035218893Sdim DefCycle += 2; 3036245431Sdim } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3037218893Sdim DefCycle = (RegNo / 2); 3038218893Sdim // If there are odd number of registers or if it's not 64-bit aligned, 3039218893Sdim // then it takes an extra AGU (Address Generation Unit) cycle. 3040218893Sdim if ((RegNo % 2) || DefAlign < 8) 3041218893Sdim ++DefCycle; 3042218893Sdim // Result latency is AGU cycles + 2. 3043218893Sdim DefCycle += 2; 3044218893Sdim } else { 3045218893Sdim // Assume the worst. 
3046218893Sdim DefCycle = RegNo + 2; 3047218893Sdim } 3048218893Sdim 3049218893Sdim return DefCycle; 3050218893Sdim} 3051218893Sdim 3052218893Sdimint 3053218893SdimARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, 3054224145Sdim const MCInstrDesc &UseMCID, 3055218893Sdim unsigned UseClass, 3056218893Sdim unsigned UseIdx, unsigned UseAlign) const { 3057224145Sdim int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 3058218893Sdim if (RegNo <= 0) 3059218893Sdim return ItinData->getOperandCycle(UseClass, UseIdx); 3060218893Sdim 3061218893Sdim int UseCycle; 3062218893Sdim if (Subtarget.isCortexA8()) { 3063218893Sdim // (regno / 2) + (regno % 2) + 1 3064218893Sdim UseCycle = RegNo / 2 + 1; 3065218893Sdim if (RegNo % 2) 3066218893Sdim ++UseCycle; 3067245431Sdim } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3068218893Sdim UseCycle = RegNo; 3069218893Sdim bool isSStore = false; 3070218893Sdim 3071224145Sdim switch (UseMCID.getOpcode()) { 3072218893Sdim default: break; 3073218893Sdim case ARM::VSTMSIA: 3074218893Sdim case ARM::VSTMSIA_UPD: 3075218893Sdim case ARM::VSTMSDB_UPD: 3076218893Sdim isSStore = true; 3077218893Sdim break; 3078218893Sdim } 3079218893Sdim 3080218893Sdim // If there are odd number of 'S' registers or if it's not 64-bit aligned, 3081218893Sdim // then it takes an extra cycle. 3082218893Sdim if ((isSStore && (RegNo % 2)) || UseAlign < 8) 3083218893Sdim ++UseCycle; 3084218893Sdim } else { 3085218893Sdim // Assume the worst. 
3086218893Sdim UseCycle = RegNo + 2; 3087218893Sdim } 3088218893Sdim 3089218893Sdim return UseCycle; 3090218893Sdim} 3091218893Sdim 3092218893Sdimint 3093218893SdimARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, 3094224145Sdim const MCInstrDesc &UseMCID, 3095218893Sdim unsigned UseClass, 3096218893Sdim unsigned UseIdx, unsigned UseAlign) const { 3097224145Sdim int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 3098218893Sdim if (RegNo <= 0) 3099218893Sdim return ItinData->getOperandCycle(UseClass, UseIdx); 3100218893Sdim 3101218893Sdim int UseCycle; 3102218893Sdim if (Subtarget.isCortexA8()) { 3103218893Sdim UseCycle = RegNo / 2; 3104218893Sdim if (UseCycle < 2) 3105218893Sdim UseCycle = 2; 3106218893Sdim // Read in E3. 3107218893Sdim UseCycle += 2; 3108245431Sdim } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3109218893Sdim UseCycle = (RegNo / 2); 3110218893Sdim // If there are odd number of registers or if it's not 64-bit aligned, 3111218893Sdim // then it takes an extra AGU (Address Generation Unit) cycle. 3112218893Sdim if ((RegNo % 2) || UseAlign < 8) 3113218893Sdim ++UseCycle; 3114218893Sdim } else { 3115218893Sdim // Assume the worst. 
/// Static-description operand latency: number of cycles from when DefMCID's
/// DefIdx-th operand is produced until UseMCID's UseIdx-th operand needs it.
/// Handles variable_ops load/store-multiple opcodes whose operand cycles are
/// not covered by the itinerary, and applies pipeline-forwarding credits.
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MCInstrDesc &DefMCID,
                                    unsigned DefIdx, unsigned DefAlign,
                                    const MCInstrDesc &UseMCID,
                                    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();

  // Fast path: both operand indices are covered by the itinerary tables.
  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction, the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  int DefCycle = -1;
  bool LdmBypass = false;
  switch (DefMCID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;

  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    // Integer LDM results may be forwarded (bypassed) to the consumer.
    LdmBypass = 1;
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;
  }

  if (DefCycle == -1)
    // We can't seem to determine the result latency of the def, assume it's 2.
    DefCycle = 2;

  int UseCycle = -1;
  switch (UseMCID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;

  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;

  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;
  }

  if (UseCycle == -1)
    // Assume it's read in the first stage.
    UseCycle = 1;

  // Latency = cycles between production and consumption.
  UseCycle = DefCycle - UseCycle + 1;
  if (UseCycle > 0) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
                                          UseClass, UseIdx))
        --UseCycle;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx)) {
      --UseCycle;
    }
  }

  return UseCycle;
}

/// Walk backwards inside MI's bundle to find the instruction that defines
/// Reg. On return, DefIdx is the defining operand index and Dist is how many
/// bundled instructions were skipped. Asserts if no def is found.
static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
                                           const MachineInstr *MI, unsigned Reg,
                                           unsigned &DefIdx, unsigned &Dist) {
  Dist = 0;

  // Start at the last instruction inside the bundle.
  MachineBasicBlock::const_iterator I = MI; ++I;
  MachineBasicBlock::const_instr_iterator II =
    llvm::prior(I.getInstrIterator());
  assert(II->isInsideBundle() && "Empty bundle?");

  int Idx = -1;
  while (II->isInsideBundle()) {
    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
    if (Idx != -1)
      break;
    --II;
    ++Dist;
  }

  assert(Idx != -1 && "Cannot find bundled definition!");
  DefIdx = Idx;
  return II;
}

/// Walk forwards inside MI's bundle to find the first instruction that uses
/// Reg. On return, UseIdx is the using operand index and Dist is the number
/// of (non-IT) instructions skipped. Returns null if no use is found.
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
                                           const MachineInstr *MI, unsigned Reg,
                                           unsigned &UseIdx, unsigned &Dist) {
  Dist = 0;

  MachineBasicBlock::const_instr_iterator II = MI; ++II;
  assert(II->isInsideBundle() && "Empty bundle?");
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();

  // FIXME: This doesn't properly handle multiple uses.
  int Idx = -1;
  while (II != E && II->isInsideBundle()) {
    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
    if (Idx != -1)
      break;
    // IT instructions don't count toward the issue distance.
    if (II->getOpcode() != ARM::t2IT)
      ++Dist;
    ++II;
  }

  if (Idx == -1) {
    Dist = 0;
    return 0;
  }

  UseIdx = Idx;
  return II;
}
/// Return the number of cycles to add to (or subtract from) the static
/// itinerary based on the def opcode and alignment. The caller will ensure that
/// adjusted latency is at least one cycle.
static int adjustDefLatency(const ARMSubtarget &Subtarget,
                            const MachineInstr *DefMI,
                            const MCInstrDesc *DefMCID, unsigned DefAlign) {
  int Adjust = 0;
  if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefMCID->getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      // Operand 3 holds the addressing-mode-2 shifter immediate.
      unsigned ShOpVal = DefMI->getOperand(3).getImm();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Adjust;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI->getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 2)
        --Adjust;
      break;
    }
    }
  } else if (Subtarget.isSwift()) {
    // FIXME: Properly handle all of the latency adjustments for address
    // writeback.
    switch (DefMCID->getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal = DefMI->getOperand(3).getImm();
      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      // Swift: add-with-small-lsl forms are two cycles cheaper; lsr #1 is one.
      if (!isSub &&
          (ShImm == 0 ||
           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
        Adjust -= 2;
      else if (!isSub &&
               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
        --Adjust;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI->getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
        Adjust -= 2;
      break;
    }
    }
  }

  // A9-like cores: sub-64-bit-aligned NEON loads pay one extra cycle.
  if (DefAlign < 8 && Subtarget.isLikeA9()) {
    switch (DefMCID->getOpcode()) {
    default: break;
    case ARM::VLD1q8:
    case ARM::VLD1q16:
    case ARM::VLD1q32:
    case ARM::VLD1q64:
    case ARM::VLD1q8wb_fixed:
    case ARM::VLD1q16wb_fixed:
    case ARM::VLD1q32wb_fixed:
    case ARM::VLD1q64wb_fixed:
    case ARM::VLD1q8wb_register:
    case ARM::VLD1q16wb_register:
    case ARM::VLD1q32wb_register:
    case ARM::VLD1q64wb_register:
    case ARM::VLD2d8:
    case ARM::VLD2d16:
    case ARM::VLD2d32:
    case ARM::VLD2q8:
    case ARM::VLD2q16:
    case ARM::VLD2q32:
    case ARM::VLD2d8wb_fixed:
    case ARM::VLD2d16wb_fixed:
    case ARM::VLD2d32wb_fixed:
    case ARM::VLD2q8wb_fixed:
    case ARM::VLD2q16wb_fixed:
    case ARM::VLD2q32wb_fixed:
    case ARM::VLD2d8wb_register:
    case ARM::VLD2d16wb_register:
    case ARM::VLD2d32wb_register:
    case ARM::VLD2q8wb_register:
    case ARM::VLD2q16wb_register:
    case ARM::VLD2q32wb_register:
    case ARM::VLD3d8:
    case ARM::VLD3d16:
    case ARM::VLD3d32:
    case ARM::VLD1d64T:
    case ARM::VLD3d8_UPD:
    case ARM::VLD3d16_UPD:
    case ARM::VLD3d32_UPD:
    case ARM::VLD1d64Twb_fixed:
    case ARM::VLD1d64Twb_register:
    case ARM::VLD3q8_UPD:
    case ARM::VLD3q16_UPD:
    case ARM::VLD3q32_UPD:
    case ARM::VLD4d8:
    case ARM::VLD4d16:
    case ARM::VLD4d32:
    case ARM::VLD1d64Q:
    case ARM::VLD4d8_UPD:
    case ARM::VLD4d16_UPD:
    case ARM::VLD4d32_UPD:
    case ARM::VLD1d64Qwb_fixed:
    case ARM::VLD1d64Qwb_register:
    case ARM::VLD4q8_UPD:
    case ARM::VLD4q16_UPD:
    case ARM::VLD4q32_UPD:
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
    case ARM::VLD1DUPq8wb_fixed:
    case ARM::VLD1DUPq16wb_fixed:
    case ARM::VLD1DUPq32wb_fixed:
    case ARM::VLD1DUPq8wb_register:
    case ARM::VLD1DUPq16wb_register:
    case ARM::VLD1DUPq32wb_register:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
    case ARM::VLD2DUPd8wb_fixed:
    case ARM::VLD2DUPd16wb_fixed:
    case ARM::VLD2DUPd32wb_fixed:
    case ARM::VLD2DUPd8wb_register:
    case ARM::VLD2DUPd16wb_register:
    case ARM::VLD2DUPd32wb_register:
    case ARM::VLD4DUPd8:
    case ARM::VLD4DUPd16:
    case ARM::VLD4DUPd32:
    case ARM::VLD4DUPd8_UPD:
    case ARM::VLD4DUPd16_UPD:
    case ARM::VLD4DUPd32_UPD:
    case ARM::VLD1LNd8:
    case ARM::VLD1LNd16:
    case ARM::VLD1LNd32:
    case ARM::VLD1LNd8_UPD:
    case ARM::VLD1LNd16_UPD:
    case ARM::VLD1LNd32_UPD:
    case ARM::VLD2LNd8:
    case ARM::VLD2LNd16:
    case ARM::VLD2LNd32:
    case ARM::VLD2LNq16:
    case ARM::VLD2LNq32:
    case ARM::VLD2LNd8_UPD:
    case ARM::VLD2LNd16_UPD:
    case ARM::VLD2LNd32_UPD:
    case ARM::VLD2LNq16_UPD:
    case ARM::VLD2LNq32_UPD:
    case ARM::VLD4LNd8:
    case ARM::VLD4LNd16:
    case ARM::VLD4LNd32:
    case ARM::VLD4LNq16:
    case ARM::VLD4LNq32:
    case ARM::VLD4LNd8_UPD:
    case ARM::VLD4LNd16_UPD:
    case ARM::VLD4LNd32_UPD:
    case ARM::VLD4LNq16_UPD:
    case ARM::VLD4LNq32_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increases by one.
      ++Adjust;
      break;
    }
  }
  return Adjust;
}
/// MachineInstr-based operand latency: cycles from DefMI producing its
/// DefIdx-th operand to UseMI consuming its UseIdx-th operand. Unwraps
/// bundles, special-cases copies and CPSR, then defers to the
/// MCInstrDesc-based overload plus dynamic per-opcode adjustments.
/// Returns -1 when no operand latency is known (caller falls back to
/// getInstrLatency).
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MachineInstr *DefMI, unsigned DefIdx,
                                    const MachineInstr *UseMI,
                                    unsigned UseIdx) const {
  // No operand latency. The caller may fall back to getInstrLatency.
  if (!ItinData || ItinData->isEmpty())
    return -1;

  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
  unsigned Reg = DefMO.getReg();
  const MCInstrDesc *DefMCID = &DefMI->getDesc();
  const MCInstrDesc *UseMCID = &UseMI->getDesc();

  // If the def is a bundle header, resolve to the real defining instruction
  // inside the bundle; DefAdj accumulates the intra-bundle distance.
  unsigned DefAdj = 0;
  if (DefMI->isBundle()) {
    DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
    DefMCID = &DefMI->getDesc();
  }
  // Copy-like defs are modeled with unit latency.
  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
      DefMI->isRegSequence() || DefMI->isImplicitDef()) {
    return 1;
  }

  // Likewise resolve a bundled use to the real consumer.
  unsigned UseAdj = 0;
  if (UseMI->isBundle()) {
    unsigned NewUseIdx;
    const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
                                                   Reg, NewUseIdx, UseAdj);
    if (!NewUseMI)
      return -1;

    UseMI = NewUseMI;
    UseIdx = NewUseIdx;
    UseMCID = &UseMI->getDesc();
  }

  if (Reg == ARM::CPSR) {
    if (DefMI->getOpcode() == ARM::FMSTAT) {
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
      return Subtarget.isLikeA9() ? 1 : 20;
    }

    // CPSR set and branch can be paired in the same cycle.
    if (UseMI->isBranch())
      return 0;

    // Otherwise it takes the instruction latency (generally one).
    unsigned Latency = getInstrLatency(ItinData, DefMI);

    // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
    // its uses. Instructions which are otherwise scheduled between them may
    // incur a code size penalty (not able to use the CPSR setting 16-bit
    // instructions).
    if (Latency > 0 && Subtarget.isThumb2()) {
      const MachineFunction *MF = DefMI->getParent()->getParent();
      if (MF->getFunction()->getAttributes().
            hasAttribute(AttributeSet::FunctionIndex,
                         Attribute::OptimizeForSize))
        --Latency;
    }
    return Latency;
  }

  // Implicit operands have no meaningful itinerary position.
  if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
    return -1;

  unsigned DefAlign = DefMI->hasOneMemOperand()
    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
  unsigned UseAlign = UseMI->hasOneMemOperand()
    ? (*UseMI->memoperands_begin())->getAlignment() : 0;

  // Get the itinerary's latency if possible, and handle variable_ops.
  int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
                                  *UseMCID, UseIdx, UseAlign);
  // Unable to find operand latency. The caller may resort to getInstrLatency.
  if (Latency < 0)
    return Latency;

  // Adjust for IT block position.
  int Adj = DefAdj + UseAdj;

  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
  if (Adj >= 0 || (int)Latency > -Adj) {
    return Latency + Adj;
  }
  // Return the itinerary latency, which may be zero but not less than zero.
  return Latency;
}
3557218893Sdim return Latency; 3558218893Sdim} 3559218893Sdim 3560218893Sdimint 3561218893SdimARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 3562218893Sdim SDNode *DefNode, unsigned DefIdx, 3563218893Sdim SDNode *UseNode, unsigned UseIdx) const { 3564218893Sdim if (!DefNode->isMachineOpcode()) 3565218893Sdim return 1; 3566218893Sdim 3567224145Sdim const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); 3568218893Sdim 3569224145Sdim if (isZeroCost(DefMCID.Opcode)) 3570218893Sdim return 0; 3571218893Sdim 3572218893Sdim if (!ItinData || ItinData->isEmpty()) 3573224145Sdim return DefMCID.mayLoad() ? 3 : 1; 3574218893Sdim 3575218893Sdim if (!UseNode->isMachineOpcode()) { 3576224145Sdim int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); 3577245431Sdim if (Subtarget.isLikeA9() || Subtarget.isSwift()) 3578218893Sdim return Latency <= 2 ? 1 : Latency - 1; 3579218893Sdim else 3580218893Sdim return Latency <= 3 ? 1 : Latency - 2; 3581218893Sdim } 3582218893Sdim 3583224145Sdim const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); 3584218893Sdim const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); 3585218893Sdim unsigned DefAlign = !DefMN->memoperands_empty() 3586218893Sdim ? (*DefMN->memoperands_begin())->getAlignment() : 0; 3587218893Sdim const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); 3588218893Sdim unsigned UseAlign = !UseMN->memoperands_empty() 3589218893Sdim ? (*UseMN->memoperands_begin())->getAlignment() : 0; 3590224145Sdim int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 3591224145Sdim UseMCID, UseIdx, UseAlign); 3592218893Sdim 3593218893Sdim if (Latency > 1 && 3594245431Sdim (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { 3595218893Sdim // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 3596218893Sdim // variants are one cycle cheaper. 
    // (Tail of the SDNode-based operand-latency computation; the enclosing
    // condition is above this chunk.)  Loads whose offset operand is a
    // register-shifted register are cheaper for certain shift immediates, so
    // shave cycles off the itinerary latency for those forms.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      // ARM mode: lsl #0 and lsl #2 offsets are one cycle faster.
      unsigned ShOpVal =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
    // FIXME: Properly handle all of the latency adjustments for address
    // writeback.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      // Swift: lsl #0..#3 offsets are two cycles faster; lsr #1 is one
      // cycle faster.
      unsigned ShOpVal =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        Latency -= 2;
      else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl 0-3 only.
      Latency -= 2;
      break;
    }
    }
  }

  // On A9-like cores, under-aligned NEON loads pay an extra cycle; bump the
  // latency of every multi-register / lane / dup load variant when the known
  // alignment is below 8 bytes.
  if (DefAlign < 8 && Subtarget.isLikeA9())
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLD1q8:
    case ARM::VLD1q16:
    case ARM::VLD1q32:
    case ARM::VLD1q64:
    case ARM::VLD1q8wb_register:
    case ARM::VLD1q16wb_register:
    case ARM::VLD1q32wb_register:
    case ARM::VLD1q64wb_register:
    case ARM::VLD1q8wb_fixed:
    case ARM::VLD1q16wb_fixed:
    case ARM::VLD1q32wb_fixed:
    case ARM::VLD1q64wb_fixed:
    case ARM::VLD2d8:
    case ARM::VLD2d16:
    case ARM::VLD2d32:
    case ARM::VLD2q8Pseudo:
    case ARM::VLD2q16Pseudo:
    case ARM::VLD2q32Pseudo:
    case ARM::VLD2d8wb_fixed:
    case ARM::VLD2d16wb_fixed:
    case ARM::VLD2d32wb_fixed:
    case ARM::VLD2q8PseudoWB_fixed:
    case ARM::VLD2q16PseudoWB_fixed:
    case ARM::VLD2q32PseudoWB_fixed:
    case ARM::VLD2d8wb_register:
    case ARM::VLD2d16wb_register:
    case ARM::VLD2d32wb_register:
    case ARM::VLD2q8PseudoWB_register:
    case ARM::VLD2q16PseudoWB_register:
    case ARM::VLD2q32PseudoWB_register:
    case ARM::VLD3d8Pseudo:
    case ARM::VLD3d16Pseudo:
    case ARM::VLD3d32Pseudo:
    case ARM::VLD1d64TPseudo:
    case ARM::VLD1d64TPseudoWB_fixed:
    case ARM::VLD3d8Pseudo_UPD:
    case ARM::VLD3d16Pseudo_UPD:
    case ARM::VLD3d32Pseudo_UPD:
    case ARM::VLD3q8Pseudo_UPD:
    case ARM::VLD3q16Pseudo_UPD:
    case ARM::VLD3q32Pseudo_UPD:
    case ARM::VLD3q8oddPseudo:
    case ARM::VLD3q16oddPseudo:
    case ARM::VLD3q32oddPseudo:
    case ARM::VLD3q8oddPseudo_UPD:
    case ARM::VLD3q16oddPseudo_UPD:
    case ARM::VLD3q32oddPseudo_UPD:
    case ARM::VLD4d8Pseudo:
    case ARM::VLD4d16Pseudo:
    case ARM::VLD4d32Pseudo:
    case ARM::VLD1d64QPseudo:
    case ARM::VLD1d64QPseudoWB_fixed:
    case ARM::VLD4d8Pseudo_UPD:
    case ARM::VLD4d16Pseudo_UPD:
    case ARM::VLD4d32Pseudo_UPD:
    case ARM::VLD4q8Pseudo_UPD:
    case ARM::VLD4q16Pseudo_UPD:
    case ARM::VLD4q32Pseudo_UPD:
    case ARM::VLD4q8oddPseudo:
    case ARM::VLD4q16oddPseudo:
    case ARM::VLD4q32oddPseudo:
    case ARM::VLD4q8oddPseudo_UPD:
    case ARM::VLD4q16oddPseudo_UPD:
    case ARM::VLD4q32oddPseudo_UPD:
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
    case ARM::VLD1DUPq8wb_fixed:
    case ARM::VLD1DUPq16wb_fixed:
    case ARM::VLD1DUPq32wb_fixed:
    case ARM::VLD1DUPq8wb_register:
    case ARM::VLD1DUPq16wb_register:
    case ARM::VLD1DUPq32wb_register:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
    case ARM::VLD2DUPd8wb_fixed:
    case ARM::VLD2DUPd16wb_fixed:
    case ARM::VLD2DUPd32wb_fixed:
    case ARM::VLD2DUPd8wb_register:
    case ARM::VLD2DUPd16wb_register:
    case ARM::VLD2DUPd32wb_register:
    case ARM::VLD4DUPd8Pseudo:
    case ARM::VLD4DUPd16Pseudo:
    case ARM::VLD4DUPd32Pseudo:
    case ARM::VLD4DUPd8Pseudo_UPD:
    case ARM::VLD4DUPd16Pseudo_UPD:
    case ARM::VLD4DUPd32Pseudo_UPD:
    case ARM::VLD1LNq8Pseudo:
    case ARM::VLD1LNq16Pseudo:
    case ARM::VLD1LNq32Pseudo:
    case ARM::VLD1LNq8Pseudo_UPD:
    case ARM::VLD1LNq16Pseudo_UPD:
    case ARM::VLD1LNq32Pseudo_UPD:
    case ARM::VLD2LNd8Pseudo:
    case ARM::VLD2LNd16Pseudo:
    case ARM::VLD2LNd32Pseudo:
    case ARM::VLD2LNq16Pseudo:
    case ARM::VLD2LNq32Pseudo:
    case ARM::VLD2LNd8Pseudo_UPD:
    case ARM::VLD2LNd16Pseudo_UPD:
    case ARM::VLD2LNd32Pseudo_UPD:
    case ARM::VLD2LNq16Pseudo_UPD:
    case ARM::VLD2LNq32Pseudo_UPD:
    case ARM::VLD4LNd8Pseudo:
    case ARM::VLD4LNd16Pseudo:
    case ARM::VLD4LNd32Pseudo:
    case ARM::VLD4LNq16Pseudo:
    case ARM::VLD4LNq32Pseudo:
    case ARM::VLD4LNd8Pseudo_UPD:
    case ARM::VLD4LNd16Pseudo_UPD:
    case ARM::VLD4LNd32Pseudo_UPD:
    case ARM::VLD4LNq16Pseudo_UPD:
    case ARM::VLD4LNq32Pseudo_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increases by one.
      ++Latency;
      break;
    }

  return Latency;
}

/// Return the extra latency incurred when this instruction is predicated:
/// calls and CPSR-defining instructions gain an extra CPSR source operand
/// when predicated, which costs one cycle; everything else costs nothing.
/// Copy-like instructions and bundle headers are free.
unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
  if (MI->isCopyLike() || MI->isInsertSubreg() ||
      MI->isRegSequence() || MI->isImplicitDef())
    return 0;

  if (MI->isBundle())
    return 0;

  const MCInstrDesc &MCID = MI->getDesc();

  if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions, this apparently increases their latencies.
    return 1;
  }
  return 0;
}

/// Compute the latency in cycles of \p MI.  Copy-like instructions are
/// treated as latency 1; bundles sum the latencies of their contents
/// (excluding the t2IT header); otherwise the itinerary (or a load/non-load
/// default when no itinerary exists) supplies the base latency, optionally
/// adjusted for def-side opcode variants.  \p PredCost, if non-null,
/// receives the extra predication cost (see getPredicationCost).
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                           const MachineInstr *MI,
                                           unsigned *PredCost) const {
  if (MI->isCopyLike() || MI->isInsertSubreg() ||
      MI->isRegSequence() || MI->isImplicitDef())
    return 1;

  // An instruction scheduler typically runs on unbundled instructions, however
  // other passes may query the latency of a bundled instruction.
  if (MI->isBundle()) {
    // Sum the latencies of all instructions inside the bundle, skipping the
    // t2IT marker which only encodes predication.
    unsigned Latency = 0;
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      if (I->getOpcode() != ARM::t2IT)
        Latency += getInstrLatency(ItinData, I, PredCost);
    }
    return Latency;
  }

  const MCInstrDesc &MCID = MI->getDesc();
  if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions, this apparently increases their latencies.
    *PredCost = 1;
  }
  // Be sure to call getStageLatency for an empty itinerary in case it has a
  // valid MinLatency property.
  if (!ItinData)
    // No itinerary at all: assume loads take 3 cycles, everything else 1.
    return MI->mayLoad() ? 3 : 1;

  unsigned Class = MCID.getSchedClass();

  // For instructions with variable uops, use uops as latency.
  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
    return getNumMicroOps(ItinData, MI);

  // For the common case, fall back on the itinerary's latency.
  unsigned Latency = ItinData->getStageLatency(Class);

  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
  // DefAlign is the alignment of the single memory operand, or 0 when the
  // instruction has none (or more than one).
  unsigned DefAlign = MI->hasOneMemOperand()
    ? (*MI->memoperands_begin())->getAlignment() : 0;
  int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
  if (Adj >= 0 || (int)Latency > -Adj) {
    // Only apply a negative adjustment if it cannot drive Latency below zero.
    return Latency + Adj;
  }
  return Latency;
}

/// SDNode flavor of getInstrLatency: non-machine nodes and missing/empty
/// itineraries default to 1 cycle; VLDMQIA/VSTMQIA (Q-register spill/reload
/// pseudos) are hard-coded to 2, everything else uses the itinerary.
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                      SDNode *Node) const {
  if (!Node->isMachineOpcode())
    return 1;

  if (!ItinData || ItinData->isEmpty())
    return 1;

  unsigned Opcode = Node->getMachineOpcode();
  switch (Opcode) {
  default:
    return ItinData->getStageLatency(get(Opcode).getSchedClass());
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;
  }
}

/// Decide whether the DefMI->UseMI dependence is "high latency" for the
/// machine-LICM heuristic: true for any VFP use/def on Cortex-A8 (whose VFP
/// unit is not pipelined), or for VFP/NEON dependences with latency > 3.
bool ARMBaseInstrInfo::
hasHighOperandLatency(const InstrItineraryData *ItinData,
                      const MachineRegisterInfo *MRI,
                      const MachineInstr *DefMI, unsigned DefIdx,
                      const MachineInstr *UseMI, unsigned UseIdx) const {
  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
  if (Subtarget.isCortexA8() &&
      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
    // CortexA8 VFP instructions are not pipelined.
    return true;

  // Hoist VFP / NEON instructions with 4 or higher latency.
  int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
  if (Latency < 0)
    // Operand latency unknown: fall back to the whole-instruction latency.
    Latency = getInstrLatency(ItinData, DefMI);
  if (Latency <= 3)
    return false;
  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
}

/// Return true when DefMI's def at \p DefIdx is cheap (ready within 2
/// cycles per the itinerary).  Only "general" domain instructions qualify.
bool ARMBaseInstrInfo::
hasLowDefLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *DefMI, unsigned DefIdx) const {
  if (!ItinData || ItinData->isEmpty())
    return false;

  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  if (DDomain == ARMII::DomainGeneral) {
    unsigned DefClass = DefMI->getDesc().getSchedClass();
    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    // -1 means the itinerary has no cycle information for this operand.
    return (DefCycle != -1 && DefCycle <= 2);
  }
  return false;
}

/// Machine verifier hook: the ADD/SUB "S-flag" pseudo opcodes must have been
/// converted before reaching the verifier; their presence is an error.
bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
                                         StringRef &ErrInfo) const {
  if (convertAddSubFlagsOpcode(MI->getOpcode())) {
    ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
    return false;
  }
  return true;
}

/// If \p Opcode is a fused FP multiply-accumulate (MLx), decompose it:
/// return true and fill in the separate multiply and add/sub opcodes, plus
/// whether the accumulator is negated and whether a lane index is used.
bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
                                     unsigned &AddSubOpc,
                                     bool &NegAcc, bool &HasLane) const {
  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
  if (I == MLxEntryMap.end())
    return false;

  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
  MulOpc = Entry.MulOpc;
  AddSubOpc = Entry.AddSubOpc;
  NegAcc = Entry.NegAcc;
  HasLane = Entry.HasLane;
  return true;
}

//===----------------------------------------------------------------------===//
// Execution domains.
//===----------------------------------------------------------------------===//
//
// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
// and some can go down both.  The vmov instructions go down the VFP pipeline,
// but they can be changed to vorr equivalents that are executed by the NEON
// pipeline.
//
// We use the following execution domain numbering:
//
enum ARMExeDomain {
  ExeGeneric = 0,
  ExeVFP = 1,
  ExeNEON = 2
};
//
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
  // if they are not predicated.
  if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));

  // CortexA9 is particularly picky about mixing the two and wants these
  // converted.
  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
      (MI->getOpcode() == ARM::VMOVRS ||
       MI->getOpcode() == ARM::VMOVSR ||
       MI->getOpcode() == ARM::VMOVS))
    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));

  // No other instructions can be swizzled, so just determine their domain.
  unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;

  if (Domain & ARMII::DomainNEON)
    return std::make_pair(ExeNEON, 0);

  // Certain instructions can go either way on Cortex-A8.
  // Treat them as NEON instructions.
  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
    return std::make_pair(ExeNEON, 0);

  if (Domain & ARMII::DomainVFP)
    return std::make_pair(ExeVFP, 0);

  return std::make_pair(ExeGeneric, 0);
}

/// Map an S-register to its containing D-register and the lane (0 or 1)
/// it occupies within it.  Asserts if \p SReg has no D super-register.
static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
                                            unsigned SReg, unsigned &Lane) {
  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
  Lane = 0;

  if (DReg != ARM::NoRegister)
    return DReg;

  // Not the low lane; it must therefore be the high (ssub_1) lane.
  Lane = 1;
  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);

  assert(DReg && "S-register with no D super-register?");
  return DReg;
}

/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
/// set ImplicitSReg to a register number that must be marked as implicit-use or
/// zero if no register needs to be defined as implicit-use.
///
/// If the function cannot determine if an SPR should be marked implicit use or
/// not, it returns false.
///
/// This function handles cases where an instruction is being modified from taking
/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
/// lane of the DPR).
///
/// If the other SPR is defined, an implicit-use of it should be added. Else,
/// (including the case where the DPR itself is defined), it should not.
///
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
                                       MachineInstr *MI,
                                       unsigned DReg, unsigned Lane,
                                       unsigned &ImplicitSReg) {
  // If the DPR is defined or used already, the other SPR lane will be chained
  // correctly, so there is nothing to be done.
  if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
    ImplicitSReg = 0;
    return true;
  }

  // Otherwise we need to go searching to see if the SPR is set explicitly.
  ImplicitSReg = TRI->getSubReg(DReg,
                                (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
  MachineBasicBlock::LivenessQueryResult LQR =
    MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);

  if (LQR == MachineBasicBlock::LQR_Live)
    return true;
  else if (LQR == MachineBasicBlock::LQR_Unknown)
    // Liveness could not be determined; tell the caller to give up.
    return false;

  // If the register is known not to be live, there is no need to add an
  // implicit-use.
  ImplicitSReg = 0;
  return true;
}

/// Rewrite MI into the requested execution domain.  Only the VFP->NEON
/// direction is handled here (Domain == ExeNEON); each supported VFP move
/// opcode is replaced in place by an equivalent NEON instruction, with
/// implicit operands added to preserve the original register dependencies.
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
  unsigned DstReg, SrcReg, DReg;
  unsigned Lane;
  // MIB wraps MI itself so operands appended below land on the rewritten MI.
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  switch (MI->getOpcode()) {
    default:
      llvm_unreachable("cannot handle opcode!");
      break;
    case ARM::VMOVD:
      if (Domain != ExeNEON)
        break;

      // Zap the predicate operands.
      assert(!isPredicated(MI) && "Cannot predicate a VORRd");

      // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      // Strip every declared operand; they are rebuilt below.
      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
      MI->setDesc(get(ARM::VORRd));
      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
                       .addReg(SrcReg)
                       .addReg(SrcReg));
      break;
    case ARM::VMOVRS:
      if (Domain != ExeNEON)
        break;
      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");

      // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);

      // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
      // Note that DSrc has been widened and the other lane may be undef, which
      // contaminates the entire register.
      MI->setDesc(get(ARM::VGETLNi32));
      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
                       .addReg(DReg, RegState::Undef)
                       .addImm(Lane));

      // The old source should be an implicit use, otherwise we might think it
      // was dead before here.
      MIB.addReg(SrcReg, RegState::Implicit);
      break;
    case ARM::VMOVSR: {
      if (Domain != ExeNEON)
        break;
      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");

      // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);

      unsigned ImplicitSReg;
      // Bail out if liveness of the sibling lane cannot be determined.
      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
        break;

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
      // Again DDst may be undefined at the beginning of this instruction.
      MI->setDesc(get(ARM::VSETLNi32));
      MIB.addReg(DReg, RegState::Define)
         .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
         .addReg(SrcReg)
         .addImm(Lane);
      AddDefaultPred(MIB);

      // The narrower destination must be marked as set to keep previous chains
      // in place.
      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }
    case ARM::VMOVS: {
      if (Domain != ExeNEON)
        break;

      // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
      DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
      DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);

      unsigned ImplicitSReg;
      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
        break;

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      if (DSrc == DDst) {
        // Destination can be:
        //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
        MI->setDesc(get(ARM::VDUPLN32d));
        MIB.addReg(DDst, RegState::Define)
           .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
           .addImm(SrcLane);
        AddDefaultPred(MIB);

        // Neither the source or the destination are naturally represented any
        // more, so add them in manually.
        MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
        MIB.addReg(SrcReg, RegState::Implicit);
        if (ImplicitSReg != 0)
          MIB.addReg(ImplicitSReg, RegState::Implicit);
        break;
      }

      // In general there's no single instruction that can perform an S <-> S
      // move in NEON space, but a pair of VEXT instructions *can* do the
      // job. It turns out that the VEXTs needed will only use DSrc once, with
      // the position based purely on the combination of lane-0 and lane-1
      // involved. For example
      //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
      //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
      //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
      //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
      //
      // Pattern of the MachineInstrs is:
      //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
      MachineInstrBuilder NewMIB;
      NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                       get(ARM::VEXTd32), DDst);

      // On the first instruction, both DSrc and DDst may be <undef> if present.
      // Specifically when the original instruction didn't have them as an
      // <imp-use>.
      unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
      bool CurUndef = !MI->readsRegister(CurReg, TRI);
      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

      CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
      CurUndef = !MI->readsRegister(CurReg, TRI);
      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

      NewMIB.addImm(1);
      AddDefaultPred(NewMIB);

      if (SrcLane == DstLane)
        NewMIB.addReg(SrcReg, RegState::Implicit);

      // MI itself becomes the second VEXT of the pair.
      MI->setDesc(get(ARM::VEXTd32));
      MIB.addReg(DDst, RegState::Define);

      // On the second instruction, DDst has definitely been defined above, so
      // it is not <undef>. DSrc, if present, can be <undef> as above.
      CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
      MIB.addReg(CurReg, getUndefRegState(CurUndef));

      CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
      MIB.addReg(CurReg, getUndefRegState(CurUndef));

      MIB.addImm(1);
      AddDefaultPred(MIB);

      if (SrcLane != DstLane)
        MIB.addReg(SrcReg, RegState::Implicit);

      // As before, the original destination is no longer represented, add it
      // implicitly.
      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }
  }

}

//===----------------------------------------------------------------------===//
// Partial register updates
//===----------------------------------------------------------------------===//
//
// Swift renames NEON registers with 64-bit granularity.
// That means any instruction writing an S-reg implicitly reads the containing
// D-reg.  The problem is mostly avoided by translating f32 operations to
// v2f32 operations on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS      - Only writes S, partial D update.
// VLD1LNd32  - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
//
// Returns the clearance (in instructions) to require before MI's def at
// OpNum, or 0 when MI has no unwanted partial D-register dependency.
unsigned ARMBaseInstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI,
                             unsigned OpNum,
                             const TargetRegisterInfo *TRI) const {
  // Only Swift and Cortex-A15 are modeled; a zero clearance disables this.
  if (!SwiftPartialUpdateClearance ||
      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
    return 0;

  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI->getOperand(OpNum);
  if (MO.readsReg())
    return 0;
  unsigned Reg = MO.getReg();
  int UseOp = -1;

  switch(MI->getOpcode()) {
  // Normal instructions writing only an S-register.
  case ARM::VLDRS:
  case ARM::FCONSTS:
  case ARM::VMOVSR:
  case ARM::VMOVv8i8:
  case ARM::VMOVv4i16:
  case ARM::VMOVv2i32:
  case ARM::VMOVv2f32:
  case ARM::VMOVv1i64:
    UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
    break;

  // Explicitly reads the dependency.
  case ARM::VLD1LNd32:
    UseOp = 3;
    break;
  default:
    return 0;
  }

  // If this instruction actually reads a value from Reg, there is no unwanted
  // dependency.
  if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
    return 0;

  // We must be able to clobber the whole D-reg.
  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    // Virtual register must be a foo:ssub_0<def,undef> operand.
    if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
      return 0;
  } else if (ARM::SPRRegClass.contains(Reg)) {
    // Physical register: MI must define the full D-reg.
    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
                                             &ARM::DPRRegClass);
    if (!DReg || !MI->definesRegister(DReg, TRI))
      return 0;
  }

  // MI has an unwanted D-register dependency.
  // Avoid defs in the previous N instructions.
  return SwiftPartialUpdateClearance;
}

// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
// Inserts a dependency-breaking full-D-register write (FCONSTD) immediately
// before MI so the partial S-register def no longer depends on the D-reg's
// previous value.
void ARMBaseInstrInfo::
breakPartialRegDependency(MachineBasicBlock::iterator MI,
                          unsigned OpNum,
                          const TargetRegisterInfo *TRI) const {
  assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI->getOperand(OpNum);
  unsigned Reg = MO.getReg();
  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
         "Can't break virtual register dependencies.");
  unsigned DReg = Reg;

  // If MI defines an S-reg, find the corresponding D super-register.
  if (ARM::SPRRegClass.contains(Reg)) {
    // S2k and S2k+1 both live inside Dk, so integer division by 2 maps
    // an S-reg index to its D super-register.
    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
  }

  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
  assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");

  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
  // the full D-register by loading the same value to both lanes.  The
  // instruction is micro-coded with 2 uops, so don't do this until we can
  // properly schedule micro-coded instructions.  The dispatcher stalls cause
  // too big regressions.

  // Insert the dependency-breaking FCONSTD before MI.
  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
  AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                         get(ARM::FCONSTD), DReg).addImm(96));
  // Record that MI now consumes (and kills) the freshly written D-reg.
  MI->addRegisterKilled(DReg, TRI, true);
}

// NOP encodings only exist from ARMv6T2 onwards.
bool ARMBaseInstrInfo::hasNOP() const {
  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}

// Returns true if MI's shifted-operand form is one of the shift amounts
// Swift executes quickly (lsl #1, lsl #2, or lsr #1), or if MI has no
// shift operand at all (fewer than 4 operands).
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
  if (MI->getNumOperands() < 4)
    return true;
  unsigned ShOpVal = MI->getOperand(3).getImm();
  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
      ((ShImm == 1 || ShImm == 2) &&
       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
    return true;

  return false;
}