1234353Sdim//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// 2198090Srdivacky// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6198090Srdivacky// 7198090Srdivacky//===----------------------------------------------------------------------===// 8198090Srdivacky// 9198090Srdivacky// This file contains the Base ARM implementation of the TargetInstrInfo class. 10198090Srdivacky// 11198090Srdivacky//===----------------------------------------------------------------------===// 12198090Srdivacky 13198090Srdivacky#include "ARMBaseInstrInfo.h" 14234353Sdim#include "ARMBaseRegisterInfo.h" 15199481Srdivacky#include "ARMConstantPoolValue.h" 16261991Sdim#include "ARMFeatures.h" 17218893Sdim#include "ARMHazardRecognizer.h" 18198090Srdivacky#include "ARMMachineFunctionInfo.h" 19321369Sdim#include "ARMSubtarget.h" 20226633Sdim#include "MCTargetDesc/ARMAddressingModes.h" 21321369Sdim#include "MCTargetDesc/ARMBaseInfo.h" 22321369Sdim#include "llvm/ADT/DenseMap.h" 23249423Sdim#include "llvm/ADT/STLExtras.h" 24321369Sdim#include "llvm/ADT/SmallSet.h" 25321369Sdim#include "llvm/ADT/SmallVector.h" 26321369Sdim#include "llvm/ADT/Triple.h" 27198090Srdivacky#include "llvm/CodeGen/LiveVariables.h" 28321369Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 29199481Srdivacky#include "llvm/CodeGen/MachineConstantPool.h" 30198090Srdivacky#include "llvm/CodeGen/MachineFrameInfo.h" 31321369Sdim#include "llvm/CodeGen/MachineFunction.h" 32321369Sdim#include "llvm/CodeGen/MachineInstr.h" 33198090Srdivacky#include "llvm/CodeGen/MachineInstrBuilder.h" 34198090Srdivacky#include "llvm/CodeGen/MachineMemOperand.h" 35321369Sdim#include "llvm/CodeGen/MachineOperand.h" 36208599Srdivacky#include "llvm/CodeGen/MachineRegisterInfo.h" 37321369Sdim#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" 
38226633Sdim#include "llvm/CodeGen/SelectionDAGNodes.h" 39327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h" 40327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 41288943Sdim#include "llvm/CodeGen/TargetSchedule.h" 42321369Sdim#include "llvm/IR/Attributes.h" 43249423Sdim#include "llvm/IR/Constants.h" 44321369Sdim#include "llvm/IR/DebugLoc.h" 45249423Sdim#include "llvm/IR/Function.h" 46249423Sdim#include "llvm/IR/GlobalValue.h" 47198090Srdivacky#include "llvm/MC/MCAsmInfo.h" 48321369Sdim#include "llvm/MC/MCInstrDesc.h" 49321369Sdim#include "llvm/MC/MCInstrItineraries.h" 50224145Sdim#include "llvm/Support/BranchProbability.h" 51321369Sdim#include "llvm/Support/Casting.h" 52198090Srdivacky#include "llvm/Support/CommandLine.h" 53321369Sdim#include "llvm/Support/Compiler.h" 54198892Srdivacky#include "llvm/Support/Debug.h" 55198090Srdivacky#include "llvm/Support/ErrorHandling.h" 56288943Sdim#include "llvm/Support/raw_ostream.h" 57321369Sdim#include "llvm/Target/TargetMachine.h" 58321369Sdim#include <algorithm> 59321369Sdim#include <cassert> 60321369Sdim#include <cstdint> 61321369Sdim#include <iterator> 62321369Sdim#include <new> 63321369Sdim#include <utility> 64321369Sdim#include <vector> 65224145Sdim 66276479Sdimusing namespace llvm; 67276479Sdim 68276479Sdim#define DEBUG_TYPE "arm-instrinfo" 69276479Sdim 70261991Sdim#define GET_INSTRINFO_CTOR_DTOR 71224145Sdim#include "ARMGenInstrInfo.inc" 72224145Sdim 73198090Srdivackystatic cl::opt<bool> 74198090SrdivackyEnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, 75198090Srdivacky cl::desc("Enable ARM 2-addr to 3-addr conv")); 76198090Srdivacky 77218893Sdim/// ARM_MLxEntry - Record information about MLA / MLS instructions. 78218893Sdimstruct ARM_MLxEntry { 79239462Sdim uint16_t MLxOpc; // MLA / MLS opcode 80239462Sdim uint16_t MulOpc; // Expanded multiplication opcode 81239462Sdim uint16_t AddSubOpc; // Expanded add / sub opcode 82218893Sdim bool NegAcc; // True if the acc is negated before the add / sub. 
83218893Sdim bool HasLane; // True if instruction has an extra "lane" operand. 84218893Sdim}; 85218893Sdim 86218893Sdimstatic const ARM_MLxEntry ARM_MLxTable[] = { 87218893Sdim // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane 88218893Sdim // fp scalar ops 89218893Sdim { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, 90218893Sdim { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, 91218893Sdim { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, 92218893Sdim { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, 93218893Sdim { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, 94218893Sdim { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, 95218893Sdim { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, 96218893Sdim { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, 97218893Sdim 98218893Sdim // fp SIMD ops 99218893Sdim { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, 100218893Sdim { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, 101218893Sdim { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, 102218893Sdim { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, 103218893Sdim { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, 104218893Sdim { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, 105218893Sdim { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, 106218893Sdim { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, 107218893Sdim}; 108218893Sdim 109198892SrdivackyARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) 110224145Sdim : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), 111198892Srdivacky Subtarget(STI) { 112218893Sdim for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { 113218893Sdim if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) 114296417Sdim llvm_unreachable("Duplicated entries?"); 115218893Sdim MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); 116218893Sdim MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); 
117218893Sdim } 118198090Srdivacky} 119198090Srdivacky 120218893Sdim// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl 121218893Sdim// currently defaults to no prepass hazard recognizer. 122276479SdimScheduleHazardRecognizer * 123276479SdimARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, 124276479Sdim const ScheduleDAG *DAG) const { 125218893Sdim if (usePreRAHazardRecognizer()) { 126276479Sdim const InstrItineraryData *II = 127280031Sdim static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); 128218893Sdim return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); 129218893Sdim } 130276479Sdim return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); 131218893Sdim} 132218893Sdim 133218893SdimScheduleHazardRecognizer *ARMBaseInstrInfo:: 134218893SdimCreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 135218893Sdim const ScheduleDAG *DAG) const { 136353358Sdim if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) 137360784Sdim return new ARMHazardRecognizer(II, DAG); 138249423Sdim return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); 139218893Sdim} 140218893Sdim 141309124SdimMachineInstr *ARMBaseInstrInfo::convertToThreeAddress( 142309124Sdim MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { 143198090Srdivacky // FIXME: Thumb2 support. 
144198090Srdivacky 145198090Srdivacky if (!EnableARM3Addr) 146276479Sdim return nullptr; 147198090Srdivacky 148309124Sdim MachineFunction &MF = *MI.getParent()->getParent(); 149309124Sdim uint64_t TSFlags = MI.getDesc().TSFlags; 150198090Srdivacky bool isPre = false; 151198090Srdivacky switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { 152276479Sdim default: return nullptr; 153198090Srdivacky case ARMII::IndexModePre: 154198090Srdivacky isPre = true; 155198090Srdivacky break; 156198090Srdivacky case ARMII::IndexModePost: 157198090Srdivacky break; 158198090Srdivacky } 159198090Srdivacky 160198090Srdivacky // Try splitting an indexed load/store to an un-indexed one plus an add/sub 161198090Srdivacky // operation. 162309124Sdim unsigned MemOpc = getUnindexedOpcode(MI.getOpcode()); 163198090Srdivacky if (MemOpc == 0) 164276479Sdim return nullptr; 165198090Srdivacky 166276479Sdim MachineInstr *UpdateMI = nullptr; 167276479Sdim MachineInstr *MemMI = nullptr; 168198090Srdivacky unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); 169309124Sdim const MCInstrDesc &MCID = MI.getDesc(); 170224145Sdim unsigned NumOps = MCID.getNumOperands(); 171309124Sdim bool isLoad = !MI.mayStore(); 172309124Sdim const MachineOperand &WB = isLoad ? 
MI.getOperand(1) : MI.getOperand(0); 173309124Sdim const MachineOperand &Base = MI.getOperand(2); 174309124Sdim const MachineOperand &Offset = MI.getOperand(NumOps - 3); 175360784Sdim Register WBReg = WB.getReg(); 176360784Sdim Register BaseReg = Base.getReg(); 177360784Sdim Register OffReg = Offset.getReg(); 178309124Sdim unsigned OffImm = MI.getOperand(NumOps - 2).getImm(); 179309124Sdim ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm(); 180198090Srdivacky switch (AddrMode) { 181234353Sdim default: llvm_unreachable("Unknown indexed op!"); 182198090Srdivacky case ARMII::AddrMode2: { 183198090Srdivacky bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 184198090Srdivacky unsigned Amt = ARM_AM::getAM2Offset(OffImm); 185198090Srdivacky if (OffReg == 0) { 186198090Srdivacky if (ARM_AM::getSOImmVal(Amt) == -1) 187198090Srdivacky // Can't encode it in a so_imm operand. This transformation will 188198090Srdivacky // add more than 1 instruction. Abandon! 189276479Sdim return nullptr; 190309124Sdim UpdateMI = BuildMI(MF, MI.getDebugLoc(), 191198090Srdivacky get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 192309124Sdim .addReg(BaseReg) 193309124Sdim .addImm(Amt) 194321369Sdim .add(predOps(Pred)) 195321369Sdim .add(condCodeOp()); 196198090Srdivacky } else if (Amt != 0) { 197198090Srdivacky ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); 198198090Srdivacky unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); 199309124Sdim UpdateMI = BuildMI(MF, MI.getDebugLoc(), 200226633Sdim get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) 201309124Sdim .addReg(BaseReg) 202309124Sdim .addReg(OffReg) 203309124Sdim .addReg(0) 204309124Sdim .addImm(SOOpc) 205321369Sdim .add(predOps(Pred)) 206321369Sdim .add(condCodeOp()); 207198090Srdivacky } else 208309124Sdim UpdateMI = BuildMI(MF, MI.getDebugLoc(), 209198090Srdivacky get(isSub ? 
ARM::SUBrr : ARM::ADDrr), WBReg) 210309124Sdim .addReg(BaseReg) 211309124Sdim .addReg(OffReg) 212321369Sdim .add(predOps(Pred)) 213321369Sdim .add(condCodeOp()); 214198090Srdivacky break; 215198090Srdivacky } 216198090Srdivacky case ARMII::AddrMode3 : { 217198090Srdivacky bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; 218198090Srdivacky unsigned Amt = ARM_AM::getAM3Offset(OffImm); 219198090Srdivacky if (OffReg == 0) 220198090Srdivacky // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. 221309124Sdim UpdateMI = BuildMI(MF, MI.getDebugLoc(), 222198090Srdivacky get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 223309124Sdim .addReg(BaseReg) 224309124Sdim .addImm(Amt) 225321369Sdim .add(predOps(Pred)) 226321369Sdim .add(condCodeOp()); 227198090Srdivacky else 228309124Sdim UpdateMI = BuildMI(MF, MI.getDebugLoc(), 229198090Srdivacky get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 230309124Sdim .addReg(BaseReg) 231309124Sdim .addReg(OffReg) 232321369Sdim .add(predOps(Pred)) 233321369Sdim .add(condCodeOp()); 234198090Srdivacky break; 235198090Srdivacky } 236198090Srdivacky } 237198090Srdivacky 238198090Srdivacky std::vector<MachineInstr*> NewMIs; 239198090Srdivacky if (isPre) { 240198090Srdivacky if (isLoad) 241309124Sdim MemMI = 242309124Sdim BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) 243309124Sdim .addReg(WBReg) 244309124Sdim .addImm(0) 245309124Sdim .addImm(Pred); 246198090Srdivacky else 247309124Sdim MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) 248309124Sdim .addReg(MI.getOperand(1).getReg()) 249309124Sdim .addReg(WBReg) 250309124Sdim .addReg(0) 251309124Sdim .addImm(0) 252309124Sdim .addImm(Pred); 253198090Srdivacky NewMIs.push_back(MemMI); 254198090Srdivacky NewMIs.push_back(UpdateMI); 255198090Srdivacky } else { 256198090Srdivacky if (isLoad) 257309124Sdim MemMI = 258309124Sdim BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) 259309124Sdim .addReg(BaseReg) 260309124Sdim .addImm(0) 261309124Sdim 
.addImm(Pred); 262198090Srdivacky else 263309124Sdim MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) 264309124Sdim .addReg(MI.getOperand(1).getReg()) 265309124Sdim .addReg(BaseReg) 266309124Sdim .addReg(0) 267309124Sdim .addImm(0) 268309124Sdim .addImm(Pred); 269198090Srdivacky if (WB.isDead()) 270198090Srdivacky UpdateMI->getOperand(0).setIsDead(); 271198090Srdivacky NewMIs.push_back(UpdateMI); 272198090Srdivacky NewMIs.push_back(MemMI); 273198090Srdivacky } 274198090Srdivacky 275198090Srdivacky // Transfer LiveVariables states, kill / dead info. 276198090Srdivacky if (LV) { 277309124Sdim for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 278309124Sdim MachineOperand &MO = MI.getOperand(i); 279360784Sdim if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { 280360784Sdim Register Reg = MO.getReg(); 281198090Srdivacky 282198090Srdivacky LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 283198090Srdivacky if (MO.isDef()) { 284198090Srdivacky MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; 285198090Srdivacky if (MO.isDead()) 286309124Sdim LV->addVirtualRegisterDead(Reg, *NewMI); 287198090Srdivacky } 288198090Srdivacky if (MO.isUse() && MO.isKill()) { 289198090Srdivacky for (unsigned j = 0; j < 2; ++j) { 290198090Srdivacky // Look at the two new MI's in reverse order. 
291198090Srdivacky MachineInstr *NewMI = NewMIs[j]; 292198090Srdivacky if (!NewMI->readsRegister(Reg)) 293198090Srdivacky continue; 294309124Sdim LV->addVirtualRegisterKilled(Reg, *NewMI); 295198090Srdivacky if (VI.removeKill(MI)) 296198090Srdivacky VI.Kills.push_back(NewMI); 297198090Srdivacky break; 298198090Srdivacky } 299198090Srdivacky } 300198090Srdivacky } 301198090Srdivacky } 302198090Srdivacky } 303198090Srdivacky 304309124Sdim MachineBasicBlock::iterator MBBI = MI.getIterator(); 305198090Srdivacky MFI->insert(MBBI, NewMIs[1]); 306198090Srdivacky MFI->insert(MBBI, NewMIs[0]); 307198090Srdivacky return NewMIs[0]; 308198090Srdivacky} 309198090Srdivacky 310198090Srdivacky// Branch analysis. 311309124Sdimbool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, 312309124Sdim MachineBasicBlock *&TBB, 313309124Sdim MachineBasicBlock *&FBB, 314309124Sdim SmallVectorImpl<MachineOperand> &Cond, 315309124Sdim bool AllowModify) const { 316276479Sdim TBB = nullptr; 317276479Sdim FBB = nullptr; 318261991Sdim 319198090Srdivacky MachineBasicBlock::iterator I = MBB.end(); 320206083Srdivacky if (I == MBB.begin()) 321261991Sdim return false; // Empty blocks are easy. 322206083Srdivacky --I; 323198090Srdivacky 324261991Sdim // Walk backwards from the end of the basic block until the branch is 325261991Sdim // analyzed or we give up. 326309124Sdim while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) { 327261991Sdim // Flag to be raised on unanalyzeable instructions. This is useful in cases 328261991Sdim // where we want to clean up on the end of the basic block before we bail 329261991Sdim // out. 330261991Sdim bool CantAnalyze = false; 331251662Sdim 332261991Sdim // Skip over DEBUG values and predicated nonterminators. 
333341825Sdim while (I->isDebugInstr() || !I->isTerminator()) { 334261991Sdim if (I == MBB.begin()) 335261991Sdim return false; 336261991Sdim --I; 337261991Sdim } 338251662Sdim 339261991Sdim if (isIndirectBranchOpcode(I->getOpcode()) || 340261991Sdim isJumpTableBranchOpcode(I->getOpcode())) { 341261991Sdim // Indirect branches and jump tables can't be analyzed, but we still want 342261991Sdim // to clean up any instructions at the tail of the basic block. 343261991Sdim CantAnalyze = true; 344261991Sdim } else if (isUncondBranchOpcode(I->getOpcode())) { 345261991Sdim TBB = I->getOperand(0).getMBB(); 346261991Sdim } else if (isCondBranchOpcode(I->getOpcode())) { 347261991Sdim // Bail out if we encounter multiple conditional branches. 348261991Sdim if (!Cond.empty()) 349261991Sdim return true; 350261991Sdim 351261991Sdim assert(!FBB && "FBB should have been null."); 352261991Sdim FBB = TBB; 353261991Sdim TBB = I->getOperand(0).getMBB(); 354261991Sdim Cond.push_back(I->getOperand(1)); 355261991Sdim Cond.push_back(I->getOperand(2)); 356261991Sdim } else if (I->isReturn()) { 357261991Sdim // Returns can't be analyzed, but we should run cleanup. 358309124Sdim CantAnalyze = !isPredicated(*I); 359261991Sdim } else { 360261991Sdim // We encountered other unrecognized terminator. Bail out immediately. 361261991Sdim return true; 362198090Srdivacky } 363198090Srdivacky 364261991Sdim // Cleanup code - to be run for unpredicated unconditional branches and 365261991Sdim // returns. 366309124Sdim if (!isPredicated(*I) && 367261991Sdim (isUncondBranchOpcode(I->getOpcode()) || 368261991Sdim isIndirectBranchOpcode(I->getOpcode()) || 369261991Sdim isJumpTableBranchOpcode(I->getOpcode()) || 370261991Sdim I->isReturn())) { 371261991Sdim // Forget any previous condition branch information - it no longer applies. 
372261991Sdim Cond.clear(); 373276479Sdim FBB = nullptr; 374198090Srdivacky 375261991Sdim // If we can modify the function, delete everything below this 376261991Sdim // unconditional branch. 377261991Sdim if (AllowModify) { 378276479Sdim MachineBasicBlock::iterator DI = std::next(I); 379261991Sdim while (DI != MBB.end()) { 380309124Sdim MachineInstr &InstToDelete = *DI; 381261991Sdim ++DI; 382309124Sdim InstToDelete.eraseFromParent(); 383261991Sdim } 384218893Sdim } 385218893Sdim } 386218893Sdim 387261991Sdim if (CantAnalyze) 388261991Sdim return true; 389198090Srdivacky 390261991Sdim if (I == MBB.begin()) 391261991Sdim return false; 392198090Srdivacky 393261991Sdim --I; 394198090Srdivacky } 395198090Srdivacky 396261991Sdim // We made it past the terminators without bailing out - we must have 397261991Sdim // analyzed this branch successfully. 398261991Sdim return false; 399198090Srdivacky} 400198090Srdivacky 401314564Sdimunsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, 402314564Sdim int *BytesRemoved) const { 403314564Sdim assert(!BytesRemoved && "code size not handled"); 404314564Sdim 405288943Sdim MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 406288943Sdim if (I == MBB.end()) 407288943Sdim return 0; 408288943Sdim 409198090Srdivacky if (!isUncondBranchOpcode(I->getOpcode()) && 410198090Srdivacky !isCondBranchOpcode(I->getOpcode())) 411198090Srdivacky return 0; 412198090Srdivacky 413198090Srdivacky // Remove the branch. 414198090Srdivacky I->eraseFromParent(); 415198090Srdivacky 416198090Srdivacky I = MBB.end(); 417198090Srdivacky 418198090Srdivacky if (I == MBB.begin()) return 1; 419198090Srdivacky --I; 420198090Srdivacky if (!isCondBranchOpcode(I->getOpcode())) 421198090Srdivacky return 1; 422198090Srdivacky 423198090Srdivacky // Remove the branch. 
424198090Srdivacky I->eraseFromParent(); 425198090Srdivacky return 2; 426198090Srdivacky} 427198090Srdivacky 428314564Sdimunsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, 429309124Sdim MachineBasicBlock *TBB, 430309124Sdim MachineBasicBlock *FBB, 431309124Sdim ArrayRef<MachineOperand> Cond, 432314564Sdim const DebugLoc &DL, 433314564Sdim int *BytesAdded) const { 434314564Sdim assert(!BytesAdded && "code size not handled"); 435198090Srdivacky ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); 436198090Srdivacky int BOpc = !AFI->isThumbFunction() 437198090Srdivacky ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); 438198090Srdivacky int BccOpc = !AFI->isThumbFunction() 439198090Srdivacky ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); 440226633Sdim bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); 441198090Srdivacky 442198090Srdivacky // Shouldn't be a fall through. 443314564Sdim assert(TBB && "insertBranch must not be told to insert a fallthrough"); 444198090Srdivacky assert((Cond.size() == 2 || Cond.size() == 0) && 445198090Srdivacky "ARM branch conditions have two components!"); 446198090Srdivacky 447288943Sdim // For conditional branches, we use addOperand to preserve CPSR flags. 448288943Sdim 449276479Sdim if (!FBB) { 450226633Sdim if (Cond.empty()) { // Unconditional branch? 451226633Sdim if (isThumb) 452321369Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL)); 453226633Sdim else 454226633Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); 455226633Sdim } else 456321369Sdim BuildMI(&MBB, DL, get(BccOpc)) 457321369Sdim .addMBB(TBB) 458321369Sdim .addImm(Cond[0].getImm()) 459321369Sdim .add(Cond[1]); 460198090Srdivacky return 1; 461198090Srdivacky } 462198090Srdivacky 463198090Srdivacky // Two-way conditional branch. 
464321369Sdim BuildMI(&MBB, DL, get(BccOpc)) 465321369Sdim .addMBB(TBB) 466321369Sdim .addImm(Cond[0].getImm()) 467321369Sdim .add(Cond[1]); 468226633Sdim if (isThumb) 469321369Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL)); 470226633Sdim else 471226633Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); 472198090Srdivacky return 2; 473198090Srdivacky} 474198090Srdivacky 475198090Srdivackybool ARMBaseInstrInfo:: 476314564SdimreverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 477198090Srdivacky ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); 478198090Srdivacky Cond[0].setImm(ARMCC::getOppositeCondition(CC)); 479198090Srdivacky return false; 480198090Srdivacky} 481198090Srdivacky 482309124Sdimbool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { 483309124Sdim if (MI.isBundle()) { 484309124Sdim MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 485309124Sdim MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 486234353Sdim while (++I != E && I->isInsideBundle()) { 487234353Sdim int PIdx = I->findFirstPredOperandIdx(); 488234353Sdim if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) 489234353Sdim return true; 490234353Sdim } 491234353Sdim return false; 492234353Sdim } 493234353Sdim 494309124Sdim int PIdx = MI.findFirstPredOperandIdx(); 495309124Sdim return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL; 496234353Sdim} 497234353Sdim 498309124Sdimbool ARMBaseInstrInfo::PredicateInstruction( 499309124Sdim MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { 500309124Sdim unsigned Opc = MI.getOpcode(); 501198090Srdivacky if (isUncondBranchOpcode(Opc)) { 502309124Sdim MI.setDesc(get(getMatchingCondBranchOpcode(Opc))); 503309124Sdim MachineInstrBuilder(*MI.getParent()->getParent(), MI) 504249423Sdim .addImm(Pred[0].getImm()) 505249423Sdim .addReg(Pred[1].getReg()); 506198090Srdivacky return true; 507198090Srdivacky } 508198090Srdivacky 509309124Sdim int 
PIdx = MI.findFirstPredOperandIdx(); 510198090Srdivacky if (PIdx != -1) { 511309124Sdim MachineOperand &PMO = MI.getOperand(PIdx); 512198090Srdivacky PMO.setImm(Pred[0].getImm()); 513309124Sdim MI.getOperand(PIdx+1).setReg(Pred[1].getReg()); 514198090Srdivacky return true; 515198090Srdivacky } 516198090Srdivacky return false; 517198090Srdivacky} 518198090Srdivacky 519288943Sdimbool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, 520288943Sdim ArrayRef<MachineOperand> Pred2) const { 521198090Srdivacky if (Pred1.size() > 2 || Pred2.size() > 2) 522198090Srdivacky return false; 523198090Srdivacky 524198090Srdivacky ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); 525198090Srdivacky ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); 526198090Srdivacky if (CC1 == CC2) 527198090Srdivacky return true; 528198090Srdivacky 529198090Srdivacky switch (CC1) { 530198090Srdivacky default: 531198090Srdivacky return false; 532198090Srdivacky case ARMCC::AL: 533198090Srdivacky return true; 534198090Srdivacky case ARMCC::HS: 535198090Srdivacky return CC2 == ARMCC::HI; 536198090Srdivacky case ARMCC::LS: 537198090Srdivacky return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; 538198090Srdivacky case ARMCC::GE: 539198090Srdivacky return CC2 == ARMCC::GT; 540198090Srdivacky case ARMCC::LE: 541198090Srdivacky return CC2 == ARMCC::LT; 542198090Srdivacky } 543198090Srdivacky} 544198090Srdivacky 545309124Sdimbool ARMBaseInstrInfo::DefinesPredicate( 546309124Sdim MachineInstr &MI, std::vector<MachineOperand> &Pred) const { 547198090Srdivacky bool Found = false; 548309124Sdim for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 549309124Sdim const MachineOperand &MO = MI.getOperand(i); 550234353Sdim if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || 551234353Sdim (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { 552198090Srdivacky Pred.push_back(MO); 553198090Srdivacky Found = true; 554198090Srdivacky } 555198090Srdivacky } 556198090Srdivacky 
557198090Srdivacky return Found; 558198090Srdivacky} 559198090Srdivacky 560321369Sdimbool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { 561321369Sdim for (const auto &MO : MI.operands()) 562296417Sdim if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) 563280031Sdim return true; 564280031Sdim return false; 565280031Sdim} 566280031Sdim 567321369Sdimbool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, 568321369Sdim unsigned Op) const { 569321369Sdim const MachineOperand &Offset = MI.getOperand(Op + 1); 570321369Sdim return Offset.getReg() != 0; 571321369Sdim} 572321369Sdim 573321369Sdim// Load with negative register offset requires additional 1cyc and +I unit 574321369Sdim// for Cortex A57 575321369Sdimbool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, 576321369Sdim unsigned Op) const { 577321369Sdim const MachineOperand &Offset = MI.getOperand(Op + 1); 578321369Sdim const MachineOperand &Opc = MI.getOperand(Op + 2); 579321369Sdim assert(Opc.isImm()); 580321369Sdim assert(Offset.isReg()); 581321369Sdim int64_t OpcImm = Opc.getImm(); 582321369Sdim 583321369Sdim bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub; 584321369Sdim return (isSub && Offset.getReg() != 0); 585321369Sdim} 586321369Sdim 587321369Sdimbool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI, 588321369Sdim unsigned Op) const { 589321369Sdim const MachineOperand &Opc = MI.getOperand(Op + 2); 590321369Sdim unsigned OffImm = Opc.getImm(); 591321369Sdim return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; 592321369Sdim} 593321369Sdim 594321369Sdim// Load, scaled register offset, not plus LSL2 595321369Sdimbool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, 596321369Sdim unsigned Op) const { 597321369Sdim const MachineOperand &Opc = MI.getOperand(Op + 2); 598321369Sdim unsigned OffImm = Opc.getImm(); 599321369Sdim 600321369Sdim bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add; 601321369Sdim unsigned 
Amt = ARM_AM::getAM2Offset(OffImm); 602321369Sdim ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm); 603321369Sdim if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled 604321369Sdim bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2); 605321369Sdim return !SimpleScaled; 606321369Sdim} 607321369Sdim 608321369Sdim// Minus reg for ldstso addr mode 609321369Sdimbool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI, 610321369Sdim unsigned Op) const { 611321369Sdim unsigned OffImm = MI.getOperand(Op + 2).getImm(); 612321369Sdim return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 613321369Sdim} 614321369Sdim 615321369Sdim// Load, scaled register offset 616321369Sdimbool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI, 617321369Sdim unsigned Op) const { 618321369Sdim unsigned OffImm = MI.getOperand(Op + 2).getImm(); 619321369Sdim return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; 620321369Sdim} 621321369Sdim 622280031Sdimstatic bool isEligibleForITBlock(const MachineInstr *MI) { 623280031Sdim switch (MI->getOpcode()) { 624280031Sdim default: return true; 625280031Sdim case ARM::tADC: // ADC (register) T1 626280031Sdim case ARM::tADDi3: // ADD (immediate) T1 627280031Sdim case ARM::tADDi8: // ADD (immediate) T2 628280031Sdim case ARM::tADDrr: // ADD (register) T1 629280031Sdim case ARM::tAND: // AND (register) T1 630280031Sdim case ARM::tASRri: // ASR (immediate) T1 631280031Sdim case ARM::tASRrr: // ASR (register) T1 632280031Sdim case ARM::tBIC: // BIC (register) T1 633280031Sdim case ARM::tEOR: // EOR (register) T1 634280031Sdim case ARM::tLSLri: // LSL (immediate) T1 635280031Sdim case ARM::tLSLrr: // LSL (register) T1 636280031Sdim case ARM::tLSRri: // LSR (immediate) T1 637280031Sdim case ARM::tLSRrr: // LSR (register) T1 638280031Sdim case ARM::tMUL: // MUL T1 639280031Sdim case ARM::tMVN: // MVN (register) T1 640280031Sdim case ARM::tORR: // ORR (register) T1 641280031Sdim case ARM::tROR: // ROR (register) T1 
642280031Sdim case ARM::tRSB: // RSB (immediate) T1 643280031Sdim case ARM::tSBC: // SBC (register) T1 644280031Sdim case ARM::tSUBi3: // SUB (immediate) T1 645280031Sdim case ARM::tSUBi8: // SUB (immediate) T2 646280031Sdim case ARM::tSUBrr: // SUB (register) T1 647321369Sdim return !ARMBaseInstrInfo::isCPSRDefined(*MI); 648280031Sdim } 649280031Sdim} 650280031Sdim 651199989Srdivacky/// isPredicable - Return true if the specified instruction can be predicated. 652199989Srdivacky/// By default, this returns true for every instruction with a 653199989Srdivacky/// PredicateOperand. 654321369Sdimbool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { 655309124Sdim if (!MI.isPredicable()) 656199989Srdivacky return false; 657198090Srdivacky 658314564Sdim if (MI.isBundle()) 659314564Sdim return false; 660314564Sdim 661309124Sdim if (!isEligibleForITBlock(&MI)) 662280031Sdim return false; 663280031Sdim 664321369Sdim const ARMFunctionInfo *AFI = 665309124Sdim MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); 666261991Sdim 667321369Sdim // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. 668321369Sdim // In their ARM encoding, they can't be encoded in a conditional form. 
  // NEON-domain instructions cannot be predicated.
  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
    return false;

  // Under restricted-IT (ARMv8 Thumb2), only instructions that remain legal
  // inside an IT block on v8 may be predicated.
  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(&MI);
  }

  return true;
}

namespace llvm {

/// Return true if every explicit definition of CPSR on \p MI is marked dead,
/// i.e. no later instruction reads the flags this instruction sets.
/// Operands that are not CPSR defs (non-registers, undefs, uses) are ignored.
template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (!MO.isDead())
      return false;
  }
  // all definitions of CPSR are dead
  return true;
}

} // end namespace llvm

/// GetInstSize - Return the size of the specified MachineInstr.
///
/// Most instructions report a fixed encoding size via their MCInstrDesc;
/// the switch below handles pseudo-instructions whose descriptor size is 0
/// but which expand to a known number of bytes.
unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI.getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  switch (MI.getOpcode()) {
  default:
    // pseudo-instruction sizes are zero.
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    // Expands to a MOVW/MOVT pair (4 bytes each).
    return 8;
  case ARM::CONSTPOOL_ENTRY:
  case ARM::JUMPTABLE_INSTS:
  case ARM::JUMPTABLE_ADDRS:
  case ARM::JUMPTABLE_TBB:
  case ARM::JUMPTABLE_TBH:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI.getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::tInt_WIN_eh_sjlj_longjmp:
    return 12;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::SPACE:
    // SPACE reserves a block whose byte count is carried in operand #1.
    return MI.getOperand(1).getImm();
  case ARM::INLINEASM:
  case ARM::INLINEASM_BR: {
    // If this machine instr is an inline asm, measure it.
    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
    // ARM-mode functions pad inline asm to a 4-byte boundary.
    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
      Size = alignTo(Size, 4);
    return Size;
  }
  }
}

/// Sum the sizes of the instructions inside the bundle headed by \p MI.
/// The BUNDLE header itself contributes no bytes; iteration stops at the
/// first instruction outside the bundle.
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

/// Emit an MRS-style instruction that copies the status register (CPSR/APSR)
/// into \p DestReg, choosing the A/R-class or M-class form from the subtarget.
void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, bool KillSrc,
                                    const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
                     : ARM::MRS;

  MachineInstrBuilder MIB =
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);

  // There is only 1 A/R class MRS instruction, and it always refers to
  // APSR. However, there are lots of other possibilities on M-class cores.
  // On M-class, select APSR via the system-register immediate.
  // NOTE(review): 0x800 is presumably the SYSm/mask encoding for APSR here —
  // confirm against the t2MRS_M operand encoding.
  if (Subtarget.isMClass())
    MIB.addImm(0x800);

  MIB.add(predOps(ARMCC::AL))
     .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}

/// Emit an MSR-style instruction that copies \p SrcReg into the status
/// register (CPSR/APSR), choosing the A/R-class or M-class form from the
/// subtarget. The written CPSR is modelled as an implicit def.
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned SrcReg, bool KillSrc,
                                  const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
                     : ARM::MSR;

  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));

  // M-class takes a system-register selector; A/R-class takes a field mask.
  // NOTE(review): 0x800 / 8 are presumably the APSR selector and the "nzcvq"
  // field-mask encodings respectively — confirm against the MSR encodings.
  if (Subtarget.isMClass())
    MIB.addImm(0x800);
  else
    MIB.addImm(8);

  MIB.addReg(SrcReg, getKillRegState(KillSrc))
     .add(predOps(ARMCC::AL))
     .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}

/// Append the "no predication" MVE vpred_n operand pair (predicate code +
/// empty VPR register) to an MVE instruction being built.
void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
  MIB.addImm(ARMVCC::None);
  MIB.addReg(0);
}

/// Append the "no predication" MVE vpred_r operand set: vpred_n operands plus
/// an undef tie to \p DestReg for the inactive lanes.
void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                      unsigned DestReg) {
  addUnpredicatedMveVpredNOp(MIB);
  MIB.addReg(DestReg, RegState::Undef);
}

/// Append a predicated MVE vpred_n operand pair: the vector condition code
/// \p Cond plus an implicit use of VPR (the predicate register).
void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
  MIB.addImm(Cond);
  MIB.addReg(ARM::VPR, RegState::Implicit);
}

/// Append a predicated MVE vpred_r operand set: vpred_n operands plus
/// \p Inactive, the register supplying values for the disabled lanes.
void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                    unsigned Cond, unsigned Inactive) {
  addPredicatedMveVpredNOp(MIB, Cond);
  MIB.addReg(Inactive);
}

/// Emit instructions to copy physical register \p SrcReg into \p DestReg.
/// Single-instruction cases (GPR, SPR, DPR, QPR, status/predicate registers)
/// are handled directly; larger register tuples are decomposed into a
/// sub-register copy loop further below.
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, MCRegister DestReg,
                                   MCRegister SrcReg, bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  // Plain core-register copy: a single MOVr.
  if (GPRDest && GPRSrc) {
    BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  // Single-instruction FP/vector copies.
  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    // Q-register copy: VORR with identical sources (NEON) or its MVE twin.
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    // VORR is a two-source instruction; repeat the source operand.
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    // MVE instructions carry vector-predicate operands instead of the usual
    // ARM condition-code operands.
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(MIB, DestReg);
    else
      MIB.add(predOps(ARMCC::AL));
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    // "Spc" classes use every-other D register, hence Spacing = 2.
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
             !Subtarget.hasFP64()) {
    // No double-precision moves available: copy the D register as two S regs.
    Opc = ARM::VMOVS;
    BeginIdx = ARM::ssub_0;
    SubRegs = 2;
  } else if (SrcReg == ARM::CPSR) {
    copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
    return;
  } else if (DestReg == ARM::CPSR) {
    copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
    return;
  } else if (DestReg == ARM::VPR) {
    // GPR -> MVE predicate register.
    assert(ARM::GPRRegClass.contains(SrcReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (SrcReg == ARM::VPR) {
    // MVE predicate register -> GPR.
    assert(ARM::GPRRegClass.contains(DestReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (DestReg == ARM::FPSCR_NZCV) {
    assert(ARM::GPRRegClass.contains(SrcReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (SrcReg == ARM::FPSCR_NZCV) {
    assert(ARM::GPRRegClass.contains(DestReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  }

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  // Emit one sub-register copy per element of the tuple.
  for (unsigned i = 0; i != SubRegs; ++i) {
    Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR (NEON or MVE) takes two source operands.
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
      Mov.addReg(Src);
    }
    // MVE VORR takes predicate operands in place of an ordinary condition.
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(Mov, Dst);
    else
      Mov = Mov.add(predOps(ARMCC::AL));
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = Mov.add(condCodeOp());
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}

/// If \p MI is a simple register-to-register move, return its destination and
/// source operands; otherwise return None.
Optional<DestSourcePair>
ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  // VMOVRRD is also a copy instruction but it requires a special way of
  // handling. It is a more complex copy version and therefore we are not
  // considering it here. For recognition of such an instruction the
  // isExtractSubregLike MI interface function could be used.
  // VORRq is considered a move only if its two inputs are the same register.
  if (!MI.isMoveReg() ||
      (MI.getOpcode() == ARM::VORRq &&
       MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
    return None;
  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}

/// Append \p Reg (narrowed to sub-register \p SubIdx when given) to \p MIB.
/// Physical registers are narrowed eagerly via TRI; virtual registers carry
/// the sub-register index on the operand instead.
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (Register::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

/// Emit instructions that spill \p SrcReg to frame index \p FI. The opcode is
/// selected by the spill size of \p RC; larger tuples fall back to multi-reg
/// store instructions built up via AddDReg.
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      MFI.getObjectSize(FI), Align);

  switch (TRI->getSpillSize(*RC)) {
  case 2:
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      // Half-precision FP register.
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      // MVE predicate register spill.
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      if (Subtarget.hasV5TEOps()) {
        // STRD stores both halves of the pair in one instruction.
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
      } else {
        // Fallback to STM instruction, which has existed since the dawn of
        // time.
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      // Use aligned spills if the stack can be realigned.
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
      MIB.addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        // FIXME: It's possible to only store part of the QQ register if the
        // spilled def has a sub-register index.
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      // Eight consecutive D registers stored with one VSTMDIA.
      MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown reg class!");
  }
}

unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                     int &FrameIndex) const {
  // Recognize simple frame-index stores: on a match, set FrameIndex and
  // return the stored register (0 when MI is not such a store).
  switch (MI.getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    // Register-offset form: only a match when the offset register and shift
    // immediate are both zero.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    // Immediate-offset form: only a zero offset is a plain slot store.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VSTR_P0_off:
    // MVE predicate spill stores P0 implicitly.
    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
        MI.getOperand(1).getImm() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return ARM::P0;
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return MI.getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

/// Post-frame-elimination variant: identify a stack-slot store through the
/// instruction's memory operands. Note: returns true/false (as unsigned)
/// rather than the stored register.
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                    int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
      Accesses.size() == 1) {
    FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
    return true;
  }
  return false;
}

/// Emit instructions that reload \p DestReg from frame index \p FI. This is
/// the mirror of storeRegToStackSlot: opcode selection is driven by the
/// spill size of \p RC.
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      MFI.getObjectSize(FI), Align);

  switch (TRI->getSpillSize(*RC)) {
  case 2:
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      // Half-precision FP register.
      BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      // MVE predicate register reload.
      BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        // LDRD loads both halves of the pair in one instruction.
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
        MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
                  .addFrameIndex(FI)
                  .addMemOperand(MMO)
                  .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      // Mark the full pair as defined when loading a physical register
      // through its sub-registers.
      if (Register::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      // Use the aligned load form if the stack can be realigned.
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
      MIB.addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (Register::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (Register::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      // Eight consecutive D registers loaded with one VLDMDIA.
      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (Register::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

/// Recognize simple frame-index loads: on a match, set FrameIndex and return
/// the loaded register (0 when MI is not such a load). Mirror of
/// isStoreToStackSlot.
unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
    // Register-offset form: only a match when the offset register and shift
    // immediate are both zero.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    // Immediate-offset form: only a zero offset is a plain slot load.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VLDR_P0_off:
    // MVE predicate reload defines P0 implicitly.
    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
        MI.getOperand(1).getImm() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return ARM::P0;
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d8TPseudo:
  case ARM::VLD1d16TPseudo:
  case ARM::VLD1d32TPseudo:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d8QPseudo:
  case ARM::VLD1d16QPseudo:
  case ARM::VLD1d32QPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}
/// Like isLoadFromStackSlot, but also recognizes loads folded into other
/// instructions (post frame-elimination), via their memory operands.
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                     int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
      Accesses.size() == 1) {
    FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
    // NOTE(review): despite the unsigned return type this returns true/false,
    // not the loaded register; callers appear to only test for nonzero —
    // confirm before relying on the value.
    return true;
  }
  return false;
}

/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
/// depending on whether the result is used.
void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
  bool isThumb1 = Subtarget.isThumb1Only();
  bool isThumb2 = Subtarget.isThumb2();
  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();

  DebugLoc dl = MI->getDebugLoc();
  MachineBasicBlock *BB = MI->getParent();

  MachineInstrBuilder LDM, STM;
  // Use the writeback (..._UPD) form when the updated base pointer is live
  // afterwards. Thumb1 only has the writeback form.
  if (isThumb1 || !MI->getOperand(1).isDead()) {
    MachineOperand LDWb(MI->getOperand(1));
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
                                                 : isThumb1 ? ARM::tLDMIA_UPD
                                                            : ARM::LDMIA_UPD))
              .add(LDWb);
  } else {
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
  }

  if (isThumb1 || !MI->getOperand(0).isDead()) {
    MachineOperand STWb(MI->getOperand(0));
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
                                                 : isThumb1 ? ARM::tSTMIA_UPD
                                                            : ARM::STMIA_UPD))
              .add(STWb);
  } else {
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
  }

  MachineOperand LDBase(MI->getOperand(3));
  LDM.add(LDBase).add(predOps(ARMCC::AL));

  MachineOperand STBase(MI->getOperand(2));
  STM.add(STBase).add(predOps(ARMCC::AL));

  // Sort the scratch registers into ascending order.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  SmallVector<unsigned, 6> ScratchRegs;
  for (unsigned I = 5; I < MI->getNumOperands(); ++I)
    ScratchRegs.push_back(MI->getOperand(I).getReg());
  llvm::sort(ScratchRegs,
             [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
               return TRI.getEncodingValue(Reg1) <
                      TRI.getEncodingValue(Reg2);
             });

  // The load defines the scratch registers; the store consumes (kills) them.
  for (const auto &Reg : ScratchRegs) {
    LDM.addReg(Reg, RegState::Define);
    STM.addReg(Reg, RegState::Kill);
  }

  BB->erase(MI);
}

/// Expand target pseudo-instructions after register allocation. Handles
/// LOAD_STACK_GUARD and MEMCPY, and opportunistically widens an SPR-to-SPR
/// COPY into a full VMOVD. Returns true if \p MI was rewritten.
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
           "LOAD_STACK_GUARD currently supported only for MachO.");
    expandLoadStackGuard(MI);
    MI.getParent()->erase(MI);
    return true;
  }

  if (MI.getOpcode() == ARM::MEMCPY) {
    expandMEMCPY(MI);
    return true;
  }

  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
  // widened to VMOVD. We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
    return false;

  // Look for a copy between even S-registers. That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  Register DstRegS = MI.getOperand(0).getReg();
  Register SrcRegS = MI.getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI.getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  LLVM_DEBUG(dbgs() << "widening: " << MI);
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);

  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI.RemoveOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI.setDesc(get(ARM::VMOVD));
  MI.getOperand(0).setReg(DstRegD);
  MI.getOperand(1).setReg(SrcRegD);
  MIB.add(predOps(ARMCC::AL));

  // We are now reading SrcRegD instead of SrcRegS. This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI.getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
  if (MI.getOperand(1).isKill()) {
    MI.getOperand(1).setIsKill(false);
    MI.addRegisterKilled(SrcRegS, TRI, true);
  }

  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
  return true;
}

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = nullptr;

  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::Create(
        cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
        4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
      Create(MF.getFunction().getContext(),
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
             ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
      Create(MF.getFunction().getContext(),
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  // Register the cloned value and hand the new index back through CPI.
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

/// Re-materialize \p Orig before \p I, retargeting its def to
/// DestReg:SubIdx. PIC constant-pool loads additionally get a fresh
/// constant-pool entry and PC label so each copy is unique.
void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     unsigned DestReg, unsigned SubIdx,
                                     const MachineInstr &Orig,
                                     const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig.getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig.getOperand(1).getIndex();
    // duplicateCPV updates CPI to the cloned entry's index.
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
        .addConstantPoolIndex(CPI)
        .addImm(PCLabelId)
        .cloneMemRefs(Orig);
    break;
  }
  }
}

/// Clone \p Orig (and any instructions bundled with it) into MBB, giving
/// each PIC constant-pool load in the bundle a fresh constant-pool entry
/// and PC label.
MachineInstr &
ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator InsertBefore,
                            const MachineInstr &Orig) const {
  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
  for (;;) {
    switch (I->getOpcode()) {
    case ARM::tLDRpci_pic:
    case ARM::t2LDRpci_pic: {
      MachineFunction &MF = *MBB.getParent();
      unsigned CPI = I->getOperand(1).getIndex();
      unsigned PCLabelId = duplicateCPV(MF, CPI);
      I->getOperand(1).setIndex(CPI);
      I->getOperand(2).setImm(PCLabelId);
      break;
    }
    }
    if (!I->isBundledWithSucc())
      break;
    ++I;
  }
  return Cloned;
}

/// Return true if MI0 and MI1 are known to load/produce the same value,
/// comparing through the constant pool for the pc-relative load pseudos.
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
                                        const MachineInstr &MI1,
                                        const MachineRegisterInfo *MRI) const {
  unsigned Opcode = MI0.getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::LDRLIT_ga_pcrel ||
      Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
      Opcode == ARM::tLDRLIT_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1.getOpcode() != Opcode)
      return false;
    if (MI0.getNumOperands() != MI1.getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0.getOperand(1);
    const MachineOperand &MO1 = MI1.getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::LDRLIT_ga_pcrel ||
        Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
        Opcode == ARM::tLDRLIT_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    // Constant-pool loads: compare the pool entries themselves.
    const MachineFunction *MF = MI0.getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1.getOpcode() != Opcode)
      return false;
    if (MI0.getNumOperands() != MI1.getNumOperands())
      return false;

    Register Addr0 = MI0.getOperand(1).getReg();
    Register Addr1 = MI1.getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI || !Register::isVirtualRegister(Addr0) ||
          !Register::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded value, e.g. a constantpool of a global address, are
      // the same.
      if (!produceSameValue(*Def0, *Def1, MRI))
        return false;
    }

    for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
      // %12 = PICLDR %11, 0, 14, %noreg
      const MachineOperand &MO0 = MI0.getOperand(i);
      const MachineOperand &MO1 = MI1.getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only difference
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  // Only handle the simple immediate-offset load forms below.
  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRBi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRBi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // NOTE(review): this list omits t2LDRDi8, which the Load1 list above
  // accepts — confirm whether the asymmetry is intentional.
  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRBi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRBi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  // Check if the machine opcodes are different. If they are different
  // then we consider them to not be of the same base address,
  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the
  // other LDRBi12. In this case, they are considered to be the same because
  // they are different encoding forms of the same basic instruction.
  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
      !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
         Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
        (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
         Load2->getMachineOpcode() == ARM::t2LDRBi8)))
    return false;  // FIXME: overly conservative?

  // Four loads in a row should be sufficient.
  if (NumLoads >= 3)
    return false;

  return true;
}

bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI.isDebugInstr())
    return false;

  // Terminators and labels can't be scheduled around.
  if (MI.isTerminator() || MI.isPosition())
    return true;

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any debug instructions
  while (++I != MBB->end() && I->isDebugInstr())
    ;
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  // Calls don't actually change the stack pointer, even if they have imp-defs.
  // No ARM calling conventions change the stack pointer. (X86 calling
  // conventions sometimes do).
  if (!MI.isCall() && MI.definesRegister(ARM::SP))
    return true;

  return false;
}

bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
                    unsigned NumCycles, unsigned ExtraPredCycles,
                    BranchProbability Probability) const {
  if (!NumCycles)
    return false;

  // If we are optimizing for size, see if the branch in the predecessor can be
  // lowered to cbn?z by the constant island lowering pass, and return false if
  // so. This results in a shorter instruction sequence.
  if (MBB.getParent()->getFunction().hasOptSize()) {
    MachineBasicBlock *Pred = *MBB.pred_begin();
    if (!Pred->empty()) {
      MachineInstr *LastMI = &*Pred->rbegin();
      if (LastMI->getOpcode() == ARM::t2Bcc) {
        const TargetRegisterInfo *TRI = &getRegisterInfo();
        MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
        if (CmpMI)
          return false;
      }
    }
  }
  // Delegate to the two-block overload, treating this as a triangle with an
  // empty false block.
  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
                             MBB, 0, 0, Probability);
}

bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TBB,
                    unsigned TCycles, unsigned TExtra,
                    MachineBasicBlock &FBB,
                    unsigned FCycles, unsigned FExtra,
                    BranchProbability Probability) const {
  if (!TCycles)
    return false;

  // In thumb code we often end up trading one branch for a IT block, and
  // if we are cloning the instruction can increase code size. Prevent
  // blocks with multiple predecessors from being ifcvted to prevent this
  // cloning.
  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
    if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
      return false;
  }

  // Attempt to estimate the relative costs of predication versus branching.
  // Here we scale up each component of UnpredCost to avoid precision issue when
  // scaling TCycles/FCycles by Probability.
  const unsigned ScalingUpFactor = 1024;

  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
  unsigned UnpredCost;
  if (!Subtarget.hasBranchPredictor()) {
    // When we don't have a branch predictor it's always cheaper to not take a
    // branch than take it, so we have to take that into account.
    unsigned NotTakenBranchCost = 1;
    unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
    unsigned TUnpredCycles, FUnpredCycles;
    if (!FCycles) {
      // Triangle: TBB is the fallthrough
      TUnpredCycles = TCycles + NotTakenBranchCost;
      FUnpredCycles = TakenBranchCost;
    } else {
      // Diamond: TBB is the block that is branched to, FBB is the fallthrough
      TUnpredCycles = TCycles + TakenBranchCost;
      FUnpredCycles = FCycles + NotTakenBranchCost;
      // The branch at the end of FBB will disappear when it's predicated, so
      // discount it from PredCost.
      PredCost -= 1 * ScalingUpFactor;
    }
    // The total cost is the cost of each path scaled by their probabilities
    unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
    unsigned FUnpredCost =
        Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
    UnpredCost = TUnpredCost + FUnpredCost;
    // When predicating assume that the first IT can be folded away but later
    // ones cost one cycle each
    if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
      PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
    }
  } else {
    unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
    unsigned FUnpredCost =
        Probability.getCompl().scale(FCycles * ScalingUpFactor);
    UnpredCost = TUnpredCost + FUnpredCost;
    UnpredCost += 1 * ScalingUpFactor; // The branch itself
    UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
  }

  return PredCost <= UnpredCost;
}

unsigned
ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
                                                   unsigned NumInsts) const {
  // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
  // ARM has a condition code field in every predicable instruction, using it
  // doesn't change code size.
  return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0;
}

unsigned
ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
  // If this branch is likely to be folded into the comparison to form a
  // CB(N)Z, then removing it won't reduce code size at all, because that will
  // just replace the CB(N)Z with a CMP.
  if (MI.getOpcode() == ARM::t2Bcc &&
      findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
    return 0;

  unsigned Size = getInstSizeInBytes(MI);

  // For Thumb2, all branches are 32-bit instructions during the if conversion
  // pass, but may be replaced with 16-bit instructions during size reduction.
  // Since the branches considered by if conversion tend to be forward branches
  // over small basic blocks, they are very likely to be in range for the
  // narrow instructions, so we assume the final code size will be half what it
  // currently is.
  if (Subtarget.isThumb2())
    Size /= 2;

  return Size;
}

bool
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                            MachineBasicBlock &FMBB) const {
  // Reduce false anti-dependencies to let the target's out-of-order execution
  // engine do its thing.
  return Subtarget.isProfitableToUnpredicate();
}

/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
2122309124SdimARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, 2123309124Sdim unsigned &PredReg) { 2124309124Sdim int PIdx = MI.findFirstPredOperandIdx(); 2125198090Srdivacky if (PIdx == -1) { 2126198090Srdivacky PredReg = 0; 2127198090Srdivacky return ARMCC::AL; 2128198090Srdivacky } 2129198090Srdivacky 2130309124Sdim PredReg = MI.getOperand(PIdx+1).getReg(); 2131309124Sdim return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); 2132198090Srdivacky} 2133198090Srdivacky 2134288943Sdimunsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { 2135198090Srdivacky if (Opc == ARM::B) 2136198090Srdivacky return ARM::Bcc; 2137234353Sdim if (Opc == ARM::tB) 2138198090Srdivacky return ARM::tBcc; 2139234353Sdim if (Opc == ARM::t2B) 2140234353Sdim return ARM::t2Bcc; 2141198090Srdivacky 2142198090Srdivacky llvm_unreachable("Unknown unconditional branch opcode!"); 2143198090Srdivacky} 2144198090Srdivacky 2145309124SdimMachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, 2146296417Sdim bool NewMI, 2147296417Sdim unsigned OpIdx1, 2148296417Sdim unsigned OpIdx2) const { 2149309124Sdim switch (MI.getOpcode()) { 2150234353Sdim case ARM::MOVCCr: 2151234353Sdim case ARM::t2MOVCCr: { 2152234353Sdim // MOVCC can be commuted by inverting the condition. 2153234353Sdim unsigned PredReg = 0; 2154234353Sdim ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); 2155234353Sdim // MOVCC AL can't be inverted. Shouldn't happen. 2156234353Sdim if (CC == ARMCC::AL || PredReg != ARM::CPSR) 2157276479Sdim return nullptr; 2158309124Sdim MachineInstr *CommutedMI = 2159309124Sdim TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 2160309124Sdim if (!CommutedMI) 2161276479Sdim return nullptr; 2162234353Sdim // After swapping the MOVCC operands, also invert the condition. 
    CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
        .setImm(ARMCC::getOppositeCondition(CC));
    return CommutedMI;
  }
  }
  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}

/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
MachineInstr *
ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
                                   const TargetInstrInfo *TII) const {
  // Only a virtual register with a single non-debug use can be folded away;
  // otherwise the original def must stay anyway.
  if (!Register::isVirtualRegister(Reg))
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;
  // Check if MI can be predicated and folded into the MOVCC.
  if (!isPredicable(*MI))
    return nullptr;
  // Check if MI has any non-dead defs or physreg uses. This also detects
  // predicated instructions which will be reading CPSR.
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    // Physical register operands would escape SSA-level reasoning; bail.
    if (Register::isPhysicalRegister(MO.getReg()))
      return nullptr;
    // A live def other than the folded value would be clobbered by
    // predication when the predicate is false.
    if (MO.isDef() && !MO.isDead())
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  // The def will be sunk to the MOVCC; it must be movable past everything
  // in between, including stores.
  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
    return nullptr;
  return MI;
}

/// Describe a MOVCC pseudo for the generic select-optimization framework:
/// report which operands are the true/false sources and the condition, and
/// whether optimizeSelect may be attempted (always, for MOVCC).
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     unsigned &TrueOp, unsigned &FalseOp,
                                     bool &Optimizable) const {
  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  // MOVCC operands:
  // 0: Def.
  // 1: True use.
  // 2: False use.
  // 3: Condition code.
  // 4: CPSR use.
  TrueOp = 1;
  FalseOp = 2;
  Cond.push_back(MI.getOperand(3));
  Cond.push_back(MI.getOperand(4));
  // We can always fold a def.
  Optimizable = true;
  return false;
}

/// Try to replace a MOVCC whose true (or false) source is defined by a
/// foldable instruction with a single predicated copy of that instruction.
/// Returns the new instruction, or nullptr if no folding was possible.
MachineInstr *
ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
                                 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                                 bool PreferFalse) const {
  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  // Prefer folding the false-side def; fall back to the true side and
  // remember that the condition then has to be inverted.
  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
  Register DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  // Create a new predicated version of DefMI.
  // Rfalse is the first use.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);

  // Copy all the DefMI operands, excluding its (null) predicate.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands();
       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
    NewMI.add(DefMI->getOperand(i));

  // Append the MOVCC's predicate (condition + CPSR), inverting the condition
  // when we folded the true-side def instead of the false-side one.
  unsigned CondCode = MI.getOperand(3).getImm();
  if (Invert)
    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
  else
    NewMI.addImm(CondCode);
  NewMI.add(MI.getOperand(4));

  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
  if (NewMI->hasOptionalDef())
    NewMI.add(condCodeOp());

  // The output register value when the predicate is false is an implicit
  // register operand tied to the first def.
  // The tie makes the register allocator ensure the FalseReg is allocated the
  // same register as operand 0.
  FalseReg.setImplicit();
  NewMI.add(FalseReg);
  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when tranferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}

/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
///
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
  uint16_t PseudoOpc;   // Flag-setting pseudo opcode.
  uint16_t MachineOpc;  // Real machine opcode it expands to.
};

// Table of pseudo -> real opcode pairs, grouped by ARM, Thumb1 and Thumb2
// encodings; consulted linearly by convertAddSubFlagsOpcode() below.
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
  {ARM::ADDSri, ARM::ADDri},
  {ARM::ADDSrr, ARM::ADDrr},
  {ARM::ADDSrsi, ARM::ADDrsi},
  {ARM::ADDSrsr, ARM::ADDrsr},

  {ARM::SUBSri, ARM::SUBri},
  {ARM::SUBSrr, ARM::SUBrr},
  {ARM::SUBSrsi, ARM::SUBrsi},
  {ARM::SUBSrsr, ARM::SUBrsr},

  {ARM::RSBSri, ARM::RSBri},
  {ARM::RSBSrsi, ARM::RSBrsi},
  {ARM::RSBSrsr, ARM::RSBrsr},

  {ARM::tADDSi3, ARM::tADDi3},
  {ARM::tADDSi8, ARM::tADDi8},
  {ARM::tADDSrr, ARM::tADDrr},
  {ARM::tADCS, ARM::tADC},

  {ARM::tSUBSi3, ARM::tSUBi3},
  {ARM::tSUBSi8, ARM::tSUBi8},
  {ARM::tSUBSrr, ARM::tSUBrr},
  {ARM::tSBCS, ARM::tSBC},
  {ARM::tRSBS, ARM::tRSB},
  {ARM::tLSLSri, ARM::tLSLri},

  {ARM::t2ADDSri, ARM::t2ADDri},
  {ARM::t2ADDSrr, ARM::t2ADDrr},
  {ARM::t2ADDSrs, ARM::t2ADDrs},

  {ARM::t2SUBSri, ARM::t2SUBri},
  {ARM::t2SUBSrr, ARM::t2SUBrr},
  {ARM::t2SUBSrs, ARM::t2SUBrs},

  {ARM::t2RSBSri, ARM::t2RSBri},
  {ARM::t2RSBSrs, ARM::t2RSBrs},
};

/// Return the real machine opcode for a flag-setting add/sub pseudo, or 0
/// when OldOpc is not one of the mapped pseudos.
unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
    if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
      return AddSubFlagsOpcodeMap[i].MachineOpc;
  return 0;
}

/// Emit ARM instructions computing DestReg = BaseReg + NumBytes before MBBI,
/// splitting NumBytes into chunks that each fit an ARM rotated 8-bit
/// so_imm field; emits a plain MOV when no adjustment is needed.
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI,
                                   const DebugLoc &dl, unsigned DestReg,
                                   unsigned BaseReg, int NumBytes,
                                   ARMCC::CondCodes Pred, unsigned PredReg,
                                   const ARMBaseInstrInfo &TII,
                                   unsigned MIFlags) {
  if (NumBytes == 0 && DestReg != BaseReg) {
    // No offset: a single predicated register move suffices.
    BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
        .addReg(BaseReg, RegState::Kill)
        .add(predOps(Pred, PredReg))
        .add(condCodeOp())
        .setMIFlags(MIFlags);
    return;
  }

  // Work with a positive magnitude and remember the direction.
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  while (NumBytes) {
    // Peel off the largest chunk expressible as a rotated 8-bit immediate.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");

    // Build the new ADD / SUB.
    unsigned Opc = isSub ?
                           ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
        .addReg(BaseReg, RegState::Kill)
        .addImm(ThisVal)
        .add(predOps(Pred, PredReg))
        .add(condCodeOp())
        .setMIFlags(MIFlags);
    // Chain subsequent chunks off the partially-computed result.
    BaseReg = DestReg;
  }
}

/// Try to absorb an SP adjustment of NumBytes into the push/pop instruction
/// MI by transferring extra (dummy) registers, saving a separate SP update.
/// Only attempted at minsize. Returns true and rewrites MI on success.
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                      MachineFunction &MF, MachineInstr *MI,
                                      unsigned NumBytes) {
  // This optimisation potentially adds lots of load and store
  // micro-operations, it's only really a great benefit to code-size.
  if (!Subtarget.hasMinSize())
    return false;

  // If only one register is pushed/popped, LLVM can use an LDR/STR
  // instead. We can't modify those so make sure we're dealing with an
  // instruction we understand.
  bool IsPop = isPopOpcode(MI->getOpcode());
  bool IsPush = isPushOpcode(MI->getOpcode());
  if (!IsPush && !IsPop)
    return false;

  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
                      MI->getOpcode() == ARM::VLDMDIA_UPD;
  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
                     MI->getOpcode() == ARM::tPOP ||
                     MI->getOpcode() == ARM::tPOP_RET;

  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
                          MI->getOperand(1).getReg() == ARM::SP)) &&
         "trying to fold sp update into non-sp-updating push/pop");

  // The VFP push & pop act on D-registers, so we can only fold an adjustment
  // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
  // if this is violated.
  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
    return false;

  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
  // pred) so the list starts at 4. Thumb1 starts after the predicate.
  int RegListIdx = IsT1PushPop ? 2 : 4;

  // Calculate the space we'll need in terms of registers.
  unsigned RegsNeeded;
  const TargetRegisterClass *RegClass;
  if (IsVFPPushPop) {
    RegsNeeded = NumBytes / 8;
    RegClass = &ARM::DPRRegClass;
  } else {
    RegsNeeded = NumBytes / 4;
    RegClass = &ARM::GPRRegClass;
  }

  // We're going to have to strip all list operands off before
  // re-adding them since the order matters, so save the existing ones
  // for later.
  SmallVector<MachineOperand, 4> RegList;

  // We're also going to need the first register transferred by this
  // instruction, which won't necessarily be the first register in the list.
  unsigned FirstRegEnc = -1; // unsigned sentinel: no register seen yet.

  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
    MachineOperand &MO = MI->getOperand(i);
    RegList.push_back(MO);

    // Track the lowest encoding value among the explicit list registers.
    if (MO.isReg() && !MO.isImplicit() &&
        TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
      FirstRegEnc = TRI->getEncodingValue(MO.getReg());
  }

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Now try to find enough space in the reglist to allocate NumBytes.
  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
       --CurRegEnc) {
    unsigned CurReg = RegClass->getRegister(CurRegEnc);
    // Thumb1 push/pop can only encode the low registers (up to r7).
    if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
      continue;
    if (!IsPop) {
      // Pushing any register is completely harmless, mark the register involved
      // as undef since we don't care about its value and must not restore it
      // during stack unwinding.
      RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
                                                  false, false, true));
      --RegsNeeded;
      continue;
    }

    // However, we can only pop an extra register if it's not live. For
    // registers live within the function we might clobber a return value
    // register; the other way a register can be live here is if it's
    // callee-saved.
    if (isCalleeSavedRegister(CurReg, CSRegs) ||
        MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
            MachineBasicBlock::LQR_Dead) {
      // VFP pops don't allow holes in the register list, so any skip is fatal
      // for our transformation. GPR pops do, so we should just keep looking.
      if (IsVFPPushPop)
        return false;
      else
        continue;
    }

    // Mark the unimportant registers as <def,dead> in the POP.
    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
                                                true));
    --RegsNeeded;
  }

  // Couldn't find enough scratch registers to absorb the whole adjustment.
  if (RegsNeeded > 0)
    return false;

  // Finally we know we can profitably perform the optimisation so go
  // ahead: strip all existing registers off and add them back again
  // in the right order.
  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
    MI->RemoveOperand(i);

  // Add the complete list back in.
  MachineInstrBuilder MIB(MF, &*MI);
  for (int i = RegList.size() - 1; i >= 0; --i)
    MIB.add(RegList[i]);

  return true;
}

/// Rewrite the frame-index operand of MI (at FrameRegIdx) to use FrameReg
/// plus as much of Offset as the instruction's addressing mode can encode.
/// On return Offset holds the unencoded remainder; returns true if the
/// offset was folded completely.
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.RemoveOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      // Negative offsets become a SUBri with positive magnitude.
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immedidate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    // Memory-access instruction: decode the existing immediate offset and
    // the offset-field width/scale from the addressing mode.
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12:
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    case ARMII::AddrMode2:
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    case ARMII::AddrMode3:
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      break;
    case ARMII::AddrMode5FP16:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 2;
      break;
    case ARMII::AddrModeT2_i7:
    case ARMII::AddrModeT2_i7s2:
    case ARMII::AddrModeT2_i7s4:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 7;
      Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
               AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
      break;
    default:
      llvm_unreachable("Unsupported addressing mode!");
    }

    // Fold the pre-existing immediate into the requested offset and
    // normalize to a positive magnitude plus direction flag.
    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold address comp. if opcode has offset bits
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            ImmedOffset |= 1 << NumBits;
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      Offset &= ~(Mask*Scale);
    }
  }

  // Report the remainder (re-signed); fully folded iff nothing is left.
  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default: break;
  // Compare-with-immediate: one source register, immediate in CmpValue.
  case ARM::CMPri:
  case ARM::t2CMPri:
  case ARM::tCMPi8:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI.getOperand(1).getImm();
    return true;
  // Compare-registers: two source registers, no immediate.
  case ARM::CMPrr:
  case ARM::t2CMPrr:
  case ARM::tCMPr:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  // Test-under-mask: the immediate is a mask rather than a compare value.
  case ARM::TSTri:
  case ARM::t2TSTri:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = MI.getOperand(1).getImm();
    CmpValue = 0;
    return true;
  }

  return false;
}

/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
  case ARM::ANDri:
  case ARM::t2ANDri:
    // The AND must apply exactly the mask the TST tests.
    if (CmpMask != MI->getOperand(2).getImm())
      return false;
    // When looking at a common use, SrcReg is the AND's input (operand 1);
    // otherwise it must be the AND's result (operand 0).
    if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
      return true;
    break;
  }

  return false;
}

/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by ADD(a,b,X).
inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
  switch (CC) {
  default: return ARMCC::AL;
  case ARMCC::HS: return ARMCC::LO;
  case ARMCC::LO: return ARMCC::HS;
  case ARMCC::VS: return ARMCC::VS;
  case ARMCC::VC: return ARMCC::VC;
  }
}

/// isRedundantFlagInstr - check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// CMPri can be made redundant by SUBri if the operands are the same.
/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
                                        unsigned SrcReg, unsigned SrcReg2,
                                        int ImmValue, const MachineInstr *OI,
                                        bool &IsThumb1) {
  // CMPrr made redundant by SUBrr (ARM/Thumb2), operands possibly swapped.
  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
      (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
      ((OI->getOperand(1).getReg() == SrcReg &&
        OI->getOperand(2).getReg() == SrcReg2) ||
       (OI->getOperand(1).getReg() == SrcReg2 &&
        OI->getOperand(2).getReg() == SrcReg))) {
    IsThumb1 = false;
    return true;
  }

  // Thumb1 variant: tCMPr vs tSUBrr (note the shifted operand indices).
  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
      ((OI->getOperand(2).getReg() == SrcReg &&
        OI->getOperand(3).getReg() == SrcReg2) ||
       (OI->getOperand(2).getReg() == SrcReg2 &&
        OI->getOperand(3).getReg() == SrcReg))) {
    IsThumb1 = true;
    return true;
  }

  // CMPri made redundant by SUBri with the same register and immediate.
  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
      (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
      OI->getOperand(1).getReg() == SrcReg &&
      OI->getOperand(2).getImm() == ImmValue) {
    IsThumb1 = false;
    return true;
  }

  // Thumb1 variant of the immediate case.
  if (CmpI->getOpcode() == ARM::tCMPi8 &&
      (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
      OI->getOperand(2).getReg() == SrcReg &&
      OI->getOperand(3).getImm() == ImmValue) {
    IsThumb1 = true;
    return true;
  }

  // CMPrr(r0, r1) made redundant by ADDr[ri](r0, r1, X); the caller is
  // expected to remap condition codes via getCmpToAddCondition().
  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
      (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
       OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
      OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
      OI->getOperand(0).getReg() == SrcReg &&
      OI->getOperand(1).getReg() == SrcReg2) {
    IsThumb1 = false;
    return true;
  }

  // Thumb1 variant of the ADD case.
  if (CmpI->getOpcode() == ARM::tCMPr &&
      (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
       OI->getOpcode() == ARM::tADDrr) &&
      OI->getOperand(0).getReg() == SrcReg &&
      OI->getOperand(2).getReg() == SrcReg2) {
    IsThumb1 = true;
    return true;
  }

  return false;
}

/// Return true if MI's opcode has a flag-setting (CPSR-defining) form that
/// optimizeCompareInstr can switch to. Sets IsThumb1 when MI uses a Thumb1
/// encoding.
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tLSLri:
  case ARM::tLSRri:
  case ARM::tLSLrr:
  case ARM::tLSRrr:
  case ARM::tSUBrr:
  case ARM::tADDrr:
  case ARM::tADDi3:
  case ARM::tADDi8:
  case ARM::tSUBi3:
  case ARM::tSUBi8:
  case ARM::tMUL:
  case ARM::tADC:
  case ARM::tSBC:
  case ARM::tRSB:
  case ARM::tAND:
  case ARM::tORR:
  case ARM::tEOR:
  case ARM::tBIC:
  case ARM::tMVN:
  case ARM::tASRri:
  case ARM::tASRrr:
  case ARM::tROR:
    IsThumb1 = true;
    LLVM_FALLTHROUGH;
  case ARM::RSBrr:
  case ARM::RSBri:
  case ARM::RSCrr:
  case ARM::RSCri:
  case ARM::ADDrr:
  case ARM::ADDri:
  case ARM::ADCrr:
  case ARM::ADCri:
  case ARM::SUBrr:
  case ARM::SUBri:
  case ARM::SBCrr:
  case ARM::SBCri:
  case ARM::t2RSBri:
  case ARM::t2ADDrr:
  case ARM::t2ADDri:
  case ARM::t2ADCrr:
  case ARM::t2ADCri:
  case ARM::t2SUBrr:
  case ARM::t2SUBri:
  case ARM::t2SBCrr:
  case ARM::t2SBCri:
  case ARM::ANDrr:
  case ARM::ANDri:
  case ARM::t2ANDrr:
  case ARM::t2ANDri:
  case ARM::ORRrr:
  case ARM::ORRri:
  case ARM::t2ORRrr:
  case ARM::t2ORRri:
  case ARM::EORrr:
  case ARM::EORri:
  case ARM::t2EORrr:
  case ARM::t2EORri:
  case ARM::t2LSRri:
  case ARM::t2LSRrr:
  case ARM::t2LSLri:
  case ARM::t2LSLrr:
    return true;
  }
}

/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
/// flags in the same way as Compare.
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  // Masked compares sometimes use the same register as the corresponding 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
      // The def itself is not usable; look through the other uses of SrcReg
      // (within the compare's block) for an AND that matches the mask.
      MI = nullptr;
      for (MachineRegisterInfo::use_instr_iterator
           UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
           UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr.getParent())
          continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
            isPredicated(*PotentialAND))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr.getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  // There are two possible candidates which can be changed to set CPSR:
  // One is MI, the other is a SUB or ADD instruction.
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
  // ADDr[ri](r1, r2, X).
  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
  MachineInstr *SubAdd = nullptr;
  if (SrcReg2 != 0)
    // MI is not a candidate for CMPrr.
    MI = nullptr;
  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
    // Conservatively refuse to convert an instruction which isn't in the same
    // BB as the comparison.
    // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
    // Thus we cannot return here.
    if (CmpInstr.getOpcode() == ARM::CMPri ||
        CmpInstr.getOpcode() == ARM::t2CMPri ||
        CmpInstr.getOpcode() == ARM::tCMPi8)
      MI = nullptr;
    else
      return false;
  }

  bool IsThumb1 = false;
  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
    return false;

  // We also want to do this peephole for cases like this: if (a*b == 0),
  // and optimise away the CMP instruction from the generated code sequence:
  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
  // resulting from the select instruction, but these MOVS instructions for
  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
  // However, if we only have MOVS instructions in between the CMP and the
  // other instruction (the MULS in this example), then the CPSR is dead so we
  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
  // reordering and then continue the analysis hoping we can eliminate the
  // CMP. This peephole works on the vregs, so is still in SSA form. As a
  // consequence, the movs won't redefine/kill the MUL operands which would
  // make this reordering illegal.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI && IsThumb1) {
    --I;
    if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
      bool CanReorder = true;
      for (; I != E; --I) {
        if (I->getOpcode() != ARM::tMOVi8) {
          CanReorder = false;
          break;
        }
      }
      if (CanReorder) {
        // Move MI down so that it directly precedes CmpInstr.
        MI = MI->removeFromParent();
        E = CmpInstr;
        CmpInstr.getParent()->insert(E, MI);
      }
    }
    I = CmpInstr;
    E = MI;
  }

  // Check that CPSR isn't set between the comparison instruction and the one we
  // want to change. At the same time, search for SubAdd.
  bool SubAddIsThumb1 = false;
  do {
    const MachineInstr &Instr = *--I;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
                             SubAddIsThumb1)) {
      SubAdd = &*I;
      break;
    }

    // Allow E (which was initially MI) to be SubAdd but do not search before E.
    if (I == E)
      break;

    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
        Instr.readsRegister(ARM::CPSR, TRI))
      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      return false;

    if (I == B) {
      // In some cases, we scan the use-list of an instruction for an AND;
      // that AND is in the same BB, but may not be scheduled before the
      // corresponding TST. In that case, bail out.
      //
      // FIXME: We could try to reschedule the AND.
      return false;
    }
  } while (true);

  // Return false if no candidates exist.
  if (!MI && !SubAdd)
    return false;

  // If we found a SubAdd, use it as it will be closer to the CMP
  if (SubAdd) {
    MI = SubAdd;
    IsThumb1 = SubAddIsThumb1;
  }

  // We can't use a predicated instruction - it doesn't always write the flags.
  if (isPredicated(*MI))
    return false;

  // Scan forward for the use of CPSR
  // When checking against MI: if it's a conditional code that requires
  // checking of the V bit or C bit, then this is not safe to do.
  // It is safe to remove CmpInstr if CPSR is redefined or killed.
  // If we are done with the basic block, we need to check whether CPSR is
  // live-out.
  // Condition-code operands that must be rewritten (swapped/adjusted) are
  // collected here and only committed once the whole scan proves it is safe
  // to delete CmpInstr.
  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
      OperandsToUpdate;
  bool isSafe = false;
  I = CmpInstr;
  E = CmpInstr.getParent()->end();
  while (!isSafe && ++I != E) {
    const MachineInstr &Instr = *I;
    for (unsigned IO = 0, EO = Instr.getNumOperands();
         !isSafe && IO != EO; ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
        isSafe = true;
        break;
      }
      if (!MO.isReg() || MO.getReg() != ARM::CPSR)
        continue;
      if (MO.isDef()) {
        isSafe = true;
        break;
      }
      // Condition code is after the operand before CPSR except for VSELs.
      ARMCC::CondCodes CC;
      bool IsInstrVSel = true;
      switch (Instr.getOpcode()) {
      default:
        IsInstrVSel = false;
        CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
        break;
      case ARM::VSELEQD:
      case ARM::VSELEQS:
      case ARM::VSELEQH:
        CC = ARMCC::EQ;
        break;
      case ARM::VSELGTD:
      case ARM::VSELGTS:
      case ARM::VSELGTH:
        CC = ARMCC::GT;
        break;
      case ARM::VSELGED:
      case ARM::VSELGES:
      case ARM::VSELGEH:
        CC = ARMCC::GE;
        break;
      case ARM::VSELVSD:
      case ARM::VSELVSS:
      case ARM::VSELVSH:
        CC = ARMCC::VS;
        break;
      }

      if (SubAdd) {
        // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
        // on CMP needs to be updated to be based on SUB.
        // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
        // needs to be modified.
        // Push the condition code operands to OperandsToUpdate.
        // If it is safe to remove CmpInstr, the condition code of these
        // operands will be modified.
        unsigned Opc = SubAdd->getOpcode();
        bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
                     Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
                     Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
                     Opc == ARM::tSUBi8;
        // tSUBrr's source operands start at index 2 (after the CPSR output).
        unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
        if (!IsSub ||
            (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
             SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
          // VSel doesn't support condition code update.
          if (IsInstrVSel)
            return false;
          // Ensure we can swap the condition.
          ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
          if (NewCC == ARMCC::AL)
            return false;
          OperandsToUpdate.push_back(
              std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
        }
      } else {
        // No SubAdd, so this is x = <op> y, z; cmp x, 0.
        switch (CC) {
        case ARMCC::EQ: // Z
        case ARMCC::NE: // Z
        case ARMCC::MI: // N
        case ARMCC::PL: // N
        case ARMCC::AL: // none
          // CPSR can be used multiple times, we should continue.
          break;
        case ARMCC::HS: // C
        case ARMCC::LO: // C
        case ARMCC::VS: // V
        case ARMCC::VC: // V
        case ARMCC::HI: // C Z
        case ARMCC::LS: // C Z
        case ARMCC::GE: // N V
        case ARMCC::LT: // N V
        case ARMCC::GT: // Z N V
        case ARMCC::LE: // Z N V
          // The instruction uses the V bit or C bit which is not safe.
          return false;
        }
      }
    }
  }

  // If CPSR is not killed nor re-defined, we should check whether it is
  // live-out. If it is live-out, do not optimize.
  if (!isSafe) {
    MachineBasicBlock *MBB = CmpInstr.getParent();
    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
         SE = MBB->succ_end(); SI != SE; ++SI)
      if ((*SI)->isLiveIn(ARM::CPSR))
        return false;
  }

  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
  // set CPSR so this is represented as an explicit output)
  if (!IsThumb1) {
    MI->getOperand(5).setReg(ARM::CPSR);
    MI->getOperand(5).setIsDef(true);
  }
  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
  CmpInstr.eraseFromParent();

  // Modify the condition code of operands in OperandsToUpdate.
  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
    OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);

  // MI's CPSR result is now consumed by the former users of the compare, so
  // it must no longer be marked dead.
  MI->clearRegisterDeads(ARM::CPSR);

  return true;
}

bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
  // Do not sink MI if it might be used to optimize a redundant compare.
  // We heuristically only look at the instruction immediately following MI to
  // avoid potentially searching the entire basic block.
  if (isPredicated(MI))
    return true;
  MachineBasicBlock::const_iterator Next = &MI;
  ++Next;
  unsigned SrcReg, SrcReg2;
  int CmpMask, CmpValue;
  bool IsThumb1;
  // If the very next instruction is a compare that MI could make redundant
  // (see optimizeCompareInstr), keep MI where it is.
  if (Next != MI.getParent()->end() &&
      analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
    return false;
  return true;
}

/// Fold the 32-bit immediate materialized by DefMI (a MOVi32imm /
/// t2MOVi32imm with Reg as its only non-debug use) directly into UseMI by
/// rewriting UseMI's register-register form into an immediate form, splitting
/// the constant into two shifter-operand-encodable parts. On success DefMI is
/// erased and UseMI is rewritten in place; returns false (leaving everything
/// untouched) if folding is not possible or not safe.
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                     unsigned Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
  unsigned DefOpc = DefMI.getOpcode();
  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
    return false;
  if (!DefMI.getOperand(1).isImm())
    // Could be t2MOVi32imm @xx
    return false;

  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  const MCInstrDesc &DefMCID = DefMI.getDesc();
  if (DefMCID.hasOptionalDef()) {
    unsigned NumOps = DefMCID.getNumOperands();
    const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
    if (MO.getReg() == ARM::CPSR && !MO.isDead())
      // If DefMI defines CPSR and it is not dead, it's obviously not safe
      // to delete DefMI.
      return false;
  }

  const MCInstrDesc &UseMCID = UseMI.getDesc();
  if (UseMCID.hasOptionalDef()) {
    unsigned NumOps = UseMCID.getNumOperands();
    if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
      // If the instruction sets the flag, do not attempt this optimization
      // since it may change the semantics of the code.
      return false;
  }

  unsigned UseOpc = UseMI.getOpcode();
  unsigned NewUseOpc = 0;
  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
  bool Commute = false;
  switch (UseOpc) {
  default: return false;
  case ARM::SUBrr:
  case ARM::ADDrr:
  case ARM::ORRrr:
  case ARM::EORrr:
  case ARM::t2SUBrr:
  case ARM::t2ADDrr:
  case ARM::t2ORRrr:
  case ARM::t2EORrr: {
    // The immediate may feed either source operand; remember which one so
    // the surviving register operand can be placed correctly below.
    Commute = UseMI.getOperand(2).getReg() != Reg;
    switch (UseOpc) {
    default: break;
    case ARM::ADDrr:
    case ARM::SUBrr:
      if (UseOpc == ARM::SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      if (ARM_AM::isSOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
      else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      break;
    case ARM::ORRrr:
    case ARM::EORrr:
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
      }
      break;
    case ARM::t2ADDrr:
    case ARM::t2SUBrr: {
      if (UseOpc == ARM::t2SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
      const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
      const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
      if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
      else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      break;
    }
    case ARM::t2ORRrr:
    case ARM::t2EORrr:
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
      }
      break;
    }
  }
  }

  // Emit a new instruction applying the first immediate part, then rewrite
  // UseMI in place to apply the second part to that intermediate result.
  unsigned OpIdx = Commute ? 2 : 1;
  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
  bool isKill = UseMI.getOperand(OpIdx).isKill();
  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
  Register NewReg = MRI->createVirtualRegister(TRC);
  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
          NewReg)
      .addReg(Reg1, getKillRegState(isKill))
      .addImm(SOImmValV1)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  UseMI.setDesc(get(NewUseOpc));
  UseMI.getOperand(1).setReg(NewReg);
  UseMI.getOperand(1).setIsKill();
  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
  DefMI.eraseFromParent();
  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP.
  // Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm].
  // Then the below code will not be needed, as the input/output register
  // classes will be rgpr or gprSP.
  // For now, we fix the UseMI operand explicitly here:
  switch(NewUseOpc){
    case ARM::t2ADDspImm:
    case ARM::t2SUBspImm:
    case ARM::t2ADDri:
    case ARM::t2SUBri:
      MRI->setRegClass(UseMI.getOperand(0).getReg(), TRC);
  }
  return true;
}

/// Compute the number of micro-ops for a load/store instruction on Swift
/// subtargets (used by getNumMicroOps when Subtarget.isSwift()). Falls back
/// to the itinerary's count for opcodes not special-cased here.
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
                                        const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: {
    const MCInstrDesc &Desc = MI.getDesc();
    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
    assert(UOps >= 0 && "bad # UOps");
    return UOps;
  }

  case ARM::LDRrs:
  case ARM::LDRBrs:
  case ARM::STRrs:
  case ARM::STRBrs: {
    unsigned ShOpVal = MI.getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRH:
  case ARM::STRH: {
    if (!MI.getOperand(2).getReg())
      return 1;

    unsigned ShOpVal = MI.getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2
                           || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRSB:
  case ARM::LDRSH:
    return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;

  case ARM::LDRSB_POST:
  case ARM::LDRSH_POST: {
    // A destination matching the updated base register costs an extra uop.
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    return (Rt == Rm) ? 4 : 3;
  }

  case ARM::LDR_PRE_REG:
  case ARM::LDRB_PRE_REG: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (Rt == Rm)
      return 3;
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::STR_PRE_REG:
  case ARM::STRB_PRE_REG: {
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::LDRH_PRE:
  case ARM::STRH_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (!Rm)
      return 2;
    if (Rt == Rm)
      return 3;
    return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
  }

  case ARM::LDR_POST_REG:
  case ARM::LDRB_POST_REG:
  case ARM::LDRH_POST: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    return (Rt == Rm) ? 3 : 2;
  }

  case ARM::LDR_PRE_IMM:
  case ARM::LDRB_PRE_IMM:
  case ARM::LDR_POST_IMM:
  case ARM::LDRB_POST_IMM:
  case ARM::STRB_POST_IMM:
  case ARM::STRB_POST_REG:
  case ARM::STRB_PRE_IMM:
  case ARM::STRH_POST:
  case ARM::STR_POST_IMM:
  case ARM::STR_POST_REG:
  case ARM::STR_PRE_IMM:
    return 2;

  case ARM::LDRSB_PRE:
  case ARM::LDRSH_PRE: {
    Register Rm = MI.getOperand(3).getReg();
    if (Rm == 0)
      return 3;
    Register Rt = MI.getOperand(0).getReg();
    if (Rt == Rm)
      return 4;
    unsigned ShOpVal = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 3;
    return 4;
  }

  case ARM::LDRD: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(2).getReg();
    Register Rm = MI.getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
                                                                          : 3;
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::STRD: {
    Register Rm = MI.getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
                                                                          : 3;
    return 2;
  }

  case ARM::LDRD_POST:
  case ARM::t2LDRD_POST:
    return 3;

  case ARM::STRD_POST:
  case ARM::t2STRD_POST:
    return 4;

  case ARM::LDRD_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(3).getReg();
    Register Rm = MI.getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
                                                                          : 4;
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::t2LDRD_PRE: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(3).getReg();
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::STRD_PRE: {
    Register Rm = MI.getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
                                                                          : 4;
    return 3;
  }

  case ARM::t2STRD_PRE:
    return 3;

  case ARM::t2LDR_POST:
  case ARM::t2LDRB_POST:
  case ARM::t2LDRB_PRE:
  case ARM::t2LDRSBi12:
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBpci:
  case ARM::t2LDRSBs:
  case ARM::t2LDRH_POST:
  case ARM::t2LDRH_PRE:
  case ARM::t2LDRSBT:
  case ARM::t2LDRSB_POST:
  case ARM::t2LDRSB_PRE:
  case ARM::t2LDRSH_POST:
  case ARM::t2LDRSH_PRE:
  case ARM::t2LDRSHi12:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHpci:
  case ARM::t2LDRSHs:
    return 2;

  case ARM::t2LDRDi8: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rn = MI.getOperand(2).getReg();
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::t2STRB_POST:
  case ARM::t2STRB_PRE:
  case ARM::t2STRBs:
  case ARM::t2STRDi8:
  case ARM::t2STRH_POST:
  case ARM::t2STRH_PRE:
  case ARM::t2STRHs:
  case ARM::t2STR_POST:
  case ARM::t2STR_PRE:
  case ARM::t2STRs:
    return 2;
  }
}

// Return the number of 32-bit words loaded by LDM or stored by STM. If this
// can't be easily determined return 0 (missing MachineMemOperand).
//
// FIXME: The current MachineInstr design does not support relying on machine
// mem operands to determine the width of a memory access.
// Instead, we expect
// the target to provide this information based on the instruction opcode and
// operands. However, using MachineMemOperand is the best solution now for
// two reasons:
//
// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
// operands. This is much more dangerous than using the MachineMemOperand
// sizes because CodeGen passes can insert/remove optional machine operands. In
// fact, it's totally incorrect for preRA passes and appears to be wrong for
// postRA passes as well.
//
// 2) getNumLDMAddresses is only used by the scheduling machine model and any
// machine model that calls this should handle the unknown (zero size) case.
//
// Long term, we should require a target hook that verifies MachineMemOperand
// sizes during MC lowering. That target hook should be local to MC lowering
// because we can't ensure that it is aware of other MI forms. Doing this will
// ensure that MachineMemOperands are correctly propagated through all passes.
//
// Sum the sizes of this instruction's memory operands and report the number
// of 32-bit addresses covered (capped at 16 for the scheduler's sake).
unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
  unsigned Size = 0;
  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
                                  E = MI.memoperands_end();
       I != E; ++I) {
    Size += (*I)->getSize();
  }
  // FIXME: The scheduler currently can't handle values larger than 16. But
  // the values can actually go up to 32 for floating-point load/store
  // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
  // operations isn't right; we could end up with "extra" memory operands for
  // various reasons, like tail merge merging two memory operations.
  return std::min(Size / 4, 16U);
}

// Micro-op count for a load/store-multiple on subtargets that issue a single
// register per cycle: one uop per register plus one for address computation,
// plus one more for base-register writeback on the _UPD forms and two more
// (writeback + write to pc) for the return forms.
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
                                                    unsigned NumRegs) {
  unsigned UOps = 1 + NumRegs; // 1 for address computation.
  switch (Opc) {
  default:
    break;
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    ++UOps; // One for base register writeback.
    break;
  case ARM::LDMIA_RET:
  case ARM::tPOP_RET:
  case ARM::t2LDMIA_RET:
    UOps += 2; // One for base reg wb, one for write to pc.
    break;
  }
  return UOps;
}

// Return the number of micro-ops MI decodes to. Uses the itinerary's static
// count when it is known (>= 0); otherwise the instruction must be one of the
// variable-width load/store multiples handled below, whose uop count depends
// on the length of the register list and the subtarget's issue model.
unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                          const MachineInstr &MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI.getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);

    return ItinUOps;
  }

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple are determined by the number
  // registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    // Registers beyond the fixed operands are the variable register list.
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
    switch (Subtarget.getLdStMultipleTiming()) {
    case ARMSubtarget::SingleIssuePlusExtras:
      return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
    case ARMSubtarget::SingleIssue:
      // Assume the worst.
      return NumRegs;
    case ARMSubtarget::DoubleIssue: {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      unsigned UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++UOps;
      return UOps;
    }
    case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
      unsigned UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
          (*MI.memoperands_begin())->getAlignment() < 8)
        ++UOps;
      return UOps;
    }
    }
  }
  }
  llvm_unreachable("Didn't find the number of microops");
}

// Result-availability cycle for a register defined by a VLDM-style load.
// RegNo is the def's position within the variable register list (values <= 0
// mean the def is the address-writeback operand, not a loaded register).
int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &DefMCID,
                                  unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    // (regno / 2) + (regno % 2) + 1
    DefCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++DefCycle;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    DefCycle = RegNo;
    bool isSLoad = false;

    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLDMSIA:
    case ARM::VLDMSIA_UPD:
    case ARM::VLDMSDB_UPD:
      isSLoad = true;
      break;
    }

    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    // then it takes an extra cycle.
    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
      ++DefCycle;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

// Return true if the LDM's base register (operand 0) also appears somewhere
// in the rest of the operand list (i.e. it is reloaded by the instruction).
bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
  Register BaseReg = MI.getOperand(0).getReg();
  for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
    const auto &Op = MI.getOperand(i);
    if (Op.isReg() && Op.getReg() == BaseReg)
      return true;
  }
  return false;
}

// Number of registers in the variable_ops register list of an LDM. The +1
// accounts for the layouts below: the writeback def is part of the fixed
// operands in one form but not the other.
unsigned
ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
  // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
  // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
  return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
}

// Result-availability cycle for a register defined by an integer LDM.
// RegNo is the def's position in the variable register list; <= 0 means the
// def is the address writeback.
int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
                                 const MCInstrDesc &DefMCID,
                                 unsigned DefClass,
                                 unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    // 4 registers would be issued: 1, 2, 1.
    // 5 registers would be issued: 1, 2, 2.
    DefCycle = RegNo / 2;
    if (DefCycle < 1)
      DefCycle = 1;
    // Result latency is issue cycle + 2: E2.
    DefCycle += 2;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    DefCycle = (RegNo / 2);
    // If there are odd number of registers or if it's not 64-bit aligned,
    // then it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || DefAlign < 8)
      ++DefCycle;
    // Result latency is AGU cycles + 2.
    DefCycle += 2;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

// Read cycle for a register stored by a VSTM, by its position in the
// register list (<= 0 means a fixed operand; defer to the itinerary).
int
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &UseMCID,
                                  unsigned UseClass,
                                  unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  int UseCycle;
  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    // (regno / 2) + (regno % 2) + 1
    UseCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++UseCycle;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    UseCycle = RegNo;
    bool isSStore = false;

    switch (UseMCID.getOpcode()) {
    default: break;
    case ARM::VSTMSIA:
    case ARM::VSTMSIA_UPD:
    case ARM::VSTMSDB_UPD:
      isSStore = true;
      break;
    }

    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    // then it takes an extra cycle.
    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = RegNo + 2;
  }

  return UseCycle;
}

// Read cycle for a register stored by an integer STM, by its position in the
// register list (<= 0 means a fixed operand; defer to the itinerary).
int
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
                                 const MCInstrDesc &UseMCID,
                                 unsigned UseClass,
                                 unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  int UseCycle;
  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
    UseCycle = RegNo / 2;
    if (UseCycle < 2)
      UseCycle = 2;
    // Read in E3.
    UseCycle += 2;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    UseCycle = (RegNo / 2);
    // If there are odd number of registers or if it's not 64-bit aligned,
    // then it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = 1;
  }
  return UseCycle;
}

// Operand latency between a def and a use described only by their MCInstrDescs
// (no MachineInstrs). Falls back to the itinerary for fixed operands and to
// the LDM/STM cycle helpers above for variable_ops register lists. Returns a
// negative value when the latency cannot be determined.
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MCInstrDesc &DefMCID,
                                    unsigned DefIdx, unsigned DefAlign,
                                    const MCInstrDesc &UseMCID,
                                    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();

  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction, the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  int DefCycle = -1;
  bool LdmBypass = false;
  switch (DefMCID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;

  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    LdmBypass = true;
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;
  }

  if (DefCycle == -1)
    // We can't seem to determine the result latency of the def, assume it's 2.
    DefCycle = 2;

  int UseCycle = -1;
  switch (UseMCID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;

  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;

  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;
  }

  if (UseCycle == -1)
    // Assume it's read in the first stage.
    UseCycle = 1;

  // Latency = cycle the def becomes available - cycle the use reads it + 1,
  // minus one more if the itinerary says the pipeline forwards the value.
  UseCycle = DefCycle - UseCycle + 1;
  if (UseCycle > 0) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
                                          UseClass, UseIdx))
        --UseCycle;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx)) {
      --UseCycle;
    }
  }

  return UseCycle;
}

// Walk backwards inside MI's bundle to the instruction that defines Reg.
// On return, DefIdx is the operand index of the def and Dist the number of
// instructions stepped over. Asserts if no def of Reg exists in the bundle.
static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
                                           const MachineInstr *MI, unsigned Reg,
                                           unsigned &DefIdx, unsigned &Dist) {
  Dist = 0;

  MachineBasicBlock::const_iterator I = MI; ++I;
  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
  assert(II->isInsideBundle() && "Empty bundle?");

  int Idx = -1;
  while (II->isInsideBundle()) {
    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
    if (Idx != -1)
      break;
    --II;
    ++Dist;
  }

  assert(Idx != -1 && "Cannot find bundled definition!");
  DefIdx = Idx;
  return &*II;
}

// Walk forwards inside MI's bundle to the first instruction that uses Reg.
// Returns null (with Dist reset to 0) if no use is found in the bundle.
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
                                           const MachineInstr &MI, unsigned Reg,
                                           unsigned &UseIdx, unsigned &Dist) {
  Dist = 0;

  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
  assert(II->isInsideBundle() && "Empty bundle?");
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();

  // FIXME: This doesn't properly handle multiple uses.
  int Idx = -1;
  while (II != E && II->isInsideBundle()) {
    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
    if (Idx != -1)
      break;
    // t2IT predication markers don't count toward the distance.
    if (II->getOpcode() != ARM::t2IT)
      ++Dist;
    ++II;
  }

  if (Idx == -1) {
    Dist = 0;
    return nullptr;
  }

  UseIdx = Idx;
  return &*II;
}

/// Return the number of cycles to add to (or subtract from) the static
/// itinerary based on the def opcode and alignment. The caller will ensure that
/// adjusted latency is at least one cycle.
static int adjustDefLatency(const ARMSubtarget &Subtarget,
                            const MachineInstr &DefMI,
                            const MCInstrDesc &DefMCID, unsigned DefAlign) {
  int Adjust = 0;
  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      // Operand 3 encodes the addressing-mode-2 shifter operand.
      unsigned ShOpVal = DefMI.getOperand(3).getImm();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Adjust;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI.getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 2)
        --Adjust;
      break;
    }
    }
  } else if (Subtarget.isSwift()) {
    // FIXME: Properly handle all of the latency adjustments for address
    // writeback.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal = DefMI.getOperand(3).getImm();
      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (!isSub &&
          (ShImm == 0 ||
           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
        Adjust -= 2;
      else if (!isSub &&
               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
        --Adjust;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI.getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
        Adjust -= 2;
      break;
    }
    }
  }

  // On subtargets that penalize unaligned VLDn accesses, a sub-64-bit-aligned
  // address adds one cycle to the latency of every VLDn form listed below.
  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLD1q8:
    case ARM::VLD1q16:
    case ARM::VLD1q32:
    case ARM::VLD1q64:
    case ARM::VLD1q8wb_fixed:
    case ARM::VLD1q16wb_fixed:
    case ARM::VLD1q32wb_fixed:
    case ARM::VLD1q64wb_fixed:
    case ARM::VLD1q8wb_register:
    case ARM::VLD1q16wb_register:
    case ARM::VLD1q32wb_register:
    case ARM::VLD1q64wb_register:
    case ARM::VLD2d8:
    case ARM::VLD2d16:
    case ARM::VLD2d32:
    case ARM::VLD2q8:
    case ARM::VLD2q16:
    case ARM::VLD2q32:
    case ARM::VLD2d8wb_fixed:
    case ARM::VLD2d16wb_fixed:
    case ARM::VLD2d32wb_fixed:
    case ARM::VLD2q8wb_fixed:
    case ARM::VLD2q16wb_fixed:
    case ARM::VLD2q32wb_fixed:
    case ARM::VLD2d8wb_register:
    case ARM::VLD2d16wb_register:
    case ARM::VLD2d32wb_register:
    case ARM::VLD2q8wb_register:
    case ARM::VLD2q16wb_register:
    case ARM::VLD2q32wb_register:
    case ARM::VLD3d8:
    case ARM::VLD3d16:
    case ARM::VLD3d32:
    case ARM::VLD1d64T:
    case ARM::VLD3d8_UPD:
    case ARM::VLD3d16_UPD:
    case ARM::VLD3d32_UPD:
    case ARM::VLD1d64Twb_fixed:
    case ARM::VLD1d64Twb_register:
    case ARM::VLD3q8_UPD:
    case ARM::VLD3q16_UPD:
    case ARM::VLD3q32_UPD:
    case ARM::VLD4d8:
    case ARM::VLD4d16:
    case ARM::VLD4d32:
    case ARM::VLD1d64Q:
    case ARM::VLD4d8_UPD:
    case ARM::VLD4d16_UPD:
    case ARM::VLD4d32_UPD:
    case ARM::VLD1d64Qwb_fixed:
    case ARM::VLD1d64Qwb_register:
    case ARM::VLD4q8_UPD:
    case ARM::VLD4q16_UPD:
    case ARM::VLD4q32_UPD:
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
    case ARM::VLD1DUPq8wb_fixed:
    case ARM::VLD1DUPq16wb_fixed:
    case ARM::VLD1DUPq32wb_fixed:
    case ARM::VLD1DUPq8wb_register:
    case ARM::VLD1DUPq16wb_register:
    case ARM::VLD1DUPq32wb_register:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
    case ARM::VLD2DUPd8wb_fixed:
    case ARM::VLD2DUPd16wb_fixed:
    case ARM::VLD2DUPd32wb_fixed:
    case ARM::VLD2DUPd8wb_register:
    case ARM::VLD2DUPd16wb_register:
    case ARM::VLD2DUPd32wb_register:
    case ARM::VLD4DUPd8:
    case ARM::VLD4DUPd16:
    case ARM::VLD4DUPd32:
    case ARM::VLD4DUPd8_UPD:
    case ARM::VLD4DUPd16_UPD:
    case ARM::VLD4DUPd32_UPD:
    case ARM::VLD1LNd8:
    case ARM::VLD1LNd16:
    case ARM::VLD1LNd32:
    case ARM::VLD1LNd8_UPD:
    case ARM::VLD1LNd16_UPD:
    case ARM::VLD1LNd32_UPD:
    case ARM::VLD2LNd8:
    case ARM::VLD2LNd16:
    case ARM::VLD2LNd32:
    case ARM::VLD2LNq16:
    case ARM::VLD2LNq32:
    case ARM::VLD2LNd8_UPD:
    case ARM::VLD2LNd16_UPD:
    case ARM::VLD2LNd32_UPD:
    case ARM::VLD2LNq16_UPD:
    case ARM::VLD2LNq32_UPD:
    case ARM::VLD4LNd8:
    case ARM::VLD4LNd16:
    case ARM::VLD4LNd32:
    case ARM::VLD4LNq16:
    case ARM::VLD4LNq32:
    case ARM::VLD4LNd8_UPD:
    case ARM::VLD4LNd16_UPD:
    case ARM::VLD4LNd32_UPD:
    case ARM::VLD4LNq16_UPD:
    case ARM::VLD4LNq32_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increases by one.
      ++Adjust;
      break;
    }
  }
  return Adjust;
}

int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                        const MachineInstr &DefMI,
                                        unsigned DefIdx,
                                        const MachineInstr &UseMI,
                                        unsigned UseIdx) const {
  // No operand latency. The caller may fall back to getInstrLatency.
  if (!ItinData || ItinData->isEmpty())
    return -1;

  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
  Register Reg = DefMO.getReg();

  // If def or use is inside a bundle, resolve to the actual instruction in
  // the bundle that defines/uses Reg; the walk distance becomes an IT-block
  // adjustment applied below.
  const MachineInstr *ResolvedDefMI = &DefMI;
  unsigned DefAdj = 0;
  if (DefMI.isBundle())
    ResolvedDefMI =
        getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
  if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
      ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
    return 1;
  }

  const MachineInstr *ResolvedUseMI = &UseMI;
  unsigned UseAdj = 0;
  if (UseMI.isBundle()) {
    ResolvedUseMI =
        getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
    if (!ResolvedUseMI)
      return -1;
  }

  return getOperandLatencyImpl(
      ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
      Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
}

// Core operand-latency computation after bundle resolution. Handles the CPSR
// special cases first, then combines the itinerary latency with the dynamic
// def-side adjustment and the bundle (IT-block) distances DefAdj/UseAdj.
// Returns -1 when the latency cannot be determined.
int ARMBaseInstrInfo::getOperandLatencyImpl(
    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
    const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
    unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
  if (Reg == ARM::CPSR) {
    if (DefMI.getOpcode() == ARM::FMSTAT) {
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
      return Subtarget.isLikeA9() ? 1 : 20;
    }

    // CPSR set and branch can be paired in the same cycle.
    if (UseMI.isBranch())
      return 0;

    // Otherwise it takes the instruction latency (generally one).
    unsigned Latency = getInstrLatency(ItinData, DefMI);

    // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
    // its uses. Instructions which are otherwise scheduled between them may
    // incur a code size penalty (not able to use the CPSR setting 16-bit
    // instructions).
    if (Latency > 0 && Subtarget.isThumb2()) {
      const MachineFunction *MF = DefMI.getParent()->getParent();
      // FIXME: Use Function::hasOptSize().
      if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
        --Latency;
    }
    return Latency;
  }

  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
    return -1;

  unsigned DefAlign = DefMI.hasOneMemOperand()
                          ? (*DefMI.memoperands_begin())->getAlignment()
                          : 0;
  unsigned UseAlign = UseMI.hasOneMemOperand()
                          ? (*UseMI.memoperands_begin())->getAlignment()
                          : 0;

  // Get the itinerary's latency if possible, and handle variable_ops.
  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
                                  UseIdx, UseAlign);
  // Unable to find operand latency. The caller may resort to getInstrLatency.
  if (Latency < 0)
    return Latency;

  // Adjust for IT block position.
  int Adj = DefAdj + UseAdj;

  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
  if (Adj >= 0 || (int)Latency > -Adj) {
    return Latency + Adj;
  }
  // Return the itinerary latency, which may be zero but not less than zero.
  return Latency;
}

// SDNode flavor of getOperandLatency, used before instruction selection has
// produced MachineInstrs.
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    SDNode *DefNode, unsigned DefIdx,
                                    SDNode *UseNode, unsigned UseIdx) const {
  if (!DefNode->isMachineOpcode())
    return 1;

  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());

  if (isZeroCost(DefMCID.Opcode))
    return 0;

  if (!ItinData || ItinData->isEmpty())
    return DefMCID.mayLoad() ? 3 : 1;

  if (!UseNode->isMachineOpcode()) {
    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
    int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
    int Threshold = 1 + Adj;
    return Latency <= Threshold ? 1 : Latency - Adj;
  }

  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
  auto *DefMN = cast<MachineSDNode>(DefNode);
  unsigned DefAlign = !DefMN->memoperands_empty()
                          ? (*DefMN->memoperands_begin())->getAlignment() : 0;
  auto *UseMN = cast<MachineSDNode>(UseNode);
  unsigned UseAlign = !UseMN->memoperands_empty()
                          ?
(*UseMN->memoperands_begin())->getAlignment() : 0; 4373224145Sdim int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 4374224145Sdim UseMCID, UseIdx, UseAlign); 4375218893Sdim 4376218893Sdim if (Latency > 1 && 4377276479Sdim (Subtarget.isCortexA8() || Subtarget.isLikeA9() || 4378276479Sdim Subtarget.isCortexA7())) { 4379218893Sdim // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 4380218893Sdim // variants are one cycle cheaper. 4381224145Sdim switch (DefMCID.getOpcode()) { 4382218893Sdim default: break; 4383218893Sdim case ARM::LDRrs: 4384218893Sdim case ARM::LDRBrs: { 4385218893Sdim unsigned ShOpVal = 4386218893Sdim cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 4387218893Sdim unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 4388218893Sdim if (ShImm == 0 || 4389218893Sdim (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 4390218893Sdim --Latency; 4391218893Sdim break; 4392218893Sdim } 4393218893Sdim case ARM::t2LDRs: 4394218893Sdim case ARM::t2LDRBs: 4395218893Sdim case ARM::t2LDRHs: 4396218893Sdim case ARM::t2LDRSHs: { 4397218893Sdim // Thumb2 mode: lsl only. 4398218893Sdim unsigned ShAmt = 4399218893Sdim cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 4400218893Sdim if (ShAmt == 0 || ShAmt == 2) 4401218893Sdim --Latency; 4402218893Sdim break; 4403218893Sdim } 4404218893Sdim } 4405243830Sdim } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { 4406243830Sdim // FIXME: Properly handle all of the latency adjustments for address 4407243830Sdim // writeback. 
4408243830Sdim switch (DefMCID.getOpcode()) { 4409243830Sdim default: break; 4410243830Sdim case ARM::LDRrs: 4411243830Sdim case ARM::LDRBrs: { 4412243830Sdim unsigned ShOpVal = 4413243830Sdim cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 4414243830Sdim unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 4415243830Sdim if (ShImm == 0 || 4416243830Sdim ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 4417243830Sdim ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 4418243830Sdim Latency -= 2; 4419243830Sdim else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 4420243830Sdim --Latency; 4421243830Sdim break; 4422243830Sdim } 4423243830Sdim case ARM::t2LDRs: 4424243830Sdim case ARM::t2LDRBs: 4425243830Sdim case ARM::t2LDRHs: 4426321369Sdim case ARM::t2LDRSHs: 4427243830Sdim // Thumb2 mode: lsl 0-3 only. 4428243830Sdim Latency -= 2; 4429243830Sdim break; 4430243830Sdim } 4431218893Sdim } 4432218893Sdim 4433309124Sdim if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) 4434224145Sdim switch (DefMCID.getOpcode()) { 4435221345Sdim default: break; 4436234353Sdim case ARM::VLD1q8: 4437234353Sdim case ARM::VLD1q16: 4438234353Sdim case ARM::VLD1q32: 4439234353Sdim case ARM::VLD1q64: 4440234353Sdim case ARM::VLD1q8wb_register: 4441234353Sdim case ARM::VLD1q16wb_register: 4442234353Sdim case ARM::VLD1q32wb_register: 4443234353Sdim case ARM::VLD1q64wb_register: 4444234353Sdim case ARM::VLD1q8wb_fixed: 4445234353Sdim case ARM::VLD1q16wb_fixed: 4446234353Sdim case ARM::VLD1q32wb_fixed: 4447234353Sdim case ARM::VLD1q64wb_fixed: 4448234353Sdim case ARM::VLD2d8: 4449234353Sdim case ARM::VLD2d16: 4450234353Sdim case ARM::VLD2d32: 4451221345Sdim case ARM::VLD2q8Pseudo: 4452221345Sdim case ARM::VLD2q16Pseudo: 4453221345Sdim case ARM::VLD2q32Pseudo: 4454234353Sdim case ARM::VLD2d8wb_fixed: 4455234353Sdim case ARM::VLD2d16wb_fixed: 4456234353Sdim case ARM::VLD2d32wb_fixed: 4457234353Sdim case ARM::VLD2q8PseudoWB_fixed: 4458234353Sdim case 
ARM::VLD2q16PseudoWB_fixed: 4459234353Sdim case ARM::VLD2q32PseudoWB_fixed: 4460234353Sdim case ARM::VLD2d8wb_register: 4461234353Sdim case ARM::VLD2d16wb_register: 4462234353Sdim case ARM::VLD2d32wb_register: 4463234353Sdim case ARM::VLD2q8PseudoWB_register: 4464234353Sdim case ARM::VLD2q16PseudoWB_register: 4465234353Sdim case ARM::VLD2q32PseudoWB_register: 4466221345Sdim case ARM::VLD3d8Pseudo: 4467221345Sdim case ARM::VLD3d16Pseudo: 4468221345Sdim case ARM::VLD3d32Pseudo: 4469341825Sdim case ARM::VLD1d8TPseudo: 4470341825Sdim case ARM::VLD1d16TPseudo: 4471341825Sdim case ARM::VLD1d32TPseudo: 4472221345Sdim case ARM::VLD1d64TPseudo: 4473265925Sdim case ARM::VLD1d64TPseudoWB_fixed: 4474341825Sdim case ARM::VLD1d64TPseudoWB_register: 4475221345Sdim case ARM::VLD3d8Pseudo_UPD: 4476221345Sdim case ARM::VLD3d16Pseudo_UPD: 4477221345Sdim case ARM::VLD3d32Pseudo_UPD: 4478221345Sdim case ARM::VLD3q8Pseudo_UPD: 4479221345Sdim case ARM::VLD3q16Pseudo_UPD: 4480221345Sdim case ARM::VLD3q32Pseudo_UPD: 4481221345Sdim case ARM::VLD3q8oddPseudo: 4482221345Sdim case ARM::VLD3q16oddPseudo: 4483221345Sdim case ARM::VLD3q32oddPseudo: 4484221345Sdim case ARM::VLD3q8oddPseudo_UPD: 4485221345Sdim case ARM::VLD3q16oddPseudo_UPD: 4486221345Sdim case ARM::VLD3q32oddPseudo_UPD: 4487221345Sdim case ARM::VLD4d8Pseudo: 4488221345Sdim case ARM::VLD4d16Pseudo: 4489221345Sdim case ARM::VLD4d32Pseudo: 4490341825Sdim case ARM::VLD1d8QPseudo: 4491341825Sdim case ARM::VLD1d16QPseudo: 4492341825Sdim case ARM::VLD1d32QPseudo: 4493221345Sdim case ARM::VLD1d64QPseudo: 4494265925Sdim case ARM::VLD1d64QPseudoWB_fixed: 4495341825Sdim case ARM::VLD1d64QPseudoWB_register: 4496341825Sdim case ARM::VLD1q8HighQPseudo: 4497341825Sdim case ARM::VLD1q8LowQPseudo_UPD: 4498341825Sdim case ARM::VLD1q8HighTPseudo: 4499341825Sdim case ARM::VLD1q8LowTPseudo_UPD: 4500341825Sdim case ARM::VLD1q16HighQPseudo: 4501341825Sdim case ARM::VLD1q16LowQPseudo_UPD: 4502341825Sdim case ARM::VLD1q16HighTPseudo: 4503341825Sdim case 
ARM::VLD1q16LowTPseudo_UPD: 4504341825Sdim case ARM::VLD1q32HighQPseudo: 4505341825Sdim case ARM::VLD1q32LowQPseudo_UPD: 4506341825Sdim case ARM::VLD1q32HighTPseudo: 4507341825Sdim case ARM::VLD1q32LowTPseudo_UPD: 4508341825Sdim case ARM::VLD1q64HighQPseudo: 4509341825Sdim case ARM::VLD1q64LowQPseudo_UPD: 4510341825Sdim case ARM::VLD1q64HighTPseudo: 4511341825Sdim case ARM::VLD1q64LowTPseudo_UPD: 4512221345Sdim case ARM::VLD4d8Pseudo_UPD: 4513221345Sdim case ARM::VLD4d16Pseudo_UPD: 4514221345Sdim case ARM::VLD4d32Pseudo_UPD: 4515221345Sdim case ARM::VLD4q8Pseudo_UPD: 4516221345Sdim case ARM::VLD4q16Pseudo_UPD: 4517221345Sdim case ARM::VLD4q32Pseudo_UPD: 4518221345Sdim case ARM::VLD4q8oddPseudo: 4519221345Sdim case ARM::VLD4q16oddPseudo: 4520221345Sdim case ARM::VLD4q32oddPseudo: 4521221345Sdim case ARM::VLD4q8oddPseudo_UPD: 4522221345Sdim case ARM::VLD4q16oddPseudo_UPD: 4523221345Sdim case ARM::VLD4q32oddPseudo_UPD: 4524234353Sdim case ARM::VLD1DUPq8: 4525234353Sdim case ARM::VLD1DUPq16: 4526234353Sdim case ARM::VLD1DUPq32: 4527234353Sdim case ARM::VLD1DUPq8wb_fixed: 4528234353Sdim case ARM::VLD1DUPq16wb_fixed: 4529234353Sdim case ARM::VLD1DUPq32wb_fixed: 4530234353Sdim case ARM::VLD1DUPq8wb_register: 4531234353Sdim case ARM::VLD1DUPq16wb_register: 4532234353Sdim case ARM::VLD1DUPq32wb_register: 4533234353Sdim case ARM::VLD2DUPd8: 4534234353Sdim case ARM::VLD2DUPd16: 4535234353Sdim case ARM::VLD2DUPd32: 4536234353Sdim case ARM::VLD2DUPd8wb_fixed: 4537234353Sdim case ARM::VLD2DUPd16wb_fixed: 4538234353Sdim case ARM::VLD2DUPd32wb_fixed: 4539234353Sdim case ARM::VLD2DUPd8wb_register: 4540234353Sdim case ARM::VLD2DUPd16wb_register: 4541234353Sdim case ARM::VLD2DUPd32wb_register: 4542341825Sdim case ARM::VLD2DUPq8EvenPseudo: 4543341825Sdim case ARM::VLD2DUPq8OddPseudo: 4544341825Sdim case ARM::VLD2DUPq16EvenPseudo: 4545341825Sdim case ARM::VLD2DUPq16OddPseudo: 4546341825Sdim case ARM::VLD2DUPq32EvenPseudo: 4547341825Sdim case ARM::VLD2DUPq32OddPseudo: 4548341825Sdim 
case ARM::VLD3DUPq8EvenPseudo: 4549341825Sdim case ARM::VLD3DUPq8OddPseudo: 4550341825Sdim case ARM::VLD3DUPq16EvenPseudo: 4551341825Sdim case ARM::VLD3DUPq16OddPseudo: 4552341825Sdim case ARM::VLD3DUPq32EvenPseudo: 4553341825Sdim case ARM::VLD3DUPq32OddPseudo: 4554221345Sdim case ARM::VLD4DUPd8Pseudo: 4555221345Sdim case ARM::VLD4DUPd16Pseudo: 4556221345Sdim case ARM::VLD4DUPd32Pseudo: 4557221345Sdim case ARM::VLD4DUPd8Pseudo_UPD: 4558221345Sdim case ARM::VLD4DUPd16Pseudo_UPD: 4559221345Sdim case ARM::VLD4DUPd32Pseudo_UPD: 4560341825Sdim case ARM::VLD4DUPq8EvenPseudo: 4561341825Sdim case ARM::VLD4DUPq8OddPseudo: 4562341825Sdim case ARM::VLD4DUPq16EvenPseudo: 4563341825Sdim case ARM::VLD4DUPq16OddPseudo: 4564341825Sdim case ARM::VLD4DUPq32EvenPseudo: 4565341825Sdim case ARM::VLD4DUPq32OddPseudo: 4566221345Sdim case ARM::VLD1LNq8Pseudo: 4567221345Sdim case ARM::VLD1LNq16Pseudo: 4568221345Sdim case ARM::VLD1LNq32Pseudo: 4569221345Sdim case ARM::VLD1LNq8Pseudo_UPD: 4570221345Sdim case ARM::VLD1LNq16Pseudo_UPD: 4571221345Sdim case ARM::VLD1LNq32Pseudo_UPD: 4572221345Sdim case ARM::VLD2LNd8Pseudo: 4573221345Sdim case ARM::VLD2LNd16Pseudo: 4574221345Sdim case ARM::VLD2LNd32Pseudo: 4575221345Sdim case ARM::VLD2LNq16Pseudo: 4576221345Sdim case ARM::VLD2LNq32Pseudo: 4577221345Sdim case ARM::VLD2LNd8Pseudo_UPD: 4578221345Sdim case ARM::VLD2LNd16Pseudo_UPD: 4579221345Sdim case ARM::VLD2LNd32Pseudo_UPD: 4580221345Sdim case ARM::VLD2LNq16Pseudo_UPD: 4581221345Sdim case ARM::VLD2LNq32Pseudo_UPD: 4582221345Sdim case ARM::VLD4LNd8Pseudo: 4583221345Sdim case ARM::VLD4LNd16Pseudo: 4584221345Sdim case ARM::VLD4LNd32Pseudo: 4585221345Sdim case ARM::VLD4LNq16Pseudo: 4586221345Sdim case ARM::VLD4LNq32Pseudo: 4587221345Sdim case ARM::VLD4LNd8Pseudo_UPD: 4588221345Sdim case ARM::VLD4LNd16Pseudo_UPD: 4589221345Sdim case ARM::VLD4LNd32Pseudo_UPD: 4590221345Sdim case ARM::VLD4LNq16Pseudo_UPD: 4591221345Sdim case ARM::VLD4LNq32Pseudo_UPD: 4592221345Sdim // If the address is not 64-bit 
aligned, the latencies of these 4593221345Sdim // instructions increases by one. 4594221345Sdim ++Latency; 4595221345Sdim break; 4596221345Sdim } 4597221345Sdim 4598218893Sdim return Latency; 4599218893Sdim} 4600218893Sdim 4601309124Sdimunsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { 4602309124Sdim if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || 4603309124Sdim MI.isImplicitDef()) 4604261991Sdim return 0; 4605261991Sdim 4606309124Sdim if (MI.isBundle()) 4607261991Sdim return 0; 4608261991Sdim 4609309124Sdim const MCInstrDesc &MCID = MI.getDesc(); 4610261991Sdim 4611321369Sdim if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && 4612321369Sdim !Subtarget.cheapPredicableCPSRDef())) { 4613261991Sdim // When predicated, CPSR is an additional source operand for CPSR updating 4614261991Sdim // instructions, this apparently increases their latencies. 4615261991Sdim return 1; 4616261991Sdim } 4617261991Sdim return 0; 4618261991Sdim} 4619261991Sdim 4620239462Sdimunsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 4621309124Sdim const MachineInstr &MI, 4622239462Sdim unsigned *PredCost) const { 4623309124Sdim if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || 4624309124Sdim MI.isImplicitDef()) 4625218893Sdim return 1; 4626218893Sdim 4627239462Sdim // An instruction scheduler typically runs on unbundled instructions, however 4628239462Sdim // other passes may query the latency of a bundled instruction. 
4629309124Sdim if (MI.isBundle()) { 4630239462Sdim unsigned Latency = 0; 4631309124Sdim MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 4632309124Sdim MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 4633234353Sdim while (++I != E && I->isInsideBundle()) { 4634234353Sdim if (I->getOpcode() != ARM::t2IT) 4635309124Sdim Latency += getInstrLatency(ItinData, *I, PredCost); 4636234353Sdim } 4637234353Sdim return Latency; 4638234353Sdim } 4639234353Sdim 4640309124Sdim const MCInstrDesc &MCID = MI.getDesc(); 4641321369Sdim if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && 4642321369Sdim !Subtarget.cheapPredicableCPSRDef()))) { 4643218893Sdim // When predicated, CPSR is an additional source operand for CPSR updating 4644218893Sdim // instructions, this apparently increases their latencies. 4645218893Sdim *PredCost = 1; 4646239462Sdim } 4647239462Sdim // Be sure to call getStageLatency for an empty itinerary in case it has a 4648239462Sdim // valid MinLatency property. 4649239462Sdim if (!ItinData) 4650309124Sdim return MI.mayLoad() ? 3 : 1; 4651239462Sdim 4652239462Sdim unsigned Class = MCID.getSchedClass(); 4653239462Sdim 4654239462Sdim // For instructions with variable uops, use uops as latency. 4655239462Sdim if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) 4656239462Sdim return getNumMicroOps(ItinData, MI); 4657239462Sdim 4658239462Sdim // For the common case, fall back on the itinerary's latency. 4659239462Sdim unsigned Latency = ItinData->getStageLatency(Class); 4660239462Sdim 4661239462Sdim // Adjust for dynamic def-side opcode variants not captured by the itinerary. 4662309124Sdim unsigned DefAlign = 4663309124Sdim MI.hasOneMemOperand() ? 
(*MI.memoperands_begin())->getAlignment() : 0; 4664309124Sdim int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign); 4665239462Sdim if (Adj >= 0 || (int)Latency > -Adj) { 4666239462Sdim return Latency + Adj; 4667239462Sdim } 4668239462Sdim return Latency; 4669218893Sdim} 4670218893Sdim 4671218893Sdimint ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 4672218893Sdim SDNode *Node) const { 4673218893Sdim if (!Node->isMachineOpcode()) 4674218893Sdim return 1; 4675218893Sdim 4676218893Sdim if (!ItinData || ItinData->isEmpty()) 4677218893Sdim return 1; 4678218893Sdim 4679218893Sdim unsigned Opcode = Node->getMachineOpcode(); 4680218893Sdim switch (Opcode) { 4681218893Sdim default: 4682218893Sdim return ItinData->getStageLatency(get(Opcode).getSchedClass()); 4683218893Sdim case ARM::VLDMQIA: 4684218893Sdim case ARM::VSTMQIA: 4685218893Sdim return 2; 4686218893Sdim } 4687218893Sdim} 4688218893Sdim 4689309124Sdimbool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, 4690309124Sdim const MachineRegisterInfo *MRI, 4691309124Sdim const MachineInstr &DefMI, 4692309124Sdim unsigned DefIdx, 4693309124Sdim const MachineInstr &UseMI, 4694309124Sdim unsigned UseIdx) const { 4695309124Sdim unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; 4696309124Sdim unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; 4697309124Sdim if (Subtarget.nonpipelinedVFP() && 4698218893Sdim (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) 4699218893Sdim return true; 4700218893Sdim 4701218893Sdim // Hoist VFP / NEON instructions with 4 or higher latency. 
4702309124Sdim unsigned Latency = 4703309124Sdim SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx); 4704218893Sdim if (Latency <= 3) 4705218893Sdim return false; 4706218893Sdim return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || 4707218893Sdim UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; 4708218893Sdim} 4709218893Sdim 4710309124Sdimbool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, 4711309124Sdim const MachineInstr &DefMI, 4712309124Sdim unsigned DefIdx) const { 4713288943Sdim const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); 4714218893Sdim if (!ItinData || ItinData->isEmpty()) 4715218893Sdim return false; 4716218893Sdim 4717309124Sdim unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; 4718218893Sdim if (DDomain == ARMII::DomainGeneral) { 4719309124Sdim unsigned DefClass = DefMI.getDesc().getSchedClass(); 4720218893Sdim int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 4721218893Sdim return (DefCycle != -1 && DefCycle <= 2); 4722218893Sdim } 4723218893Sdim return false; 4724218893Sdim} 4725218893Sdim 4726309124Sdimbool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, 4727226633Sdim StringRef &ErrInfo) const { 4728309124Sdim if (convertAddSubFlagsOpcode(MI.getOpcode())) { 4729226633Sdim ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; 4730226633Sdim return false; 4731226633Sdim } 4732353358Sdim if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) { 4733353358Sdim // Make sure we don't generate a lo-lo mov that isn't supported. 
4734353358Sdim if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) && 4735353358Sdim !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) { 4736353358Sdim ErrInfo = "Non-flag-setting Thumb1 mov is v6-only"; 4737353358Sdim return false; 4738353358Sdim } 4739353358Sdim } 4740353358Sdim if (MI.getOpcode() == ARM::tPUSH || 4741353358Sdim MI.getOpcode() == ARM::tPOP || 4742353358Sdim MI.getOpcode() == ARM::tPOP_RET) { 4743353358Sdim for (int i = 2, e = MI.getNumOperands(); i < e; ++i) { 4744353358Sdim if (MI.getOperand(i).isImplicit() || 4745353358Sdim !MI.getOperand(i).isReg()) 4746353358Sdim continue; 4747360784Sdim Register Reg = MI.getOperand(i).getReg(); 4748353358Sdim if (Reg < ARM::R0 || Reg > ARM::R7) { 4749353358Sdim if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && 4750353358Sdim !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { 4751353358Sdim ErrInfo = "Unsupported register in Thumb1 push/pop"; 4752353358Sdim return false; 4753353358Sdim } 4754353358Sdim } 4755353358Sdim } 4756353358Sdim } 4757226633Sdim return true; 4758226633Sdim} 4759226633Sdim 4760280031Sdim// LoadStackGuard has so far only been implemented for MachO. Different code 4761280031Sdim// sequence is needed for other targets. 
4762280031Sdimvoid ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, 4763280031Sdim unsigned LoadImmOpc, 4764309124Sdim unsigned LoadOpc) const { 4765314564Sdim assert(!Subtarget.isROPI() && !Subtarget.isRWPI() && 4766314564Sdim "ROPI/RWPI not currently supported with stack guard"); 4767314564Sdim 4768280031Sdim MachineBasicBlock &MBB = *MI->getParent(); 4769280031Sdim DebugLoc DL = MI->getDebugLoc(); 4770360784Sdim Register Reg = MI->getOperand(0).getReg(); 4771280031Sdim const GlobalValue *GV = 4772280031Sdim cast<GlobalValue>((*MI->memoperands_begin())->getValue()); 4773280031Sdim MachineInstrBuilder MIB; 4774280031Sdim 4775280031Sdim BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) 4776280031Sdim .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); 4777280031Sdim 4778309124Sdim if (Subtarget.isGVIndirectSymbol(GV)) { 4779280031Sdim MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); 4780280031Sdim MIB.addReg(Reg, RegState::Kill).addImm(0); 4781314564Sdim auto Flags = MachineMemOperand::MOLoad | 4782314564Sdim MachineMemOperand::MODereferenceable | 4783314564Sdim MachineMemOperand::MOInvariant; 4784296417Sdim MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( 4785309124Sdim MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); 4786321369Sdim MIB.addMemOperand(MMO).add(predOps(ARMCC::AL)); 4787280031Sdim } 4788280031Sdim 4789280031Sdim MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); 4790321369Sdim MIB.addReg(Reg, RegState::Kill) 4791344779Sdim .addImm(0) 4792344779Sdim .cloneMemRefs(*MI) 4793344779Sdim .add(predOps(ARMCC::AL)); 4794280031Sdim} 4795280031Sdim 4796218893Sdimbool 4797218893SdimARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, 4798218893Sdim unsigned &AddSubOpc, 4799218893Sdim bool &NegAcc, bool &HasLane) const { 4800218893Sdim DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); 4801218893Sdim if (I == MLxEntryMap.end()) 4802218893Sdim return false; 4803218893Sdim 4804218893Sdim 
const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; 4805218893Sdim MulOpc = Entry.MulOpc; 4806218893Sdim AddSubOpc = Entry.AddSubOpc; 4807218893Sdim NegAcc = Entry.NegAcc; 4808218893Sdim HasLane = Entry.HasLane; 4809218893Sdim return true; 4810218893Sdim} 4811226633Sdim 4812226633Sdim//===----------------------------------------------------------------------===// 4813226633Sdim// Execution domains. 4814226633Sdim//===----------------------------------------------------------------------===// 4815226633Sdim// 4816226633Sdim// Some instructions go down the NEON pipeline, some go down the VFP pipeline, 4817226633Sdim// and some can go down both. The vmov instructions go down the VFP pipeline, 4818226633Sdim// but they can be changed to vorr equivalents that are executed by the NEON 4819226633Sdim// pipeline. 4820226633Sdim// 4821226633Sdim// We use the following execution domain numbering: 4822226633Sdim// 4823226633Sdimenum ARMExeDomain { 4824226633Sdim ExeGeneric = 0, 4825226633Sdim ExeVFP = 1, 4826226633Sdim ExeNEON = 2 4827226633Sdim}; 4828321369Sdim 4829226633Sdim// 4830226633Sdim// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h 4831226633Sdim// 4832226633Sdimstd::pair<uint16_t, uint16_t> 4833309124SdimARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { 4834288943Sdim // If we don't have access to NEON instructions then we won't be able 4835288943Sdim // to swizzle anything to the NEON domain. Check to make sure. 4836288943Sdim if (Subtarget.hasNEON()) { 4837288943Sdim // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON 4838288943Sdim // if they are not predicated. 4839309124Sdim if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) 4840288943Sdim return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); 4841226633Sdim 4842288943Sdim // CortexA9 is particularly picky about mixing the two and wants these 4843288943Sdim // converted. 
4844309124Sdim if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && 4845309124Sdim (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || 4846309124Sdim MI.getOpcode() == ARM::VMOVS)) 4847288943Sdim return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); 4848288943Sdim } 4849226633Sdim // No other instructions can be swizzled, so just determine their domain. 4850309124Sdim unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; 4851226633Sdim 4852226633Sdim if (Domain & ARMII::DomainNEON) 4853226633Sdim return std::make_pair(ExeNEON, 0); 4854226633Sdim 4855226633Sdim // Certain instructions can go either way on Cortex-A8. 4856226633Sdim // Treat them as NEON instructions. 4857226633Sdim if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) 4858226633Sdim return std::make_pair(ExeNEON, 0); 4859226633Sdim 4860226633Sdim if (Domain & ARMII::DomainVFP) 4861226633Sdim return std::make_pair(ExeVFP, 0); 4862226633Sdim 4863226633Sdim return std::make_pair(ExeGeneric, 0); 4864226633Sdim} 4865226633Sdim 4866243830Sdimstatic unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, 4867243830Sdim unsigned SReg, unsigned &Lane) { 4868243830Sdim unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); 4869243830Sdim Lane = 0; 4870243830Sdim 4871243830Sdim if (DReg != ARM::NoRegister) 4872243830Sdim return DReg; 4873243830Sdim 4874243830Sdim Lane = 1; 4875243830Sdim DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); 4876243830Sdim 4877243830Sdim assert(DReg && "S-register with no D super-register?"); 4878243830Sdim return DReg; 4879243830Sdim} 4880243830Sdim 4881243830Sdim/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, 4882243830Sdim/// set ImplicitSReg to a register number that must be marked as implicit-use or 4883243830Sdim/// zero if no register needs to be defined as implicit-use. 
4884243830Sdim/// 4885243830Sdim/// If the function cannot determine if an SPR should be marked implicit use or 4886243830Sdim/// not, it returns false. 4887243830Sdim/// 4888243830Sdim/// This function handles cases where an instruction is being modified from taking 4889243830Sdim/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict 4890243830Sdim/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other 4891243830Sdim/// lane of the DPR). 4892243830Sdim/// 4893243830Sdim/// If the other SPR is defined, an implicit-use of it should be added. Else, 4894243830Sdim/// (including the case where the DPR itself is defined), it should not. 4895243830Sdim/// 4896243830Sdimstatic bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, 4897309124Sdim MachineInstr &MI, unsigned DReg, 4898309124Sdim unsigned Lane, unsigned &ImplicitSReg) { 4899243830Sdim // If the DPR is defined or used already, the other SPR lane will be chained 4900243830Sdim // correctly, so there is nothing to be done. 4901309124Sdim if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) { 4902243830Sdim ImplicitSReg = 0; 4903243830Sdim return true; 4904243830Sdim } 4905243830Sdim 4906243830Sdim // Otherwise we need to go searching to see if the SPR is set explicitly. 4907243830Sdim ImplicitSReg = TRI->getSubReg(DReg, 4908243830Sdim (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); 4909243830Sdim MachineBasicBlock::LivenessQueryResult LQR = 4910309124Sdim MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); 4911243830Sdim 4912243830Sdim if (LQR == MachineBasicBlock::LQR_Live) 4913243830Sdim return true; 4914243830Sdim else if (LQR == MachineBasicBlock::LQR_Unknown) 4915243830Sdim return false; 4916243830Sdim 4917243830Sdim // If the register is known not to be live, there is no need to add an 4918243830Sdim // implicit-use. 
4919243830Sdim ImplicitSReg = 0; 4920243830Sdim return true; 4921243830Sdim} 4922243830Sdim 4923309124Sdimvoid ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, 4924309124Sdim unsigned Domain) const { 4925239462Sdim unsigned DstReg, SrcReg, DReg; 4926239462Sdim unsigned Lane; 4927309124Sdim MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); 4928239462Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 4929309124Sdim switch (MI.getOpcode()) { 4930309124Sdim default: 4931309124Sdim llvm_unreachable("cannot handle opcode!"); 4932309124Sdim break; 4933309124Sdim case ARM::VMOVD: 4934309124Sdim if (Domain != ExeNEON) 4935239462Sdim break; 4936226633Sdim 4937309124Sdim // Zap the predicate operands. 4938309124Sdim assert(!isPredicated(MI) && "Cannot predicate a VORRd"); 4939226633Sdim 4940309124Sdim // Make sure we've got NEON instructions. 4941309124Sdim assert(Subtarget.hasNEON() && "VORRd requires NEON"); 4942288943Sdim 4943309124Sdim // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) 4944309124Sdim DstReg = MI.getOperand(0).getReg(); 4945309124Sdim SrcReg = MI.getOperand(1).getReg(); 4946243830Sdim 4947309124Sdim for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 4948309124Sdim MI.RemoveOperand(i - 1); 4949243830Sdim 4950309124Sdim // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) 4951309124Sdim MI.setDesc(get(ARM::VORRd)); 4952321369Sdim MIB.addReg(DstReg, RegState::Define) 4953321369Sdim .addReg(SrcReg) 4954321369Sdim .addReg(SrcReg) 4955321369Sdim .add(predOps(ARMCC::AL)); 4956309124Sdim break; 4957309124Sdim case ARM::VMOVRS: 4958309124Sdim if (Domain != ExeNEON) 4959239462Sdim break; 4960309124Sdim assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); 4961239462Sdim 4962309124Sdim // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) 4963309124Sdim DstReg = MI.getOperand(0).getReg(); 4964309124Sdim SrcReg = MI.getOperand(1).getReg(); 4965239462Sdim 4966309124Sdim for (unsigned i 
= MI.getDesc().getNumOperands(); i; --i) 4967309124Sdim MI.RemoveOperand(i - 1); 4968239462Sdim 4969309124Sdim DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); 4970239462Sdim 4971309124Sdim // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) 4972309124Sdim // Note that DSrc has been widened and the other lane may be undef, which 4973309124Sdim // contaminates the entire register. 4974309124Sdim MI.setDesc(get(ARM::VGETLNi32)); 4975321369Sdim MIB.addReg(DstReg, RegState::Define) 4976321369Sdim .addReg(DReg, RegState::Undef) 4977321369Sdim .addImm(Lane) 4978321369Sdim .add(predOps(ARMCC::AL)); 4979239462Sdim 4980309124Sdim // The old source should be an implicit use, otherwise we might think it 4981309124Sdim // was dead before here. 4982309124Sdim MIB.addReg(SrcReg, RegState::Implicit); 4983309124Sdim break; 4984309124Sdim case ARM::VMOVSR: { 4985309124Sdim if (Domain != ExeNEON) 4986243830Sdim break; 4987309124Sdim assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); 4988239462Sdim 4989309124Sdim // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) 4990309124Sdim DstReg = MI.getOperand(0).getReg(); 4991309124Sdim SrcReg = MI.getOperand(1).getReg(); 4992243830Sdim 4993309124Sdim DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); 4994243830Sdim 4995309124Sdim unsigned ImplicitSReg; 4996309124Sdim if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) 4997309124Sdim break; 4998243830Sdim 4999309124Sdim for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 5000309124Sdim MI.RemoveOperand(i - 1); 5001243830Sdim 5002309124Sdim // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) 5003309124Sdim // Again DDst may be undefined at the beginning of this instruction. 
5004309124Sdim MI.setDesc(get(ARM::VSETLNi32)); 5005309124Sdim MIB.addReg(DReg, RegState::Define) 5006309124Sdim .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI))) 5007309124Sdim .addReg(SrcReg) 5008321369Sdim .addImm(Lane) 5009321369Sdim .add(predOps(ARMCC::AL)); 5010243830Sdim 5011309124Sdim // The narrower destination must be marked as set to keep previous chains 5012309124Sdim // in place. 5013309124Sdim MIB.addReg(DstReg, RegState::Define | RegState::Implicit); 5014309124Sdim if (ImplicitSReg != 0) 5015309124Sdim MIB.addReg(ImplicitSReg, RegState::Implicit); 5016309124Sdim break; 5017243830Sdim } 5018243830Sdim case ARM::VMOVS: { 5019239462Sdim if (Domain != ExeNEON) 5020239462Sdim break; 5021239462Sdim 5022243830Sdim // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) 5023309124Sdim DstReg = MI.getOperand(0).getReg(); 5024309124Sdim SrcReg = MI.getOperand(1).getReg(); 5025243830Sdim 5026243830Sdim unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; 5027243830Sdim DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); 5028243830Sdim DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); 5029243830Sdim 5030243830Sdim unsigned ImplicitSReg; 5031243830Sdim if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) 5032243830Sdim break; 5033243830Sdim 5034309124Sdim for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 5035309124Sdim MI.RemoveOperand(i - 1); 5036243830Sdim 5037243830Sdim if (DSrc == DDst) { 5038243830Sdim // Destination can be: 5039243830Sdim // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) 5040309124Sdim MI.setDesc(get(ARM::VDUPLN32d)); 5041243830Sdim MIB.addReg(DDst, RegState::Define) 5042309124Sdim .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI))) 5043321369Sdim .addImm(SrcLane) 5044321369Sdim .add(predOps(ARMCC::AL)); 5045243830Sdim 5046243830Sdim // Neither the source or the destination are naturally represented any 5047243830Sdim // more, so add them in manually. 
5048243830Sdim MIB.addReg(DstReg, RegState::Implicit | RegState::Define); 5049243830Sdim MIB.addReg(SrcReg, RegState::Implicit); 5050243830Sdim if (ImplicitSReg != 0) 5051243830Sdim MIB.addReg(ImplicitSReg, RegState::Implicit); 5052243830Sdim break; 5053239462Sdim } 5054239462Sdim 5055243830Sdim // In general there's no single instruction that can perform an S <-> S 5056243830Sdim // move in NEON space, but a pair of VEXT instructions *can* do the 5057243830Sdim // job. It turns out that the VEXTs needed will only use DSrc once, with 5058243830Sdim // the position based purely on the combination of lane-0 and lane-1 5059243830Sdim // involved. For example 5060243830Sdim // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 5061243830Sdim // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 5062243830Sdim // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 5063243830Sdim // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 5064243830Sdim // 5065243830Sdim // Pattern of the MachineInstrs is: 5066243830Sdim // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) 5067243830Sdim MachineInstrBuilder NewMIB; 5068309124Sdim NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32), 5069309124Sdim DDst); 5070239462Sdim 5071327952Sdim // On the first instruction, both DSrc and DDst may be undef if present. 5072243830Sdim // Specifically when the original instruction didn't have them as an 5073243830Sdim // <imp-use>. 5074243830Sdim unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; 5075309124Sdim bool CurUndef = !MI.readsRegister(CurReg, TRI); 5076243830Sdim NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); 5077239462Sdim 5078243830Sdim CurReg = SrcLane == 0 && DstLane == 0 ? 
DSrc : DDst; 5079309124Sdim CurUndef = !MI.readsRegister(CurReg, TRI); 5080321369Sdim NewMIB.addReg(CurReg, getUndefRegState(CurUndef)) 5081321369Sdim .addImm(1) 5082321369Sdim .add(predOps(ARMCC::AL)); 5083239462Sdim 5084243830Sdim if (SrcLane == DstLane) 5085243830Sdim NewMIB.addReg(SrcReg, RegState::Implicit); 5086243830Sdim 5087309124Sdim MI.setDesc(get(ARM::VEXTd32)); 5088243830Sdim MIB.addReg(DDst, RegState::Define); 5089243830Sdim 5090243830Sdim // On the second instruction, DDst has definitely been defined above, so 5091327952Sdim // it is not undef. DSrc, if present, can be undef as above. 5092243830Sdim CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; 5093309124Sdim CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); 5094243830Sdim MIB.addReg(CurReg, getUndefRegState(CurUndef)); 5095243830Sdim 5096243830Sdim CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; 5097309124Sdim CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); 5098321369Sdim MIB.addReg(CurReg, getUndefRegState(CurUndef)) 5099321369Sdim .addImm(1) 5100321369Sdim .add(predOps(ARMCC::AL)); 5101243830Sdim 5102243830Sdim if (SrcLane != DstLane) 5103243830Sdim MIB.addReg(SrcReg, RegState::Implicit); 5104243830Sdim 5105243830Sdim // As before, the original destination is no longer represented, add it 5106243830Sdim // implicitly. 5107243830Sdim MIB.addReg(DstReg, RegState::Define | RegState::Implicit); 5108243830Sdim if (ImplicitSReg != 0) 5109243830Sdim MIB.addReg(ImplicitSReg, RegState::Implicit); 5110239462Sdim break; 5111243830Sdim } 5112239462Sdim } 5113226633Sdim} 5114234353Sdim 5115243830Sdim//===----------------------------------------------------------------------===// 5116243830Sdim// Partial register updates 5117243830Sdim//===----------------------------------------------------------------------===// 5118243830Sdim// 5119243830Sdim// Swift renames NEON registers with 64-bit granularity. 
That means any
// instruction writing an S-reg implicitly reads the containing D-reg. The
// problem is mostly avoided by translating f32 operations to v2f32 operations
// on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS - Only writes S, partial D update.
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.

/// Return the subtarget's partial-update clearance (in instructions) when the
/// def operand OpNum of MI writes only part of a D-register without reading
/// it, creating an unwanted dependency on the old value of the full D-reg;
/// return 0 when no clearance is needed (or the subtarget doesn't care).
unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
    const MachineInstr &MI, unsigned OpNum,
    const TargetRegisterInfo *TRI) const {
  // Subtargets that don't suffer from partial updates report 0 clearance.
  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
  if (!PartialUpdateClearance)
    return 0;

  assert(TRI && "Need TRI instance");

  // A def operand that also reads the register is already a true dependency,
  // not a partial-update hazard.
  const MachineOperand &MO = MI.getOperand(OpNum);
  if (MO.readsReg())
    return 0;
  Register Reg = MO.getReg();
  int UseOp = -1;

  switch (MI.getOpcode()) {
  // Normal instructions writing only an S-register.
  case ARM::VLDRS:
  case ARM::FCONSTS:
  case ARM::VMOVSR:
  case ARM::VMOVv8i8:
  case ARM::VMOVv4i16:
  case ARM::VMOVv2i32:
  case ARM::VMOVv2f32:
  case ARM::VMOVv1i64:
    // Look for an explicit use of Reg elsewhere in the instruction.
    UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
    break;

  // Explicitly reads the dependency.
  case ARM::VLD1LNd32:
    UseOp = 3;
    break;
  default:
    // Any other opcode is assumed not to have the hazard.
    return 0;
  }

  // If this instruction actually reads a value from Reg, there is no unwanted
  // dependency.
  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
    return 0;

  // We must be able to clobber the whole D-reg.
  if (Register::isVirtualRegister(Reg)) {
    // Virtual register must be a def undef foo:ssub_0 operand.
    if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
      return 0;
  } else if (ARM::SPRRegClass.contains(Reg)) {
    // Physical register: MI must define the full D-reg.
    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
                                             &ARM::DPRRegClass);
    if (!DReg || !MI.definesRegister(DReg, TRI))
      return 0;
  }

  // MI has an unwanted D-register dependency.
  // Avoid defs in the previous N instructions.
  return PartialUpdateClearance;
}

// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
5192309124Sdimvoid ARMBaseInstrInfo::breakPartialRegDependency( 5193309124Sdim MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { 5194309124Sdim assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def"); 5195243830Sdim assert(TRI && "Need TRI instance"); 5196243830Sdim 5197309124Sdim const MachineOperand &MO = MI.getOperand(OpNum); 5198360784Sdim Register Reg = MO.getReg(); 5199360784Sdim assert(Register::isPhysicalRegister(Reg) && 5200243830Sdim "Can't break virtual register dependencies."); 5201243830Sdim unsigned DReg = Reg; 5202243830Sdim 5203243830Sdim // If MI defines an S-reg, find the corresponding D super-register. 5204243830Sdim if (ARM::SPRRegClass.contains(Reg)) { 5205243830Sdim DReg = ARM::D0 + (Reg - ARM::S0) / 2; 5206243830Sdim assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); 5207243830Sdim } 5208243830Sdim 5209243830Sdim assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); 5210309124Sdim assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); 5211243830Sdim 5212243830Sdim // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines 5213243830Sdim // the full D-register by loading the same value to both lanes. The 5214243830Sdim // instruction is micro-coded with 2 uops, so don't do this until we can 5215261991Sdim // properly schedule micro-coded instructions. The dispatcher stalls cause 5216243830Sdim // too big regressions. 5217243830Sdim 5218243830Sdim // Insert the dependency-breaking FCONSTD before MI. 5219243830Sdim // 96 is the encoding of 0.5, but the actual value doesn't matter here. 
5220321369Sdim BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) 5221321369Sdim .addImm(96) 5222321369Sdim .add(predOps(ARMCC::AL)); 5223309124Sdim MI.addRegisterKilled(DReg, TRI, true); 5224243830Sdim} 5225243830Sdim 5226234353Sdimbool ARMBaseInstrInfo::hasNOP() const { 5227288943Sdim return Subtarget.getFeatureBits()[ARM::HasV6KOps]; 5228234353Sdim} 5229249423Sdim 5230249423Sdimbool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { 5231261991Sdim if (MI->getNumOperands() < 4) 5232261991Sdim return true; 5233249423Sdim unsigned ShOpVal = MI->getOperand(3).getImm(); 5234249423Sdim unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); 5235249423Sdim // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. 5236249423Sdim if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) || 5237249423Sdim ((ShImm == 1 || ShImm == 2) && 5238249423Sdim ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl)) 5239249423Sdim return true; 5240249423Sdim 5241249423Sdim return false; 5242249423Sdim} 5243280031Sdim 5244280031Sdimbool ARMBaseInstrInfo::getRegSequenceLikeInputs( 5245280031Sdim const MachineInstr &MI, unsigned DefIdx, 5246280031Sdim SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { 5247280031Sdim assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 5248280031Sdim assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); 5249280031Sdim 5250280031Sdim switch (MI.getOpcode()) { 5251280031Sdim case ARM::VMOVDRR: 5252280031Sdim // dX = VMOVDRR rY, rZ 5253280031Sdim // is the same as: 5254280031Sdim // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 5255280031Sdim // Populate the InputRegs accordingly. 
5256280031Sdim // rY 5257280031Sdim const MachineOperand *MOReg = &MI.getOperand(1); 5258335799Sdim if (!MOReg->isUndef()) 5259335799Sdim InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), 5260335799Sdim MOReg->getSubReg(), ARM::ssub_0)); 5261280031Sdim // rZ 5262280031Sdim MOReg = &MI.getOperand(2); 5263335799Sdim if (!MOReg->isUndef()) 5264335799Sdim InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), 5265335799Sdim MOReg->getSubReg(), ARM::ssub_1)); 5266280031Sdim return true; 5267280031Sdim } 5268280031Sdim llvm_unreachable("Target dependent opcode missing"); 5269280031Sdim} 5270280031Sdim 5271280031Sdimbool ARMBaseInstrInfo::getExtractSubregLikeInputs( 5272280031Sdim const MachineInstr &MI, unsigned DefIdx, 5273280031Sdim RegSubRegPairAndIdx &InputReg) const { 5274280031Sdim assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 5275280031Sdim assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); 5276280031Sdim 5277280031Sdim switch (MI.getOpcode()) { 5278280031Sdim case ARM::VMOVRRD: 5279280031Sdim // rX, rY = VMOVRRD dZ 5280280031Sdim // is the same as: 5281280031Sdim // rX = EXTRACT_SUBREG dZ, ssub_0 5282280031Sdim // rY = EXTRACT_SUBREG dZ, ssub_1 5283280031Sdim const MachineOperand &MOReg = MI.getOperand(2); 5284335799Sdim if (MOReg.isUndef()) 5285335799Sdim return false; 5286280031Sdim InputReg.Reg = MOReg.getReg(); 5287280031Sdim InputReg.SubReg = MOReg.getSubReg(); 5288280031Sdim InputReg.SubIdx = DefIdx == 0 ? 
ARM::ssub_0 : ARM::ssub_1; 5289280031Sdim return true; 5290280031Sdim } 5291280031Sdim llvm_unreachable("Target dependent opcode missing"); 5292280031Sdim} 5293280031Sdim 5294280031Sdimbool ARMBaseInstrInfo::getInsertSubregLikeInputs( 5295280031Sdim const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, 5296280031Sdim RegSubRegPairAndIdx &InsertedReg) const { 5297280031Sdim assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 5298280031Sdim assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); 5299280031Sdim 5300280031Sdim switch (MI.getOpcode()) { 5301280031Sdim case ARM::VSETLNi32: 5302280031Sdim // dX = VSETLNi32 dY, rZ, imm 5303280031Sdim const MachineOperand &MOBaseReg = MI.getOperand(1); 5304280031Sdim const MachineOperand &MOInsertedReg = MI.getOperand(2); 5305335799Sdim if (MOInsertedReg.isUndef()) 5306335799Sdim return false; 5307280031Sdim const MachineOperand &MOIndex = MI.getOperand(3); 5308280031Sdim BaseReg.Reg = MOBaseReg.getReg(); 5309280031Sdim BaseReg.SubReg = MOBaseReg.getSubReg(); 5310280031Sdim 5311280031Sdim InsertedReg.Reg = MOInsertedReg.getReg(); 5312280031Sdim InsertedReg.SubReg = MOInsertedReg.getSubReg(); 5313280031Sdim InsertedReg.SubIdx = MOIndex.getImm() == 0 ? 
ARM::ssub_0 : ARM::ssub_1; 5314280031Sdim return true; 5315280031Sdim } 5316280031Sdim llvm_unreachable("Target dependent opcode missing"); 5317280031Sdim} 5318344779Sdim 5319344779Sdimstd::pair<unsigned, unsigned> 5320344779SdimARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 5321344779Sdim const unsigned Mask = ARMII::MO_OPTION_MASK; 5322344779Sdim return std::make_pair(TF & Mask, TF & ~Mask); 5323344779Sdim} 5324344779Sdim 5325344779SdimArrayRef<std::pair<unsigned, const char *>> 5326344779SdimARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 5327344779Sdim using namespace ARMII; 5328344779Sdim 5329344779Sdim static const std::pair<unsigned, const char *> TargetFlags[] = { 5330344779Sdim {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}}; 5331344779Sdim return makeArrayRef(TargetFlags); 5332344779Sdim} 5333344779Sdim 5334344779SdimArrayRef<std::pair<unsigned, const char *>> 5335344779SdimARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { 5336344779Sdim using namespace ARMII; 5337344779Sdim 5338344779Sdim static const std::pair<unsigned, const char *> TargetFlags[] = { 5339344779Sdim {MO_COFFSTUB, "arm-coffstub"}, 5340344779Sdim {MO_GOT, "arm-got"}, 5341344779Sdim {MO_SBREL, "arm-sbrel"}, 5342344779Sdim {MO_DLLIMPORT, "arm-dllimport"}, 5343344779Sdim {MO_SECREL, "arm-secrel"}, 5344344779Sdim {MO_NONLAZY, "arm-nonlazy"}}; 5345344779Sdim return makeArrayRef(TargetFlags); 5346344779Sdim} 5347353358Sdim 5348360784SdimOptional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, 5349360784Sdim Register Reg) const { 5350360784Sdim int Sign = 1; 5351360784Sdim unsigned Opcode = MI.getOpcode(); 5352360784Sdim int64_t Offset = 0; 5353360784Sdim 5354360784Sdim // TODO: Handle cases where Reg is a super- or sub-register of the 5355360784Sdim // destination register. 
5356360784Sdim if (Reg != MI.getOperand(0).getReg()) 5357360784Sdim return None; 5358360784Sdim 5359360784Sdim // We describe SUBri or ADDri instructions. 5360360784Sdim if (Opcode == ARM::SUBri) 5361360784Sdim Sign = -1; 5362360784Sdim else if (Opcode != ARM::ADDri) 5363360784Sdim return None; 5364360784Sdim 5365360784Sdim // TODO: Third operand can be global address (usually some string). Since 5366360784Sdim // strings can be relocated we cannot calculate their offsets for 5367360784Sdim // now. 5368360784Sdim if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() || 5369360784Sdim !MI.getOperand(2).isImm()) 5370360784Sdim return None; 5371360784Sdim 5372360784Sdim Offset = MI.getOperand(2).getImm() * Sign; 5373360784Sdim return RegImmPair{MI.getOperand(1).getReg(), Offset}; 5374360784Sdim} 5375360784Sdim 5376353358Sdimbool llvm::registerDefinedBetween(unsigned Reg, 5377353358Sdim MachineBasicBlock::iterator From, 5378353358Sdim MachineBasicBlock::iterator To, 5379353358Sdim const TargetRegisterInfo *TRI) { 5380353358Sdim for (auto I = From; I != To; ++I) 5381353358Sdim if (I->modifiesRegister(Reg, TRI)) 5382353358Sdim return true; 5383353358Sdim return false; 5384353358Sdim} 5385353358Sdim 5386353358SdimMachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, 5387353358Sdim const TargetRegisterInfo *TRI) { 5388353358Sdim // Search backwards to the instruction that defines CSPR. This may or not 5389353358Sdim // be a CMP, we check that after this loop. If we find another instruction 5390353358Sdim // that reads cpsr, we return nullptr. 
5391353358Sdim MachineBasicBlock::iterator CmpMI = Br; 5392353358Sdim while (CmpMI != Br->getParent()->begin()) { 5393353358Sdim --CmpMI; 5394353358Sdim if (CmpMI->modifiesRegister(ARM::CPSR, TRI)) 5395353358Sdim break; 5396353358Sdim if (CmpMI->readsRegister(ARM::CPSR, TRI)) 5397353358Sdim break; 5398353358Sdim } 5399353358Sdim 5400353358Sdim // Check that this inst is a CMP r[0-7], #0 and that the register 5401353358Sdim // is not redefined between the cmp and the br. 5402353358Sdim if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) 5403353358Sdim return nullptr; 5404360784Sdim Register Reg = CmpMI->getOperand(0).getReg(); 5405353358Sdim unsigned PredReg = 0; 5406353358Sdim ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg); 5407353358Sdim if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0) 5408353358Sdim return nullptr; 5409353358Sdim if (!isARMLowRegister(Reg)) 5410353358Sdim return nullptr; 5411353358Sdim if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI)) 5412353358Sdim return nullptr; 5413353358Sdim 5414353358Sdim return &*CmpMI; 5415353358Sdim} 5416360784Sdim 5417360784Sdimunsigned llvm::ConstantMaterializationCost(unsigned Val, 5418360784Sdim const ARMSubtarget *Subtarget, 5419360784Sdim bool ForCodesize) { 5420360784Sdim if (Subtarget->isThumb()) { 5421360784Sdim if (Val <= 255) // MOV 5422360784Sdim return ForCodesize ? 2 : 1; 5423360784Sdim if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV 5424360784Sdim ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW 5425360784Sdim ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN 5426360784Sdim return ForCodesize ? 4 : 1; 5427360784Sdim if (Val <= 510) // MOV + ADDi8 5428360784Sdim return ForCodesize ? 4 : 2; 5429360784Sdim if (~Val <= 255) // MOV + MVN 5430360784Sdim return ForCodesize ? 4 : 2; 5431360784Sdim if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL 5432360784Sdim return ForCodesize ? 
4 : 2; 5433360784Sdim } else { 5434360784Sdim if (ARM_AM::getSOImmVal(Val) != -1) // MOV 5435360784Sdim return ForCodesize ? 4 : 1; 5436360784Sdim if (ARM_AM::getSOImmVal(~Val) != -1) // MVN 5437360784Sdim return ForCodesize ? 4 : 1; 5438360784Sdim if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW 5439360784Sdim return ForCodesize ? 4 : 1; 5440360784Sdim if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs 5441360784Sdim return ForCodesize ? 8 : 2; 5442360784Sdim } 5443360784Sdim if (Subtarget->useMovt()) // MOVW + MOVT 5444360784Sdim return ForCodesize ? 8 : 2; 5445360784Sdim return ForCodesize ? 8 : 3; // Literal pool load 5446360784Sdim} 5447360784Sdim 5448360784Sdimbool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, 5449360784Sdim const ARMSubtarget *Subtarget, 5450360784Sdim bool ForCodesize) { 5451360784Sdim // Check with ForCodesize 5452360784Sdim unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize); 5453360784Sdim unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize); 5454360784Sdim if (Cost1 < Cost2) 5455360784Sdim return true; 5456360784Sdim if (Cost1 > Cost2) 5457360784Sdim return false; 5458360784Sdim 5459360784Sdim // If they are equal, try with !ForCodesize 5460360784Sdim return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < 5461360784Sdim ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); 5462360784Sdim} 5463