1198090Srdivacky//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// 2198090Srdivacky// 3198090Srdivacky// The LLVM Compiler Infrastructure 4198090Srdivacky// 5198090Srdivacky// This file is distributed under the University of Illinois Open Source 6198090Srdivacky// License. See LICENSE.TXT for details. 7198090Srdivacky// 8198090Srdivacky//===----------------------------------------------------------------------===// 9198090Srdivacky 10198090Srdivacky#define DEBUG_TYPE "t2-reduce-size" 11198090Srdivacky#include "ARM.h" 12249423Sdim#include "ARMBaseInstrInfo.h" 13198090Srdivacky#include "ARMBaseRegisterInfo.h" 14221345Sdim#include "ARMSubtarget.h" 15249423Sdim#include "MCTargetDesc/ARMAddressingModes.h" 16198090Srdivacky#include "Thumb2InstrInfo.h" 17249423Sdim#include "llvm/ADT/DenseMap.h" 18249423Sdim#include "llvm/ADT/PostOrderIterator.h" 19249423Sdim#include "llvm/ADT/Statistic.h" 20249423Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 21198090Srdivacky#include "llvm/CodeGen/MachineInstr.h" 22198090Srdivacky#include "llvm/CodeGen/MachineInstrBuilder.h" 23249423Sdim#include "llvm/IR/Function.h" // To access Function attributes 24198090Srdivacky#include "llvm/Support/CommandLine.h" 25198090Srdivacky#include "llvm/Support/Debug.h" 26198090Srdivacky#include "llvm/Support/raw_ostream.h" 27198090Srdivackyusing namespace llvm; 28198090Srdivacky 29198090SrdivackySTATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); 30198090SrdivackySTATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); 31198090SrdivackySTATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); 32198090Srdivacky 33198090Srdivackystatic cl::opt<int> ReduceLimit("t2-reduce-limit", 34198090Srdivacky cl::init(-1), cl::Hidden); 35198090Srdivackystatic cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2", 36198090Srdivacky cl::init(-1), cl::Hidden); 37198090Srdivackystatic cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", 38198090Srdivacky cl::init(-1), cl::Hidden); 39198090Srdivacky 40198090Srdivackynamespace { 41198090Srdivacky /// ReduceTable - A static table with information on mapping from wide 42198090Srdivacky /// opcodes to narrow 43198090Srdivacky struct ReduceEntry { 44234353Sdim uint16_t WideOpc; // Wide opcode 45234353Sdim uint16_t NarrowOpc1; // Narrow opcode to transform to 46234353Sdim uint16_t NarrowOpc2; // Narrow opcode when it's two-address 47198090Srdivacky uint8_t Imm1Limit; // Limit of immediate field (bits) 48198090Srdivacky uint8_t Imm2Limit; // Limit of immediate field when it's two-address 49198090Srdivacky unsigned LowRegs1 : 1; // Only possible if low-registers are used 50198090Srdivacky unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) 51198090Srdivacky unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa. 52198090Srdivacky // 1 - No cc field. 53198090Srdivacky // 2 - Always set CPSR. 54198090Srdivacky unsigned PredCC2 : 2; 55221345Sdim unsigned PartFlag : 1; // 16-bit instruction does partial flag update 56198090Srdivacky unsigned Special : 1; // Needs to be dealt with specially 57249423Sdim unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift) 58198090Srdivacky }; 59198090Srdivacky 60198090Srdivacky static const ReduceEntry ReduceTable[] = { 61249423Sdim // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM 62249423Sdim { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 }, 63249423Sdim { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }, 64249423Sdim { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 }, 65249423Sdim { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 }, 66249423Sdim { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, 67249423Sdim { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 }, 68249423Sdim { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, 69249423Sdim { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, 70249423Sdim { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 }, 71249423Sdim //FIXME: Disable CMN, as CCodes are backwards from compare expectations 72249423Sdim //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, 73249423Sdim { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, 74249423Sdim { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 }, 75249423Sdim { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 }, 76249423Sdim { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 }, 77249423Sdim // FIXME: adr.n immediate offset must be multiple of 4. 78249423Sdim //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, 79249423Sdim { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, 80249423Sdim { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 }, 81249423Sdim { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, 82249423Sdim { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, 83249423Sdim { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 }, 84249423Sdim { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 }, 85249423Sdim // FIXME: Do we need the 16-bit 'S' variant? 86249423Sdim { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 }, 87249423Sdim { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 }, 88249423Sdim { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, 89249423Sdim { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 }, 90249423Sdim { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, 91249423Sdim { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, 92249423Sdim { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, 93249423Sdim { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 }, 94249423Sdim { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 95249423Sdim { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, 96249423Sdim { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 }, 97249423Sdim { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 }, 98249423Sdim { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, 99249423Sdim { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 }, 100249423Sdim { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, 101249423Sdim { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, 102249423Sdim { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, 103249423Sdim { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, 104249423Sdim { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, 105249423Sdim { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, 106198090Srdivacky 107249423Sdim // FIXME: Clean this up after splitting each Thumb load / store opcode 108249423Sdim // into multiple ones. 109249423Sdim { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, 110249423Sdim { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 111249423Sdim { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, 112249423Sdim { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 113249423Sdim { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, 114249423Sdim { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 115249423Sdim { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 116249423Sdim { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 117249423Sdim { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, 118249423Sdim { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 119249423Sdim { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, 120249423Sdim { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 121249423Sdim { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, 122249423Sdim { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, 123198090Srdivacky 124249423Sdim { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, 125249423Sdim { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 }, 126249423Sdim { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 }, 127249423Sdim // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent 128249423Sdim { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, 129249423Sdim { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 } 130198090Srdivacky }; 131198090Srdivacky 132198892Srdivacky class Thumb2SizeReduce : public MachineFunctionPass { 133198090Srdivacky public: 134198090Srdivacky static char ID; 135198090Srdivacky Thumb2SizeReduce(); 136198090Srdivacky 137198090Srdivacky const Thumb2InstrInfo *TII; 138221345Sdim const ARMSubtarget *STI; 139198090Srdivacky 140198090Srdivacky virtual bool runOnMachineFunction(MachineFunction &MF); 141198090Srdivacky 142198090Srdivacky virtual const char *getPassName() const { 143198090Srdivacky return "Thumb2 instruction size reduction pass"; 144198090Srdivacky } 145198090Srdivacky 146198090Srdivacky private: 147198090Srdivacky /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. 148198090Srdivacky DenseMap<unsigned, unsigned> ReduceOpcodeMap; 149198090Srdivacky 150249423Sdim bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop); 151221345Sdim 152198090Srdivacky bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 153198090Srdivacky bool is2Addr, ARMCC::CondCodes Pred, 154198090Srdivacky bool LiveCPSR, bool &HasCC, bool &CCDead); 155198090Srdivacky 156198090Srdivacky bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 157198090Srdivacky const ReduceEntry &Entry); 158198090Srdivacky 159198090Srdivacky bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 160249423Sdim const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop); 161198090Srdivacky 162198090Srdivacky /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address 163198090Srdivacky /// instruction. 164198090Srdivacky bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 165249423Sdim const ReduceEntry &Entry, bool LiveCPSR, 166234353Sdim bool IsSelfLoop); 167198090Srdivacky 168198090Srdivacky /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit 169198090Srdivacky /// non-two-address instruction. 170198090Srdivacky bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 171249423Sdim const ReduceEntry &Entry, bool LiveCPSR, 172234353Sdim bool IsSelfLoop); 173198090Srdivacky 174249423Sdim /// ReduceMI - Attempt to reduce MI, return true on success. 175249423Sdim bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, 176249423Sdim bool LiveCPSR, bool IsSelfLoop); 177249423Sdim 178198090Srdivacky /// ReduceMBB - Reduce width of instructions in the specified basic block. 179198090Srdivacky bool ReduceMBB(MachineBasicBlock &MBB); 180249423Sdim 181249423Sdim bool OptimizeSize; 182249423Sdim bool MinimizeSize; 183249423Sdim 184249423Sdim // Last instruction to define CPSR in the current block. 185249423Sdim MachineInstr *CPSRDef; 186249423Sdim // Was CPSR last defined by a high latency instruction? 187249423Sdim // When CPSRDef is null, this refers to CPSR defs in predecessors. 188249423Sdim bool HighLatencyCPSR; 189249423Sdim 190249423Sdim struct MBBInfo { 191249423Sdim // The flags leaving this block have high latency. 192249423Sdim bool HighLatencyCPSR; 193249423Sdim // Has this block been visited yet? 194249423Sdim bool Visited; 195249423Sdim 196249423Sdim MBBInfo() : HighLatencyCPSR(false), Visited(false) {} 197249423Sdim }; 198249423Sdim 199249423Sdim SmallVector<MBBInfo, 8> BlockInfo; 200198090Srdivacky }; 201198090Srdivacky char Thumb2SizeReduce::ID = 0; 202198090Srdivacky} 203198090Srdivacky 204212904SdimThumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { 205249423Sdim OptimizeSize = MinimizeSize = false; 206198090Srdivacky for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { 207198090Srdivacky unsigned FromOpc = ReduceTable[i].WideOpc; 208198090Srdivacky if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) 209198090Srdivacky assert(false && "Duplicated entries?"); 210198090Srdivacky } 211198090Srdivacky} 212198090Srdivacky 213224145Sdimstatic bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { 214234353Sdim for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) 215198090Srdivacky if (*Regs == ARM::CPSR) 216198090Srdivacky return true; 217198090Srdivacky return false; 218198090Srdivacky} 219198090Srdivacky 220249423Sdim// Check for a likely high-latency flag def. 221249423Sdimstatic bool isHighLatencyCPSR(MachineInstr *Def) { 222249423Sdim switch(Def->getOpcode()) { 223249423Sdim case ARM::FMSTAT: 224249423Sdim case ARM::tMUL: 225249423Sdim return true; 226249423Sdim } 227249423Sdim return false; 228249423Sdim} 229249423Sdim 230221345Sdim/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations, 231221345Sdim/// the 's' 16-bit instruction partially update CPSR. Abort the 232221345Sdim/// transformation to avoid adding false dependency on last CPSR setting 233221345Sdim/// instruction which hurts the ability for out-of-order execution engine 234221345Sdim/// to do register renaming magic. 235221345Sdim/// This function checks if there is a read-of-write dependency between the 236221345Sdim/// last instruction that defines the CPSR and the current instruction. If there 237221345Sdim/// is, then there is no harm done since the instruction cannot be retired 238221345Sdim/// before the CPSR setting instruction anyway. 239221345Sdim/// Note, we are not doing full dependency analysis here for the sake of compile 240221345Sdim/// time. We're not looking for cases like: 241221345Sdim/// r0 = muls ... 242221345Sdim/// r1 = add.w r0, ... 243221345Sdim/// ... 244221345Sdim/// = mul.w r1 245221345Sdim/// In this case it would have been ok to narrow the mul.w to muls since there 246221345Sdim/// are indirect RAW dependency between the muls and the mul.w 247198090Srdivackybool 248249423SdimThumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { 249249423Sdim // Disable the check for -Oz (aka OptimizeForSizeHarder). 250249423Sdim if (MinimizeSize || !STI->avoidCPSRPartialUpdate()) 251221345Sdim return false; 252221345Sdim 253249423Sdim if (!CPSRDef) 254234353Sdim // If this BB loops back to itself, conservatively avoid narrowing the 255234353Sdim // first instruction that does partial flag update. 256249423Sdim return HighLatencyCPSR || FirstInSelfLoop; 257234353Sdim 258221345Sdim SmallSet<unsigned, 2> Defs; 259249423Sdim for (unsigned i = 0, e = CPSRDef->getNumOperands(); i != e; ++i) { 260249423Sdim const MachineOperand &MO = CPSRDef->getOperand(i); 261221345Sdim if (!MO.isReg() || MO.isUndef() || MO.isUse()) 262221345Sdim continue; 263221345Sdim unsigned Reg = MO.getReg(); 264221345Sdim if (Reg == 0 || Reg == ARM::CPSR) 265221345Sdim continue; 266221345Sdim Defs.insert(Reg); 267221345Sdim } 268221345Sdim 269221345Sdim for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { 270221345Sdim const MachineOperand &MO = Use->getOperand(i); 271221345Sdim if (!MO.isReg() || MO.isUndef() || MO.isDef()) 272221345Sdim continue; 273221345Sdim unsigned Reg = MO.getReg(); 274221345Sdim if (Defs.count(Reg)) 275221345Sdim return false; 276221345Sdim } 277221345Sdim 278249423Sdim // If the current CPSR has high latency, try to avoid the false dependency. 279249423Sdim if (HighLatencyCPSR) 280249423Sdim return true; 281249423Sdim 282249423Sdim // tMOVi8 usually doesn't start long dependency chains, and there are a lot 283249423Sdim // of them, so always shrink them when CPSR doesn't have high latency. 284249423Sdim if (Use->getOpcode() == ARM::t2MOVi || 285249423Sdim Use->getOpcode() == ARM::t2MOVi16) 286249423Sdim return false; 287249423Sdim 288221345Sdim // No read-after-write dependency. The narrowing will add false dependency. 289221345Sdim return true; 290221345Sdim} 291221345Sdim 292221345Sdimbool 293198090SrdivackyThumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 294198090Srdivacky bool is2Addr, ARMCC::CondCodes Pred, 295198090Srdivacky bool LiveCPSR, bool &HasCC, bool &CCDead) { 296198090Srdivacky if ((is2Addr && Entry.PredCC2 == 0) || 297198090Srdivacky (!is2Addr && Entry.PredCC1 == 0)) { 298198090Srdivacky if (Pred == ARMCC::AL) { 299198090Srdivacky // Not predicated, must set CPSR. 300198090Srdivacky if (!HasCC) { 301198090Srdivacky // Original instruction was not setting CPSR, but CPSR is not 302198090Srdivacky // currently live anyway. It's ok to set it. The CPSR def is 303198090Srdivacky // dead though. 304198090Srdivacky if (!LiveCPSR) { 305198090Srdivacky HasCC = true; 306198090Srdivacky CCDead = true; 307198090Srdivacky return true; 308198090Srdivacky } 309198090Srdivacky return false; 310198090Srdivacky } 311198090Srdivacky } else { 312198090Srdivacky // Predicated, must not set CPSR. 313198090Srdivacky if (HasCC) 314198090Srdivacky return false; 315198090Srdivacky } 316198090Srdivacky } else if ((is2Addr && Entry.PredCC2 == 2) || 317198090Srdivacky (!is2Addr && Entry.PredCC1 == 2)) { 318198090Srdivacky /// Old opcode has an optional def of CPSR. 319198090Srdivacky if (HasCC) 320198090Srdivacky return true; 321218893Sdim // If old opcode does not implicitly define CPSR, then it's not ok since 322218893Sdim // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP. 323198090Srdivacky if (!HasImplicitCPSRDef(MI->getDesc())) 324198090Srdivacky return false; 325198090Srdivacky HasCC = true; 326198090Srdivacky } else { 327198090Srdivacky // 16-bit instruction does not set CPSR. 328198090Srdivacky if (HasCC) 329198090Srdivacky return false; 330198090Srdivacky } 331198090Srdivacky 332198090Srdivacky return true; 333198090Srdivacky} 334198090Srdivacky 335198090Srdivackystatic bool VerifyLowRegs(MachineInstr *MI) { 336198090Srdivacky unsigned Opc = MI->getOpcode(); 337218893Sdim bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA || 338218893Sdim Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || 339218893Sdim Opc == ARM::t2LDMDB_UPD); 340218893Sdim bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD); 341224145Sdim bool isSPOk = isPCOk || isLROk; 342198090Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 343198090Srdivacky const MachineOperand &MO = MI->getOperand(i); 344198090Srdivacky if (!MO.isReg() || MO.isImplicit()) 345198090Srdivacky continue; 346198090Srdivacky unsigned Reg = MO.getReg(); 347198090Srdivacky if (Reg == 0 || Reg == ARM::CPSR) 348198090Srdivacky continue; 349198090Srdivacky if (isPCOk && Reg == ARM::PC) 350198090Srdivacky continue; 351198090Srdivacky if (isLROk && Reg == ARM::LR) 352198090Srdivacky continue; 353199511Srdivacky if (Reg == ARM::SP) { 354199511Srdivacky if (isSPOk) 355199511Srdivacky continue; 356199511Srdivacky if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12)) 357199511Srdivacky // Special case for these ldr / str with sp as base register. 358199511Srdivacky continue; 359199511Srdivacky } 360198090Srdivacky if (!isARMLowRegister(Reg)) 361198090Srdivacky return false; 362198090Srdivacky } 363198090Srdivacky return true; 364198090Srdivacky} 365198090Srdivacky 366198090Srdivackybool 367198090SrdivackyThumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 368198090Srdivacky const ReduceEntry &Entry) { 369198090Srdivacky if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) 370198090Srdivacky return false; 371198090Srdivacky 372198090Srdivacky unsigned Scale = 1; 373198090Srdivacky bool HasImmOffset = false; 374198090Srdivacky bool HasShift = false; 375199511Srdivacky bool HasOffReg = true; 376198090Srdivacky bool isLdStMul = false; 377198090Srdivacky unsigned Opc = Entry.NarrowOpc1; 378198090Srdivacky unsigned OpNum = 3; // First 'rest' of operands. 379199511Srdivacky uint8_t ImmLimit = Entry.Imm1Limit; 380218893Sdim 381198090Srdivacky switch (Entry.WideOpc) { 382198090Srdivacky default: 383198090Srdivacky llvm_unreachable("Unexpected Thumb2 load / store opcode!"); 384198090Srdivacky case ARM::t2LDRi12: 385218893Sdim case ARM::t2STRi12: 386218893Sdim if (MI->getOperand(1).getReg() == ARM::SP) { 387199511Srdivacky Opc = Entry.NarrowOpc2; 388199511Srdivacky ImmLimit = Entry.Imm2Limit; 389199511Srdivacky HasOffReg = false; 390199511Srdivacky } 391218893Sdim 392198090Srdivacky Scale = 4; 393198090Srdivacky HasImmOffset = true; 394218893Sdim HasOffReg = false; 395198090Srdivacky break; 396198090Srdivacky case ARM::t2LDRBi12: 397198090Srdivacky case ARM::t2STRBi12: 398198090Srdivacky HasImmOffset = true; 399218893Sdim HasOffReg = false; 400198090Srdivacky break; 401198090Srdivacky case ARM::t2LDRHi12: 402198090Srdivacky case ARM::t2STRHi12: 403198090Srdivacky Scale = 2; 404198090Srdivacky HasImmOffset = true; 405218893Sdim HasOffReg = false; 406198090Srdivacky break; 407198090Srdivacky case ARM::t2LDRs: 408198090Srdivacky case ARM::t2LDRBs: 409198090Srdivacky case ARM::t2LDRHs: 410198090Srdivacky case ARM::t2LDRSBs: 411198090Srdivacky case ARM::t2LDRSHs: 412198090Srdivacky case ARM::t2STRs: 413198090Srdivacky case ARM::t2STRBs: 414198090Srdivacky case ARM::t2STRHs: 415198090Srdivacky HasShift = true; 416198090Srdivacky OpNum = 4; 417198090Srdivacky break; 418218893Sdim case ARM::t2LDMIA: 419218893Sdim case ARM::t2LDMDB: { 420198090Srdivacky unsigned BaseReg = MI->getOperand(0).getReg(); 421218893Sdim if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA) 422198090Srdivacky return false; 423218893Sdim 424212904Sdim // For the non-writeback version (this one), the base register must be 425212904Sdim // one of the registers being loaded. 426212904Sdim bool isOK = false; 427212904Sdim for (unsigned i = 4; i < MI->getNumOperands(); ++i) { 428212904Sdim if (MI->getOperand(i).getReg() == BaseReg) { 429212904Sdim isOK = true; 430212904Sdim break; 431212904Sdim } 432212904Sdim } 433218893Sdim 434212904Sdim if (!isOK) 435212904Sdim return false; 436212904Sdim 437205218Srdivacky OpNum = 0; 438205218Srdivacky isLdStMul = true; 439205218Srdivacky break; 440205218Srdivacky } 441218893Sdim case ARM::t2LDMIA_RET: { 442205218Srdivacky unsigned BaseReg = MI->getOperand(1).getReg(); 443205218Srdivacky if (BaseReg != ARM::SP) 444205218Srdivacky return false; 445205218Srdivacky Opc = Entry.NarrowOpc2; // tPOP_RET 446218893Sdim OpNum = 2; 447205218Srdivacky isLdStMul = true; 448205218Srdivacky break; 449205218Srdivacky } 450218893Sdim case ARM::t2LDMIA_UPD: 451218893Sdim case ARM::t2LDMDB_UPD: 452218893Sdim case ARM::t2STMIA_UPD: 453218893Sdim case ARM::t2STMDB_UPD: { 454205218Srdivacky OpNum = 0; 455218893Sdim 456205218Srdivacky unsigned BaseReg = MI->getOperand(1).getReg(); 457205218Srdivacky if (BaseReg == ARM::SP && 458218893Sdim (Entry.WideOpc == ARM::t2LDMIA_UPD || 459218893Sdim Entry.WideOpc == ARM::t2STMDB_UPD)) { 460205218Srdivacky Opc = Entry.NarrowOpc2; // tPOP or tPUSH 461218893Sdim OpNum = 2; 462218893Sdim } else if (!isARMLowRegister(BaseReg) || 463218893Sdim (Entry.WideOpc != ARM::t2LDMIA_UPD && 464218893Sdim Entry.WideOpc != ARM::t2STMIA_UPD)) { 465205218Srdivacky return false; 466198090Srdivacky } 467218893Sdim 468198090Srdivacky isLdStMul = true; 469198090Srdivacky break; 470198090Srdivacky } 471198090Srdivacky } 472198090Srdivacky 473198090Srdivacky unsigned OffsetReg = 0; 474198090Srdivacky bool OffsetKill = false; 475198090Srdivacky if (HasShift) { 476198090Srdivacky OffsetReg = MI->getOperand(2).getReg(); 477198090Srdivacky OffsetKill = MI->getOperand(2).isKill(); 478218893Sdim 479198090Srdivacky if (MI->getOperand(3).getImm()) 480198090Srdivacky // Thumb1 addressing mode doesn't support shift. 481198090Srdivacky return false; 482198090Srdivacky } 483198090Srdivacky 484198090Srdivacky unsigned OffsetImm = 0; 485198090Srdivacky if (HasImmOffset) { 486198090Srdivacky OffsetImm = MI->getOperand(2).getImm(); 487199511Srdivacky unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; 488218893Sdim 489218893Sdim if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) 490198090Srdivacky // Make sure the immediate field fits. 491198090Srdivacky return false; 492198090Srdivacky } 493198090Srdivacky 494198090Srdivacky // Add the 16-bit load / store instruction. 495198090Srdivacky DebugLoc dl = MI->getDebugLoc(); 496234353Sdim MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); 497198090Srdivacky if (!isLdStMul) { 498218893Sdim MIB.addOperand(MI->getOperand(0)); 499218893Sdim MIB.addOperand(MI->getOperand(1)); 500218893Sdim 501218893Sdim if (HasImmOffset) 502218893Sdim MIB.addImm(OffsetImm / Scale); 503218893Sdim 504198090Srdivacky assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); 505198090Srdivacky 506199511Srdivacky if (HasOffReg) 507199511Srdivacky MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); 508198090Srdivacky } 509198090Srdivacky 510198090Srdivacky // Transfer the rest of operands. 511198090Srdivacky for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) 512198090Srdivacky MIB.addOperand(MI->getOperand(OpNum)); 513198090Srdivacky 514199511Srdivacky // Transfer memoperands. 515221345Sdim MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 516199511Srdivacky 517221345Sdim // Transfer MI flags. 518221345Sdim MIB.setMIFlags(MI->getFlags()); 519221345Sdim 520198090Srdivacky DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 521198090Srdivacky 522234353Sdim MBB.erase_instr(MI); 523198090Srdivacky ++NumLdSts; 524198090Srdivacky return true; 525198090Srdivacky} 526198090Srdivacky 527198090Srdivackybool 528198090SrdivackyThumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 529198090Srdivacky const ReduceEntry &Entry, 530249423Sdim bool LiveCPSR, bool IsSelfLoop) { 531224145Sdim unsigned Opc = MI->getOpcode(); 532224145Sdim if (Opc == ARM::t2ADDri) { 533224145Sdim // If the source register is SP, try to reduce to tADDrSPi, otherwise 534224145Sdim // it's a normal reduce. 535224145Sdim if (MI->getOperand(1).getReg() != ARM::SP) { 536249423Sdim if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) 537224145Sdim return true; 538249423Sdim return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); 539224145Sdim } 540224145Sdim // Try to reduce to tADDrSPi. 541224145Sdim unsigned Imm = MI->getOperand(2).getImm(); 542224145Sdim // The immediate must be in range, the destination register must be a low 543224145Sdim // reg, the predicate must be "always" and the condition flags must not 544224145Sdim // be being set. 545224145Sdim if (Imm & 3 || Imm > 1020) 546224145Sdim return false; 547224145Sdim if (!isARMLowRegister(MI->getOperand(0).getReg())) 548224145Sdim return false; 549224145Sdim if (MI->getOperand(3).getImm() != ARMCC::AL) 550224145Sdim return false; 551224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 552224145Sdim if (MCID.hasOptionalDef() && 553224145Sdim MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) 554224145Sdim return false; 555224145Sdim 556234353Sdim MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), 557224145Sdim TII->get(ARM::tADDrSPi)) 558224145Sdim .addOperand(MI->getOperand(0)) 559224145Sdim .addOperand(MI->getOperand(1)) 560224145Sdim .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. 561226633Sdim AddDefaultPred(MIB); 562224145Sdim 563224145Sdim // Transfer MI flags. 564224145Sdim MIB.setMIFlags(MI->getFlags()); 565224145Sdim 566224145Sdim DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); 567224145Sdim 568234353Sdim MBB.erase_instr(MI); 569224145Sdim ++NumNarrows; 570224145Sdim return true; 571224145Sdim } 572224145Sdim 573198090Srdivacky if (Entry.LowRegs1 && !VerifyLowRegs(MI)) 574198090Srdivacky return false; 575198090Srdivacky 576234353Sdim if (MI->mayLoad() || MI->mayStore()) 577198090Srdivacky return ReduceLoadStore(MBB, MI, Entry); 578198090Srdivacky 579198090Srdivacky switch (Opc) { 580198090Srdivacky default: break; 581218893Sdim case ARM::t2ADDSri: 582198090Srdivacky case ARM::t2ADDSrr: { 583198090Srdivacky unsigned PredReg = 0; 584198090Srdivacky if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { 585198090Srdivacky switch (Opc) { 586198090Srdivacky default: break; 587198090Srdivacky case ARM::t2ADDSri: { 588249423Sdim if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) 589198090Srdivacky return true; 590198090Srdivacky // fallthrough 591198090Srdivacky } 592198090Srdivacky case ARM::t2ADDSrr: 593249423Sdim return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); 594198090Srdivacky } 595198090Srdivacky } 596198090Srdivacky break; 597198090Srdivacky } 598198090Srdivacky case ARM::t2RSBri: 599198090Srdivacky case ARM::t2RSBSri: 600226633Sdim case ARM::t2SXTB: 601226633Sdim case ARM::t2SXTH: 602226633Sdim case ARM::t2UXTB: 603226633Sdim case ARM::t2UXTH: 604198090Srdivacky if (MI->getOperand(2).getImm() == 0) 605249423Sdim return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); 606198090Srdivacky break; 607199989Srdivacky case ARM::t2MOVi16: 608199989Srdivacky // Can convert only 'pure' immediate operands, not immediates obtained as 609199989Srdivacky // globals' addresses. 610199989Srdivacky if (MI->getOperand(1).isImm()) 611249423Sdim return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); 612199989Srdivacky break; 613218893Sdim case ARM::t2CMPrr: { 614218893Sdim // Try to reduce to the lo-reg only version first. Why there are two 615218893Sdim // versions of the instruction is a mystery. 616218893Sdim // It would be nice to just have two entries in the master table that 617218893Sdim // are prioritized, but the table assumes a unique entry for each 618218893Sdim // source insn opcode. So for now, we hack a local entry record to use. 619218893Sdim static const ReduceEntry NarrowEntry = 620249423Sdim { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; 621249423Sdim if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop)) 622218893Sdim return true; 623249423Sdim return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); 624198090Srdivacky } 625218893Sdim } 626198090Srdivacky return false; 627198090Srdivacky} 628198090Srdivacky 629198090Srdivackybool 630198090SrdivackyThumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 631198090Srdivacky const ReduceEntry &Entry, 632249423Sdim bool LiveCPSR, bool IsSelfLoop) { 633198090Srdivacky 634198090Srdivacky if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) 635198090Srdivacky return false; 636198090Srdivacky 637249423Sdim if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && 638249423Sdim STI->avoidMOVsShifterOperand()) 639249423Sdim // Don't issue movs with shifter operand for some CPUs unless we 640249423Sdim // are optimizing / minimizing for size. 641249423Sdim return false; 642249423Sdim 643198090Srdivacky unsigned Reg0 = MI->getOperand(0).getReg(); 644198090Srdivacky unsigned Reg1 = MI->getOperand(1).getReg(); 645234353Sdim // t2MUL is "special". The tied source operand is second, not first. 646234353Sdim if (MI->getOpcode() == ARM::t2MUL) { 647234353Sdim unsigned Reg2 = MI->getOperand(2).getReg(); 648234353Sdim // Early exit if the regs aren't all low regs. 649234353Sdim if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) 650234353Sdim || !isARMLowRegister(Reg2)) 651234353Sdim return false; 652234353Sdim if (Reg0 != Reg2) { 653234353Sdim // If the other operand also isn't the same as the destination, we 654234353Sdim // can't reduce. 655234353Sdim if (Reg1 != Reg0) 656234353Sdim return false; 657234353Sdim // Try to commute the operands to make it a 2-address instruction. 658234353Sdim MachineInstr *CommutedMI = TII->commuteInstruction(MI); 659234353Sdim if (!CommutedMI) 660234353Sdim return false; 661234353Sdim } 662234353Sdim } else if (Reg0 != Reg1) { 663210299Sed // Try to commute the operands to make it a 2-address instruction. 664210299Sed unsigned CommOpIdx1, CommOpIdx2; 665210299Sed if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || 666210299Sed CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0) 667210299Sed return false; 668210299Sed MachineInstr *CommutedMI = TII->commuteInstruction(MI); 669210299Sed if (!CommutedMI) 670210299Sed return false; 671210299Sed } 672198090Srdivacky if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) 673198090Srdivacky return false; 674198090Srdivacky if (Entry.Imm2Limit) { 675198090Srdivacky unsigned Imm = MI->getOperand(2).getImm(); 676198090Srdivacky unsigned Limit = (1 << Entry.Imm2Limit) - 1; 677198090Srdivacky if (Imm > Limit) 678198090Srdivacky return false; 679198090Srdivacky } else { 680198090Srdivacky unsigned Reg2 = MI->getOperand(2).getReg(); 681198090Srdivacky if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) 682198090Srdivacky return false; 683198090Srdivacky } 684198090Srdivacky 685198090Srdivacky // Check if it's possible / necessary to transfer the predicate. 686224145Sdim const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); 687198090Srdivacky unsigned PredReg = 0; 688198090Srdivacky ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 689198090Srdivacky bool SkipPred = false; 690198090Srdivacky if (Pred != ARMCC::AL) { 691224145Sdim if (!NewMCID.isPredicable()) 692198090Srdivacky // Can't transfer predicate, fail. 693198090Srdivacky return false; 694198090Srdivacky } else { 695224145Sdim SkipPred = !NewMCID.isPredicable(); 696198090Srdivacky } 697198090Srdivacky 698198090Srdivacky bool HasCC = false; 699198090Srdivacky bool CCDead = false; 700224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 701224145Sdim if (MCID.hasOptionalDef()) { 702224145Sdim unsigned NumOps = MCID.getNumOperands(); 703198090Srdivacky HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 704198090Srdivacky if (HasCC && MI->getOperand(NumOps-1).isDead()) 705198090Srdivacky CCDead = true; 706198090Srdivacky } 707198090Srdivacky if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) 708198090Srdivacky return false; 709198090Srdivacky 710221345Sdim // Avoid adding a false dependency on partial flag update by some 16-bit 711221345Sdim // instructions which has the 's' bit set. 712224145Sdim if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && 713249423Sdim canAddPseudoFlagDep(MI, IsSelfLoop)) 714221345Sdim return false; 715221345Sdim 716198090Srdivacky // Add the 16-bit instruction. 717198090Srdivacky DebugLoc dl = MI->getDebugLoc(); 718234353Sdim MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); 719198090Srdivacky MIB.addOperand(MI->getOperand(0)); 720224145Sdim if (NewMCID.hasOptionalDef()) { 721198090Srdivacky if (HasCC) 722198090Srdivacky AddDefaultT1CC(MIB, CCDead); 723198090Srdivacky else 724198090Srdivacky AddNoT1CC(MIB); 725198090Srdivacky } 726198090Srdivacky 727198090Srdivacky // Transfer the rest of operands. 728224145Sdim unsigned NumOps = MCID.getNumOperands(); 729198090Srdivacky for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 730224145Sdim if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) 731198090Srdivacky continue; 732224145Sdim if (SkipPred && MCID.OpInfo[i].isPredicate()) 733198090Srdivacky continue; 734198090Srdivacky MIB.addOperand(MI->getOperand(i)); 735198090Srdivacky } 736198090Srdivacky 737221345Sdim // Transfer MI flags. 738221345Sdim MIB.setMIFlags(MI->getFlags()); 739221345Sdim 740198090Srdivacky DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 741198090Srdivacky 742234353Sdim MBB.erase_instr(MI); 743198090Srdivacky ++Num2Addrs; 744198090Srdivacky return true; 745198090Srdivacky} 746198090Srdivacky 747198090Srdivackybool 748198090SrdivackyThumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 749198090Srdivacky const ReduceEntry &Entry, 750249423Sdim bool LiveCPSR, bool IsSelfLoop) { 751198090Srdivacky if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) 752198090Srdivacky return false; 753198090Srdivacky 754249423Sdim if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && 755249423Sdim STI->avoidMOVsShifterOperand()) 756249423Sdim // Don't issue movs with shifter operand for some CPUs unless we 757249423Sdim // are optimizing / minimizing for size. 758249423Sdim return false; 759249423Sdim 760198090Srdivacky unsigned Limit = ~0U; 761198090Srdivacky if (Entry.Imm1Limit) 762224145Sdim Limit = (1 << Entry.Imm1Limit) - 1; 763198090Srdivacky 764224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 765224145Sdim for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { 766224145Sdim if (MCID.OpInfo[i].isPredicate()) 767198090Srdivacky continue; 768198090Srdivacky const MachineOperand &MO = MI->getOperand(i); 769198090Srdivacky if (MO.isReg()) { 770198090Srdivacky unsigned Reg = MO.getReg(); 771198090Srdivacky if (!Reg || Reg == ARM::CPSR) 772198090Srdivacky continue; 773198090Srdivacky if (Entry.LowRegs1 && !isARMLowRegister(Reg)) 774198090Srdivacky return false; 775198090Srdivacky } else if (MO.isImm() && 776224145Sdim !MCID.OpInfo[i].isPredicate()) { 777224145Sdim if (((unsigned)MO.getImm()) > Limit) 778198090Srdivacky return false; 779198090Srdivacky } 780198090Srdivacky } 781198090Srdivacky 782198090Srdivacky // Check if it's possible / necessary to transfer the predicate. 783224145Sdim const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); 784198090Srdivacky unsigned PredReg = 0; 785198090Srdivacky ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 786198090Srdivacky bool SkipPred = false; 787198090Srdivacky if (Pred != ARMCC::AL) { 788224145Sdim if (!NewMCID.isPredicable()) 789198090Srdivacky // Can't transfer predicate, fail. 790198090Srdivacky return false; 791198090Srdivacky } else { 792224145Sdim SkipPred = !NewMCID.isPredicable(); 793198090Srdivacky } 794198090Srdivacky 795198090Srdivacky bool HasCC = false; 796198090Srdivacky bool CCDead = false; 797224145Sdim if (MCID.hasOptionalDef()) { 798224145Sdim unsigned NumOps = MCID.getNumOperands(); 799198090Srdivacky HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 800198090Srdivacky if (HasCC && MI->getOperand(NumOps-1).isDead()) 801198090Srdivacky CCDead = true; 802198090Srdivacky } 803198090Srdivacky if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) 804198090Srdivacky return false; 805198090Srdivacky 806221345Sdim // Avoid adding a false dependency on partial flag update by some 16-bit 807221345Sdim // instructions which has the 's' bit set. 808224145Sdim if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && 809249423Sdim canAddPseudoFlagDep(MI, IsSelfLoop)) 810221345Sdim return false; 811221345Sdim 812198090Srdivacky // Add the 16-bit instruction. 813198090Srdivacky DebugLoc dl = MI->getDebugLoc(); 814234353Sdim MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); 815198090Srdivacky MIB.addOperand(MI->getOperand(0)); 816224145Sdim if (NewMCID.hasOptionalDef()) { 817198090Srdivacky if (HasCC) 818198090Srdivacky AddDefaultT1CC(MIB, CCDead); 819198090Srdivacky else 820198090Srdivacky AddNoT1CC(MIB); 821198090Srdivacky } 822198090Srdivacky 823198090Srdivacky // Transfer the rest of operands. 824224145Sdim unsigned NumOps = MCID.getNumOperands(); 825198090Srdivacky for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 826224145Sdim if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) 827198090Srdivacky continue; 828224145Sdim if ((MCID.getOpcode() == ARM::t2RSBSri || 829226633Sdim MCID.getOpcode() == ARM::t2RSBri || 830226633Sdim MCID.getOpcode() == ARM::t2SXTB || 831226633Sdim MCID.getOpcode() == ARM::t2SXTH || 832226633Sdim MCID.getOpcode() == ARM::t2UXTB || 833226633Sdim MCID.getOpcode() == ARM::t2UXTH) && i == 2) 834198090Srdivacky // Skip the zero immediate operand, it's now implicit. 835198090Srdivacky continue; 836224145Sdim bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); 837198090Srdivacky if (SkipPred && isPred) 838198090Srdivacky continue; 839198090Srdivacky const MachineOperand &MO = MI->getOperand(i); 840224145Sdim if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) 841224145Sdim // Skip implicit def of CPSR. Either it's modeled as an optional 842224145Sdim // def now or it's already an implicit def on the new instruction. 843224145Sdim continue; 844224145Sdim MIB.addOperand(MO); 845198090Srdivacky } 846224145Sdim if (!MCID.isPredicable() && NewMCID.isPredicable()) 847198090Srdivacky AddDefaultPred(MIB); 848198090Srdivacky 849221345Sdim // Transfer MI flags. 850221345Sdim MIB.setMIFlags(MI->getFlags()); 851221345Sdim 852198090Srdivacky DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 853198090Srdivacky 854234353Sdim MBB.erase_instr(MI); 855198090Srdivacky ++NumNarrows; 856198090Srdivacky return true; 857198090Srdivacky} 858198090Srdivacky 859221345Sdimstatic bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) { 860198090Srdivacky bool HasDef = false; 861198090Srdivacky for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 862198090Srdivacky const MachineOperand &MO = MI.getOperand(i); 863198090Srdivacky if (!MO.isReg() || MO.isUndef() || MO.isUse()) 864198090Srdivacky continue; 865198090Srdivacky if (MO.getReg() != ARM::CPSR) 866198090Srdivacky continue; 867221345Sdim 868221345Sdim DefCPSR = true; 869198090Srdivacky if (!MO.isDead()) 870198090Srdivacky HasDef = true; 871198090Srdivacky } 872198090Srdivacky 873198090Srdivacky return HasDef || LiveCPSR; 874198090Srdivacky} 875198090Srdivacky 876198090Srdivackystatic bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { 877198090Srdivacky for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 878198090Srdivacky const MachineOperand &MO = MI.getOperand(i); 879198090Srdivacky if (!MO.isReg() || MO.isUndef() || MO.isDef()) 880198090Srdivacky continue; 881198090Srdivacky if (MO.getReg() != ARM::CPSR) 882198090Srdivacky continue; 883198090Srdivacky assert(LiveCPSR && "CPSR liveness tracking is wrong!"); 884198090Srdivacky if (MO.isKill()) { 885198090Srdivacky LiveCPSR = false; 886198090Srdivacky break; 887198090Srdivacky } 888198090Srdivacky } 889198090Srdivacky 890198090Srdivacky return LiveCPSR; 891198090Srdivacky} 892198090Srdivacky 893249423Sdimbool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, 894249423Sdim bool LiveCPSR, bool IsSelfLoop) { 895249423Sdim unsigned Opcode = MI->getOpcode(); 896249423Sdim DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); 897249423Sdim if (OPI == ReduceOpcodeMap.end()) 898249423Sdim return false; 899249423Sdim const ReduceEntry &Entry = ReduceTable[OPI->second]; 900249423Sdim 901249423Sdim // Don't attempt normal reductions on "special" cases for now. 902249423Sdim if (Entry.Special) 903249423Sdim return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop); 904249423Sdim 905249423Sdim // Try to transform to a 16-bit two-address instruction. 906249423Sdim if (Entry.NarrowOpc2 && 907249423Sdim ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) 908249423Sdim return true; 909249423Sdim 910249423Sdim // Try to transform to a 16-bit non-two-address instruction. 911249423Sdim if (Entry.NarrowOpc1 && 912249423Sdim ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) 913249423Sdim return true; 914249423Sdim 915249423Sdim return false; 916249423Sdim} 917249423Sdim 918198090Srdivackybool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { 919198090Srdivacky bool Modified = false; 920198090Srdivacky 921198090Srdivacky // Yes, CPSR could be livein. 922207618Srdivacky bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); 923234353Sdim MachineInstr *BundleMI = 0; 924198090Srdivacky 925249423Sdim CPSRDef = 0; 926249423Sdim HighLatencyCPSR = false; 927249423Sdim 928249423Sdim // Check predecessors for the latest CPSRDef. 929249423Sdim for (MachineBasicBlock::pred_iterator 930249423Sdim I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) { 931249423Sdim const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()]; 932249423Sdim if (!PInfo.Visited) { 933249423Sdim // Since blocks are visited in RPO, this must be a back-edge. 934249423Sdim continue; 935249423Sdim } 936249423Sdim if (PInfo.HighLatencyCPSR) { 937249423Sdim HighLatencyCPSR = true; 938249423Sdim break; 939249423Sdim } 940249423Sdim } 941249423Sdim 942234353Sdim // If this BB loops back to itself, conservatively avoid narrowing the 943234353Sdim // first instruction that does partial flag update. 944234353Sdim bool IsSelfLoop = MBB.isSuccessor(&MBB); 945234353Sdim MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end(); 946234353Sdim MachineBasicBlock::instr_iterator NextMII; 947198090Srdivacky for (; MII != E; MII = NextMII) { 948200581Srdivacky NextMII = llvm::next(MII); 949198090Srdivacky 950198090Srdivacky MachineInstr *MI = &*MII; 951234353Sdim if (MI->isBundle()) { 952234353Sdim BundleMI = MI; 953234353Sdim continue; 954234353Sdim } 955249423Sdim if (MI->isDebugValue()) 956249423Sdim continue; 957234353Sdim 958198090Srdivacky LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); 959198090Srdivacky 960249423Sdim // Does NextMII belong to the same bundle as MI? 961249423Sdim bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred(); 962198090Srdivacky 963249423Sdim if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) { 964249423Sdim Modified = true; 965249423Sdim MachineBasicBlock::instr_iterator I = prior(NextMII); 966249423Sdim MI = &*I; 967249423Sdim // Removing and reinserting the first instruction in a bundle will break 968249423Sdim // up the bundle. Fix the bundling if it was broken. 969249423Sdim if (NextInSameBundle && !NextMII->isBundledWithPred()) 970249423Sdim NextMII->bundleWithPred(); 971198090Srdivacky } 972198090Srdivacky 973249423Sdim if (!NextInSameBundle && MI->isInsideBundle()) { 974234353Sdim // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill 975234353Sdim // marker is only on the BUNDLE instruction. Process the BUNDLE 976234353Sdim // instruction as we finish with the bundled instruction to work around 977234353Sdim // the inconsistency. 978234353Sdim if (BundleMI->killsRegister(ARM::CPSR)) 979234353Sdim LiveCPSR = false; 980234353Sdim MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR); 981234353Sdim if (MO && !MO->isDead()) 982234353Sdim LiveCPSR = true; 983234353Sdim } 984234353Sdim 985221345Sdim bool DefCPSR = false; 986221345Sdim LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); 987234353Sdim if (MI->isCall()) { 988221345Sdim // Calls don't really set CPSR. 989221345Sdim CPSRDef = 0; 990249423Sdim HighLatencyCPSR = false; 991234353Sdim IsSelfLoop = false; 992234353Sdim } else if (DefCPSR) { 993221345Sdim // This is the last CPSR defining instruction. 994221345Sdim CPSRDef = MI; 995249423Sdim HighLatencyCPSR = isHighLatencyCPSR(CPSRDef); 996234353Sdim IsSelfLoop = false; 997234353Sdim } 998198090Srdivacky } 999198090Srdivacky 1000249423Sdim MBBInfo &Info = BlockInfo[MBB.getNumber()]; 1001249423Sdim Info.HighLatencyCPSR = HighLatencyCPSR; 1002249423Sdim Info.Visited = true; 1003198090Srdivacky return Modified; 1004198090Srdivacky} 1005198090Srdivacky 1006198090Srdivackybool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { 1007198090Srdivacky const TargetMachine &TM = MF.getTarget(); 1008198090Srdivacky TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); 1009221345Sdim STI = &TM.getSubtarget<ARMSubtarget>(); 1010198090Srdivacky 1011249423Sdim // Optimizing / minimizing size? 1012249423Sdim AttributeSet FnAttrs = MF.getFunction()->getAttributes(); 1013249423Sdim OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, 1014249423Sdim Attribute::OptimizeForSize); 1015249423Sdim MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, 1016249423Sdim Attribute::MinSize); 1017249423Sdim 1018249423Sdim BlockInfo.clear(); 1019249423Sdim BlockInfo.resize(MF.getNumBlockIDs()); 1020249423Sdim 1021249423Sdim // Visit blocks in reverse post-order so LastCPSRDef is known for all 1022249423Sdim // predecessors. 1023249423Sdim ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); 1024198090Srdivacky bool Modified = false; 1025249423Sdim for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator 1026249423Sdim I = RPOT.begin(), E = RPOT.end(); I != E; ++I) 1027249423Sdim Modified |= ReduceMBB(**I); 1028198090Srdivacky return Modified; 1029198090Srdivacky} 1030198090Srdivacky 1031198090Srdivacky/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size 1032198090Srdivacky/// reduction pass. 1033198090SrdivackyFunctionPass *llvm::createThumb2SizeReductionPass() { 1034198090Srdivacky return new Thumb2SizeReduce(); 1035198090Srdivacky} 1036