1207618Srdivacky//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// 2210299Sed// 3207618Srdivacky// The LLVM Compiler Infrastructure 4207618Srdivacky// 5207618Srdivacky// This file is distributed under the University of Illinois Open Source 6207618Srdivacky// License. See LICENSE.TXT for details. 7210299Sed// 8207618Srdivacky//===----------------------------------------------------------------------===// 9207618Srdivacky// 10207618Srdivacky// This file defines the itinerary class data for the ARM Cortex A9 processors. 11207618Srdivacky// 12207618Srdivacky//===----------------------------------------------------------------------===// 13207618Srdivacky 14245431Sdim// ===---------------------------------------------------------------------===// 15245431Sdim// This section contains legacy support for itineraries. This is 16245431Sdim// required until SD and PostRA schedulers are replaced by MachineScheduler. 17245431Sdim 18207618Srdivacky// 19207618Srdivacky// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical 20207618Srdivacky// Reference Manual". 
21207618Srdivacky// 22207618Srdivacky// Functional units 23218893Sdimdef A9_Issue0 : FuncUnit; // Issue 0 24218893Sdimdef A9_Issue1 : FuncUnit; // Issue 1 25218893Sdimdef A9_Branch : FuncUnit; // Branch 26218893Sdimdef A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 27218893Sdimdef A9_ALU1 : FuncUnit; // ALU pipeline 1 28218893Sdimdef A9_AGU : FuncUnit; // Address generation unit for ld / st 29218893Sdimdef A9_NPipe : FuncUnit; // NEON pipeline 30218893Sdimdef A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer 31218893Sdimdef A9_LSUnit : FuncUnit; // L/S Unit 32207618Srdivackydef A9_DRegsVFP: FuncUnit; // FP register set, VFP side 33207618Srdivackydef A9_DRegsN : FuncUnit; // FP register set, NEON side 34207618Srdivacky 35218893Sdim// Bypasses 36218893Sdimdef A9_LdBypass : Bypass; 37218893Sdim 38207618Srdivackydef CortexA9Itineraries : ProcessorItineraries< 39218893Sdim [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0, 40218893Sdim A9_LSUnit, A9_DRegsVFP, A9_DRegsN], 41218893Sdim [A9_LdBypass], [ 42210299Sed // Two fully-pipelined integer ALU pipelines 43218893Sdim 44210299Sed // 45210299Sed // Move instructions, unconditional 46218893Sdim InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 47218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 48218893Sdim InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 49218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 50218893Sdim InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 51218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 52218893Sdim InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 53218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 54218893Sdim InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 55218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>, 56218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, 57218893Sdim InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, 
A9_Issue1], 0>, 58218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>, 59218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>, 60218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>, 61218893Sdim InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 62218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>, 63218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>, 64218893Sdim InstrStage<1, [A9_MUX0], 0>, 65218893Sdim InstrStage<1, [A9_AGU], 0>, 66218893Sdim InstrStage<1, [A9_LSUnit]>], [5]>, 67210299Sed // 68218893Sdim // MVN instructions 69218893Sdim InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 70218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], 71218893Sdim [1]>, 72218893Sdim InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 73218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], 74218893Sdim [1, 1], [NoBypass, A9_LdBypass]>, 75218893Sdim InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 76218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], 77218893Sdim [2, 1]>, 78218893Sdim InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 79218893Sdim InstrStage<3, [A9_ALU0, A9_ALU1]>], 80218893Sdim [3, 1, 1]>, 81218893Sdim // 82210299Sed // No operand cycles 83218893Sdim InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 84218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>]>, 85210299Sed // 86210299Sed // Binary Instructions that produce a result 87218893Sdim InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 88218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], 89218893Sdim [1, 1], [NoBypass, A9_LdBypass]>, 90218893Sdim InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 91218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], 92218893Sdim [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>, 93218893Sdim InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 94218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], 95218893Sdim [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>, 96218893Sdim 
InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 97218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], 98218893Sdim [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>, 99218893Sdim InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 100218893Sdim InstrStage<3, [A9_ALU0, A9_ALU1]>], 101218893Sdim [3, 1, 1, 1], 102218893Sdim [NoBypass, A9_LdBypass, NoBypass, NoBypass]>, 103210299Sed // 104218893Sdim // Bitwise Instructions that produce a result 105218893Sdim InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 106218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 107218893Sdim InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 108218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, 109218893Sdim InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 110218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 111218893Sdim InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 112218893Sdim InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, 113218893Sdim // 114210299Sed // Unary Instructions that produce a result 115218893Sdim 116218893Sdim // CLZ, RBIT, etc. 
117218893Sdim InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 118218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 119218893Sdim 120218893Sdim // BFC, BFI, UBFX, SBFX 121218893Sdim InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 122218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>, 123218893Sdim 124210299Sed // 125218893Sdim // Zero and sign extension instructions 126218893Sdim InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 127218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>, 128218893Sdim InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 129218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>, 130218893Sdim InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 131218893Sdim InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, 132218893Sdim // 133210299Sed // Compare instructions 134218893Sdim InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 135218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], 136218893Sdim [1], [A9_LdBypass]>, 137218893Sdim InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 138218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], 139218893Sdim [1, 1], [A9_LdBypass, A9_LdBypass]>, 140218893Sdim InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 141218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], 142218893Sdim [1, 1], [A9_LdBypass, NoBypass]>, 143218893Sdim InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 144218893Sdim InstrStage<3, [A9_ALU0, A9_ALU1]>], 145218893Sdim [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>, 146210299Sed // 147218893Sdim // Test instructions 148218893Sdim InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 149218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 150218893Sdim InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 151218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 152218893Sdim 
InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 153218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>, 154218893Sdim InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 155218893Sdim InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, 156218893Sdim // 157210299Sed // Move instructions, conditional 158218893Sdim // FIXME: Correctly model the extra input dep on the destination. 159218893Sdim InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 160218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, 161218893Sdim InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 162218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 163218893Sdim InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 164218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, 165218893Sdim InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 166218893Sdim InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, 167218893Sdim InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 168218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>, 169218893Sdim InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 170218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, 171210299Sed 172210299Sed // Integer multiply pipeline 173210299Sed // 174218893Sdim InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 175218893Sdim InstrStage<2, [A9_ALU0]>], [3, 1, 1]>, 176218893Sdim InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 177218893Sdim InstrStage<2, [A9_ALU0]>], 178218893Sdim [3, 1, 1, 1]>, 179218893Sdim InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 180218893Sdim InstrStage<2, [A9_ALU0]>], [4, 1, 1]>, 181218893Sdim InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 182218893Sdim InstrStage<2, [A9_ALU0]>], 183218893Sdim [4, 1, 1, 1]>, 184218893Sdim InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 185218893Sdim 
InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>, 186218893Sdim InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 187218893Sdim InstrStage<3, [A9_ALU0]>], 188218893Sdim [4, 5, 1, 1]>, 189210299Sed // Integer load pipeline 190210299Sed // FIXME: The timings are some rough approximations 191210299Sed // 192210299Sed // Immediate offset 193218893Sdim InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 194218893Sdim InstrStage<1, [A9_MUX0], 0>, 195218893Sdim InstrStage<1, [A9_AGU], 0>, 196218893Sdim InstrStage<1, [A9_LSUnit]>], 197218893Sdim [3, 1], [A9_LdBypass]>, 198218893Sdim InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 199218893Sdim InstrStage<1, [A9_MUX0], 0>, 200218893Sdim InstrStage<2, [A9_AGU], 0>, 201218893Sdim InstrStage<1, [A9_LSUnit]>], 202218893Sdim [4, 1], [A9_LdBypass]>, 203218893Sdim // FIXME: If address is 64-bit aligned, AGU cycles is 1. 204218893Sdim InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 205218893Sdim InstrStage<1, [A9_MUX0], 0>, 206218893Sdim InstrStage<2, [A9_AGU], 0>, 207218893Sdim InstrStage<1, [A9_LSUnit]>], 208218893Sdim [3, 3, 1], [A9_LdBypass]>, 209210299Sed // 210210299Sed // Register offset 211218893Sdim InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 212218893Sdim InstrStage<1, [A9_MUX0], 0>, 213218893Sdim InstrStage<1, [A9_AGU], 0>, 214218893Sdim InstrStage<1, [A9_LSUnit]>], 215218893Sdim [3, 1, 1], [A9_LdBypass]>, 216218893Sdim InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 217218893Sdim InstrStage<1, [A9_MUX0], 0>, 218218893Sdim InstrStage<2, [A9_AGU], 0>, 219218893Sdim InstrStage<1, [A9_LSUnit]>], 220218893Sdim [4, 1, 1], [A9_LdBypass]>, 221218893Sdim InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 222218893Sdim InstrStage<1, [A9_MUX0], 0>, 223218893Sdim InstrStage<2, [A9_AGU], 0>, 224218893Sdim InstrStage<1, [A9_LSUnit]>], 225218893Sdim [3, 3, 1, 1], [A9_LdBypass]>, 
226210299Sed // 227210299Sed // Scaled register offset 228218893Sdim InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 229218893Sdim InstrStage<1, [A9_MUX0], 0>, 230218893Sdim InstrStage<1, [A9_AGU], 0>, 231218893Sdim InstrStage<1, [A9_LSUnit], 0>], 232218893Sdim [4, 1, 1], [A9_LdBypass]>, 233218893Sdim InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 234218893Sdim InstrStage<1, [A9_MUX0], 0>, 235218893Sdim InstrStage<2, [A9_AGU], 0>, 236218893Sdim InstrStage<1, [A9_LSUnit]>], 237218893Sdim [5, 1, 1], [A9_LdBypass]>, 238210299Sed // 239210299Sed // Immediate offset with update 240218893Sdim InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 241218893Sdim InstrStage<1, [A9_MUX0], 0>, 242218893Sdim InstrStage<1, [A9_AGU], 0>, 243218893Sdim InstrStage<1, [A9_LSUnit]>], 244218893Sdim [3, 2, 1], [A9_LdBypass]>, 245218893Sdim InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 246218893Sdim InstrStage<1, [A9_MUX0], 0>, 247218893Sdim InstrStage<2, [A9_AGU], 0>, 248218893Sdim InstrStage<1, [A9_LSUnit]>], 249218893Sdim [4, 3, 1], [A9_LdBypass]>, 250210299Sed // 251210299Sed // Register offset with update 252218893Sdim InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 253218893Sdim InstrStage<1, [A9_MUX0], 0>, 254218893Sdim InstrStage<1, [A9_AGU], 0>, 255218893Sdim InstrStage<1, [A9_LSUnit]>], 256218893Sdim [3, 2, 1, 1], [A9_LdBypass]>, 257218893Sdim InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 258218893Sdim InstrStage<1, [A9_MUX0], 0>, 259218893Sdim InstrStage<2, [A9_AGU], 0>, 260218893Sdim InstrStage<1, [A9_LSUnit]>], 261218893Sdim [4, 3, 1, 1], [A9_LdBypass]>, 262218893Sdim InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 263218893Sdim InstrStage<1, [A9_MUX0], 0>, 264218893Sdim InstrStage<2, [A9_AGU], 0>, 265218893Sdim InstrStage<1, [A9_LSUnit]>], 266218893Sdim [3, 3, 1, 1], [A9_LdBypass]>, 267210299Sed // 
268210299Sed // Scaled register offset with update 269218893Sdim InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 270218893Sdim InstrStage<1, [A9_MUX0], 0>, 271218893Sdim InstrStage<1, [A9_AGU], 0>, 272218893Sdim InstrStage<1, [A9_LSUnit]>], 273218893Sdim [4, 3, 1, 1], [A9_LdBypass]>, 274218893Sdim InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 275218893Sdim InstrStage<1, [A9_MUX0], 0>, 276218893Sdim InstrStage<2, [A9_AGU], 0>, 277218893Sdim InstrStage<1, [A9_LSUnit]>], 278218893Sdim [5, 4, 1, 1], [A9_LdBypass]>, 279210299Sed // 280218893Sdim // Load multiple, def is the 5th operand. 281218893Sdim // FIXME: This assumes 3 to 4 registers. 282218893Sdim InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 283218893Sdim InstrStage<1, [A9_MUX0], 0>, 284218893Sdim InstrStage<2, [A9_AGU], 1>, 285218893Sdim InstrStage<2, [A9_LSUnit]>], 286218893Sdim [1, 1, 1, 1, 3], 287245431Sdim [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 288245431Sdim -1>, // dynamic uops 289218893Sdim // 290218893Sdim // Load multiple + update, defs are the 1st and 5th operands. 
291218893Sdim InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 292218893Sdim InstrStage<1, [A9_MUX0], 0>, 293218893Sdim InstrStage<2, [A9_AGU], 1>, 294218893Sdim InstrStage<2, [A9_LSUnit]>], 295218893Sdim [2, 1, 1, 1, 3], 296245431Sdim [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 297245431Sdim -1>, // dynamic uops 298218893Sdim // 299218893Sdim // Load multiple plus branch 300218893Sdim InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 301218893Sdim InstrStage<1, [A9_MUX0], 0>, 302218893Sdim InstrStage<1, [A9_AGU], 1>, 303218893Sdim InstrStage<2, [A9_LSUnit]>, 304218893Sdim InstrStage<1, [A9_Branch]>], 305218893Sdim [1, 2, 1, 1, 3], 306245431Sdim [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], 307245431Sdim -1>, // dynamic uops 308218893Sdim // 309218893Sdim // Pop, def is the 3rd operand. 310218893Sdim InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 311218893Sdim InstrStage<1, [A9_MUX0], 0>, 312218893Sdim InstrStage<2, [A9_AGU], 1>, 313218893Sdim InstrStage<2, [A9_LSUnit]>], 314218893Sdim [1, 1, 3], 315245431Sdim [NoBypass, NoBypass, A9_LdBypass], 316245431Sdim -1>, // dynamic uops 317218893Sdim // 318218893Sdim // Pop + branch, def is the 3rd operand. 319218893Sdim InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 320218893Sdim InstrStage<1, [A9_MUX0], 0>, 321218893Sdim InstrStage<2, [A9_AGU], 1>, 322218893Sdim InstrStage<2, [A9_LSUnit]>, 323218893Sdim InstrStage<1, [A9_Branch]>], 324218893Sdim [1, 1, 3], 325245431Sdim [NoBypass, NoBypass, A9_LdBypass], 326245431Sdim -1>, // dynamic uops 327218893Sdim // 328218893Sdim // iLoadi + iALUr for t2LDRpci_pic. 
329218893Sdim InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 330218893Sdim InstrStage<1, [A9_MUX0], 0>, 331218893Sdim InstrStage<1, [A9_AGU], 0>, 332218893Sdim InstrStage<1, [A9_LSUnit]>, 333218893Sdim InstrStage<1, [A9_ALU0, A9_ALU1]>], 334218893Sdim [2, 1]>, 335218893Sdim 336210299Sed // Integer store pipeline 337210299Sed /// 338210299Sed // Immediate offset 339218893Sdim InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 340218893Sdim InstrStage<1, [A9_MUX0], 0>, 341218893Sdim InstrStage<1, [A9_AGU], 0>, 342218893Sdim InstrStage<1, [A9_LSUnit]>], [1, 1]>, 343218893Sdim InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 344218893Sdim InstrStage<1, [A9_MUX0], 0>, 345218893Sdim InstrStage<2, [A9_AGU], 1>, 346218893Sdim InstrStage<1, [A9_LSUnit]>], [1, 1]>, 347218893Sdim // FIXME: If address is 64-bit aligned, AGU cycles is 1. 348218893Sdim InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 349218893Sdim InstrStage<1, [A9_MUX0], 0>, 350218893Sdim InstrStage<2, [A9_AGU], 1>, 351218893Sdim InstrStage<1, [A9_LSUnit]>], [1, 1]>, 352210299Sed // 353210299Sed // Register offset 354218893Sdim InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 355218893Sdim InstrStage<1, [A9_MUX0], 0>, 356218893Sdim InstrStage<1, [A9_AGU], 0>, 357218893Sdim InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 358218893Sdim InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 359218893Sdim InstrStage<1, [A9_MUX0], 0>, 360218893Sdim InstrStage<2, [A9_AGU], 1>, 361218893Sdim InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 362218893Sdim InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 363218893Sdim InstrStage<1, [A9_MUX0], 0>, 364218893Sdim InstrStage<2, [A9_AGU], 1>, 365218893Sdim InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 366210299Sed // 367210299Sed // Scaled register offset 368218893Sdim InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 
0>, 369218893Sdim InstrStage<1, [A9_MUX0], 0>, 370218893Sdim InstrStage<1, [A9_AGU], 0>, 371218893Sdim InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 372218893Sdim InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 373218893Sdim InstrStage<1, [A9_MUX0], 0>, 374218893Sdim InstrStage<2, [A9_AGU], 1>, 375218893Sdim InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, 376210299Sed // 377210299Sed // Immediate offset with update 378218893Sdim InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 379218893Sdim InstrStage<1, [A9_MUX0], 0>, 380218893Sdim InstrStage<1, [A9_AGU], 0>, 381218893Sdim InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>, 382218893Sdim InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 383218893Sdim InstrStage<1, [A9_MUX0], 0>, 384218893Sdim InstrStage<2, [A9_AGU], 1>, 385218893Sdim InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>, 386210299Sed // 387210299Sed // Register offset with update 388218893Sdim InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 389218893Sdim InstrStage<1, [A9_MUX0], 0>, 390218893Sdim InstrStage<1, [A9_AGU], 0>, 391218893Sdim InstrStage<1, [A9_LSUnit]>], 392218893Sdim [2, 1, 1, 1]>, 393218893Sdim InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 394218893Sdim InstrStage<1, [A9_MUX0], 0>, 395218893Sdim InstrStage<2, [A9_AGU], 1>, 396218893Sdim InstrStage<1, [A9_LSUnit]>], 397218893Sdim [3, 1, 1, 1]>, 398218893Sdim InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 399218893Sdim InstrStage<1, [A9_MUX0], 0>, 400218893Sdim InstrStage<2, [A9_AGU], 1>, 401218893Sdim InstrStage<1, [A9_LSUnit]>], 402218893Sdim [3, 1, 1, 1]>, 403210299Sed // 404210299Sed // Scaled register offset with update 405218893Sdim InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 406218893Sdim InstrStage<1, [A9_MUX0], 0>, 407218893Sdim InstrStage<1, [A9_AGU], 0>, 408218893Sdim InstrStage<1, [A9_LSUnit]>], 409218893Sdim [2, 1, 1, 1]>, 
410218893Sdim InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 411218893Sdim InstrStage<1, [A9_MUX0], 0>, 412218893Sdim InstrStage<2, [A9_AGU], 1>, 413218893Sdim InstrStage<1, [A9_LSUnit]>], 414218893Sdim [3, 1, 1, 1]>, 415210299Sed // 416210299Sed // Store multiple 417218893Sdim InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 418218893Sdim InstrStage<1, [A9_MUX0], 0>, 419218893Sdim InstrStage<1, [A9_AGU], 0>, 420245431Sdim InstrStage<2, [A9_LSUnit]>], 421245431Sdim [], [], -1>, // dynamic uops 422218893Sdim // 423218893Sdim // Store multiple + update 424218893Sdim InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 425218893Sdim InstrStage<1, [A9_MUX0], 0>, 426218893Sdim InstrStage<1, [A9_AGU], 0>, 427245431Sdim InstrStage<2, [A9_LSUnit]>], 428245431Sdim [2], [], -1>, // dynamic uops 429218893Sdim // 430218893Sdim // Preload 431218893Sdim InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, 432218893Sdim 433210299Sed // Branch 434210299Sed // 435210299Sed // no delay slots, so the latency of a branch is unimportant 436218893Sdim InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>, 437218893Sdim InstrStage<1, [A9_Issue1], 0>, 438218893Sdim InstrStage<1, [A9_Branch]>]>, 439210299Sed 440207618Srdivacky // VFP and NEON share the same register file. This means that every VFP 441207618Srdivacky // instruction should wait for full completion of the consecutive NEON 442207618Srdivacky // instruction and vice-versa. We model this behavior with two artificial FUs: 443207618Srdivacky // DRegsVFP and DRegsN. 444207618Srdivacky // 445207618Srdivacky // Every VFP instruction: 446207618Srdivacky // - Acquires DRegsVFP resource for 1 cycle 447207618Srdivacky // - Reserves DRegsN resource for the whole duration (including time to 448207618Srdivacky // register file writeback!). 449207618Srdivacky // Every NEON instruction does the same but with FUs swapped. 
450207618Srdivacky // 451210299Sed // Since the reserved FU cannot be acquired, this models precisely 452210299Sed // "cross-domain" stalls. 453207618Srdivacky 454207618Srdivacky // VFP 455207618Srdivacky // Issue through integer pipeline, and execute in NEON unit. 456207618Srdivacky 457207618Srdivacky // FP Special Register to Integer Register File Move 458218893Sdim InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 459218893Sdim InstrStage<1, [A9_MUX0], 0>, 460218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 461207618Srdivacky InstrStage<2, [A9_DRegsN], 0, Reserved>, 462218893Sdim InstrStage<1, [A9_NPipe]>], 463218893Sdim [1]>, 464207618Srdivacky // 465207618Srdivacky // Single-precision FP Unary 466218893Sdim InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 467218893Sdim InstrStage<1, [A9_MUX0], 0>, 468218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 469207618Srdivacky // Extra latency cycles since wbck is 2 cycles 470207618Srdivacky InstrStage<3, [A9_DRegsN], 0, Reserved>, 471218893Sdim InstrStage<1, [A9_NPipe]>], 472218893Sdim [1, 1]>, 473207618Srdivacky // 474207618Srdivacky // Double-precision FP Unary 475218893Sdim InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 476218893Sdim InstrStage<1, [A9_MUX0], 0>, 477218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 478207618Srdivacky // Extra latency cycles since wbck is 2 cycles 479207618Srdivacky InstrStage<3, [A9_DRegsN], 0, Reserved>, 480218893Sdim InstrStage<1, [A9_NPipe]>], 481218893Sdim [1, 1]>, 482207618Srdivacky 483207618Srdivacky // 484207618Srdivacky // Single-precision FP Compare 485218893Sdim InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 486218893Sdim InstrStage<1, [A9_MUX0], 0>, 487218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 488207618Srdivacky // Extra latency cycles since wbck is 4 cycles 489207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 490218893Sdim InstrStage<1, [A9_NPipe]>], 
491218893Sdim [1, 1]>, 492207618Srdivacky // 493207618Srdivacky // Double-precision FP Compare 494218893Sdim InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 495218893Sdim InstrStage<1, [A9_MUX0], 0>, 496218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 497207618Srdivacky // Extra latency cycles since wbck is 4 cycles 498207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 499218893Sdim InstrStage<1, [A9_NPipe]>], 500218893Sdim [1, 1]>, 501207618Srdivacky // 502207618Srdivacky // Single to Double FP Convert 503218893Sdim InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 504218893Sdim InstrStage<1, [A9_MUX0], 0>, 505218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 506207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 507218893Sdim InstrStage<1, [A9_NPipe]>], 508218893Sdim [4, 1]>, 509207618Srdivacky // 510207618Srdivacky // Double to Single FP Convert 511218893Sdim InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 512218893Sdim InstrStage<1, [A9_MUX0], 0>, 513218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 514207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 515218893Sdim InstrStage<1, [A9_NPipe]>], 516218893Sdim [4, 1]>, 517207618Srdivacky 518207618Srdivacky // 519207618Srdivacky // Single to Half FP Convert 520218893Sdim InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 521218893Sdim InstrStage<1, [A9_MUX0], 0>, 522218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 523207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 524218893Sdim InstrStage<1, [A9_NPipe]>], 525218893Sdim [4, 1]>, 526207618Srdivacky // 527207618Srdivacky // Half to Single FP Convert 528218893Sdim InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 529218893Sdim InstrStage<1, [A9_MUX0], 0>, 530218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 531207618Srdivacky InstrStage<3, [A9_DRegsN], 0, Reserved>, 532218893Sdim InstrStage<1, [A9_NPipe]>], 
533218893Sdim [2, 1]>, 534207618Srdivacky 535207618Srdivacky // 536207618Srdivacky // Single-Precision FP to Integer Convert 537218893Sdim InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 538218893Sdim InstrStage<1, [A9_MUX0], 0>, 539218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 540207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 541218893Sdim InstrStage<1, [A9_NPipe]>], 542218893Sdim [4, 1]>, 543207618Srdivacky // 544207618Srdivacky // Double-Precision FP to Integer Convert 545218893Sdim InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 546218893Sdim InstrStage<1, [A9_MUX0], 0>, 547218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 548207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 549218893Sdim InstrStage<1, [A9_NPipe]>], 550218893Sdim [4, 1]>, 551207618Srdivacky // 552207618Srdivacky // Integer to Single-Precision FP Convert 553218893Sdim InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 554218893Sdim InstrStage<1, [A9_MUX0], 0>, 555218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 556207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 557218893Sdim InstrStage<1, [A9_NPipe]>], 558218893Sdim [4, 1]>, 559207618Srdivacky // 560207618Srdivacky // Integer to Double-Precision FP Convert 561218893Sdim InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 562218893Sdim InstrStage<1, [A9_MUX0], 0>, 563218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 564207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 565218893Sdim InstrStage<1, [A9_NPipe]>], 566218893Sdim [4, 1]>, 567207618Srdivacky // 568207618Srdivacky // Single-precision FP ALU 569218893Sdim InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 570218893Sdim InstrStage<1, [A9_MUX0], 0>, 571218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 572207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 573218893Sdim InstrStage<1, [A9_NPipe]>], 574218893Sdim [4, 1, 1]>, 
575207618Srdivacky // 576207618Srdivacky // Double-precision FP ALU 577218893Sdim InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 578218893Sdim InstrStage<1, [A9_MUX0], 0>, 579218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 580207618Srdivacky InstrStage<5, [A9_DRegsN], 0, Reserved>, 581218893Sdim InstrStage<1, [A9_NPipe]>], 582218893Sdim [4, 1, 1]>, 583207618Srdivacky // 584207618Srdivacky // Single-precision FP Multiply 585218893Sdim InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 586218893Sdim InstrStage<1, [A9_MUX0], 0>, 587218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 588207618Srdivacky InstrStage<6, [A9_DRegsN], 0, Reserved>, 589218893Sdim InstrStage<1, [A9_NPipe]>], 590218893Sdim [5, 1, 1]>, 591207618Srdivacky // 592207618Srdivacky // Double-precision FP Multiply 593218893Sdim InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 594218893Sdim InstrStage<1, [A9_MUX0], 0>, 595218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 596207618Srdivacky InstrStage<7, [A9_DRegsN], 0, Reserved>, 597218893Sdim InstrStage<2, [A9_NPipe]>], 598218893Sdim [6, 1, 1]>, 599207618Srdivacky // 600207618Srdivacky // Single-precision FP MAC 601218893Sdim InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 602218893Sdim InstrStage<1, [A9_MUX0], 0>, 603218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 604207618Srdivacky InstrStage<9, [A9_DRegsN], 0, Reserved>, 605218893Sdim InstrStage<1, [A9_NPipe]>], 606218893Sdim [8, 1, 1, 1]>, 607207618Srdivacky // 608207618Srdivacky // Double-precision FP MAC 609218893Sdim InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 610218893Sdim InstrStage<1, [A9_MUX0], 0>, 611218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 612207618Srdivacky InstrStage<10, [A9_DRegsN], 0, Reserved>, 613218893Sdim InstrStage<2, [A9_NPipe]>], 614218893Sdim [9, 1, 1, 1]>, 615207618Srdivacky // 616235633Sdim // Single-precision Fused FP MAC 
617235633Sdim InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 618235633Sdim InstrStage<1, [A9_MUX0], 0>, 619235633Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 620235633Sdim InstrStage<9, [A9_DRegsN], 0, Reserved>, 621235633Sdim InstrStage<1, [A9_NPipe]>], 622235633Sdim [8, 1, 1, 1]>, 623235633Sdim // 624235633Sdim // Double-precision Fused FP MAC 625235633Sdim InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 626235633Sdim InstrStage<1, [A9_MUX0], 0>, 627235633Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 628235633Sdim InstrStage<10, [A9_DRegsN], 0, Reserved>, 629235633Sdim InstrStage<2, [A9_NPipe]>], 630235633Sdim [9, 1, 1, 1]>, 631235633Sdim // 632207618Srdivacky // Single-precision FP DIV 633218893Sdim InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 634218893Sdim InstrStage<1, [A9_MUX0], 0>, 635218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 636207618Srdivacky InstrStage<16, [A9_DRegsN], 0, Reserved>, 637218893Sdim InstrStage<10, [A9_NPipe]>], 638218893Sdim [15, 1, 1]>, 639207618Srdivacky // 640207618Srdivacky // Double-precision FP DIV 641218893Sdim InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 642218893Sdim InstrStage<1, [A9_MUX0], 0>, 643218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 644207618Srdivacky InstrStage<26, [A9_DRegsN], 0, Reserved>, 645218893Sdim InstrStage<20, [A9_NPipe]>], 646218893Sdim [25, 1, 1]>, 647207618Srdivacky // 648207618Srdivacky // Single-precision FP SQRT 649218893Sdim InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 650218893Sdim InstrStage<1, [A9_MUX0], 0>, 651218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 652207618Srdivacky InstrStage<18, [A9_DRegsN], 0, Reserved>, 653218893Sdim InstrStage<13, [A9_NPipe]>], 654218893Sdim [17, 1]>, 655207618Srdivacky // 656207618Srdivacky // Double-precision FP SQRT 657218893Sdim InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 658218893Sdim 
InstrStage<1, [A9_MUX0], 0>, 659218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 660207618Srdivacky InstrStage<33, [A9_DRegsN], 0, Reserved>, 661218893Sdim InstrStage<28, [A9_NPipe]>], 662218893Sdim [32, 1]>, 663207618Srdivacky 664207618Srdivacky // 665207618Srdivacky // Integer to Single-precision Move 666218893Sdim InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 667218893Sdim InstrStage<1, [A9_MUX0], 0>, 668218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 669207618Srdivacky // Extra 1 latency cycle since wbck is 2 cycles 670207618Srdivacky InstrStage<3, [A9_DRegsN], 0, Reserved>, 671218893Sdim InstrStage<1, [A9_NPipe]>], 672218893Sdim [1, 1]>, 673207618Srdivacky // 674207618Srdivacky // Integer to Double-precision Move 675218893Sdim InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 676218893Sdim InstrStage<1, [A9_MUX0], 0>, 677218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 678207618Srdivacky // Extra 1 latency cycle since wbck is 2 cycles 679207618Srdivacky InstrStage<3, [A9_DRegsN], 0, Reserved>, 680218893Sdim InstrStage<1, [A9_NPipe]>], 681218893Sdim [1, 1, 1]>, 682207618Srdivacky // 683207618Srdivacky // Single-precision to Integer Move 684221345Sdim // 685221345Sdim // On A9 move-from-VFP is free to issue with no stall if other VFP 686221345Sdim // operations are in flight. I assume it still can't dual-issue though. 687218893Sdim InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 688221345Sdim InstrStage<1, [A9_MUX0], 0>], 689218893Sdim [2, 1]>, 690207618Srdivacky // 691207618Srdivacky // Double-precision to Integer Move 692221345Sdim // 693221345Sdim // On A9 move-from-VFP is free to issue with no stall if other VFP 694221345Sdim // operations are in flight. I assume it still can't dual-issue though. 
695218893Sdim InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 696221345Sdim InstrStage<1, [A9_MUX0], 0>], 697218893Sdim [2, 1, 1]>, 698207618Srdivacky // 699207618Srdivacky // Single-precision FP Load 700218893Sdim InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 701218893Sdim InstrStage<1, [A9_MUX0], 0>, 702218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 703207618Srdivacky InstrStage<2, [A9_DRegsN], 0, Reserved>, 704218893Sdim InstrStage<1, [A9_NPipe], 0>, 705218893Sdim InstrStage<1, [A9_LSUnit]>], 706218893Sdim [1, 1]>, 707207618Srdivacky // 708207618Srdivacky // Double-precision FP Load 709218893Sdim // FIXME: Result latency is 1 if address is 64-bit aligned. 710218893Sdim InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 711218893Sdim InstrStage<1, [A9_MUX0], 0>, 712218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 713207618Srdivacky InstrStage<2, [A9_DRegsN], 0, Reserved>, 714218893Sdim InstrStage<1, [A9_NPipe], 0>, 715218893Sdim InstrStage<1, [A9_LSUnit]>], 716218893Sdim [2, 1]>, 717207618Srdivacky // 718207618Srdivacky // FP Load Multiple 719221345Sdim // FIXME: assumes 2 doubles which requires 2 LS cycles. 720218893Sdim InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 721218893Sdim InstrStage<1, [A9_MUX0], 0>, 722218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 723207618Srdivacky InstrStage<2, [A9_DRegsN], 0, Reserved>, 724218893Sdim InstrStage<1, [A9_NPipe], 0>, 725245431Sdim InstrStage<2, [A9_LSUnit]>], 726245431Sdim [1, 1, 1, 1], [], -1>, // dynamic uops 727207618Srdivacky // 728218893Sdim // FP Load Multiple + update 729221345Sdim // FIXME: assumes 2 doubles which requires 2 LS cycles. 
730218893Sdim InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 731218893Sdim InstrStage<1, [A9_MUX0], 0>, 732218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 733218893Sdim InstrStage<2, [A9_DRegsN], 0, Reserved>, 734218893Sdim InstrStage<1, [A9_NPipe], 0>, 735245431Sdim InstrStage<2, [A9_LSUnit]>], 736245431Sdim [2, 1, 1, 1], [], -1>, // dynamic uops 737218893Sdim // 738207618Srdivacky // Single-precision FP Store 739218893Sdim InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 740218893Sdim InstrStage<1, [A9_MUX0], 0>, 741218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 742207618Srdivacky InstrStage<2, [A9_DRegsN], 0, Reserved>, 743218893Sdim InstrStage<1, [A9_NPipe], 0>, 744218893Sdim InstrStage<1, [A9_LSUnit]>], 745218893Sdim [1, 1]>, 746207618Srdivacky // 747207618Srdivacky // Double-precision FP Store 748218893Sdim InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 749218893Sdim InstrStage<1, [A9_MUX0], 0>, 750218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 751207618Srdivacky InstrStage<2, [A9_DRegsN], 0, Reserved>, 752218893Sdim InstrStage<1, [A9_NPipe], 0>, 753218893Sdim InstrStage<1, [A9_LSUnit]>], 754218893Sdim [1, 1]>, 755207618Srdivacky // 756207618Srdivacky // FP Store Multiple 757221345Sdim // FIXME: assumes 2 doubles which requires 2 LS cycles. 758218893Sdim InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 759218893Sdim InstrStage<1, [A9_MUX0], 0>, 760218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 761207618Srdivacky InstrStage<2, [A9_DRegsN], 0, Reserved>, 762218893Sdim InstrStage<1, [A9_NPipe], 0>, 763245431Sdim InstrStage<2, [A9_LSUnit]>], 764245431Sdim [1, 1, 1, 1], [], -1>, // dynamic uops 765218893Sdim // 766218893Sdim // FP Store Multiple + update 767221345Sdim // FIXME: assumes 2 doubles which requires 2 LS cycles. 
768218893Sdim InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 769218893Sdim InstrStage<1, [A9_MUX0], 0>, 770218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Required>, 771218893Sdim InstrStage<2, [A9_DRegsN], 0, Reserved>, 772218893Sdim InstrStage<1, [A9_NPipe], 0>, 773245431Sdim InstrStage<2, [A9_LSUnit]>], 774245431Sdim [2, 1, 1, 1], [], -1>, // dynamic uops 775207618Srdivacky // NEON 776207618Srdivacky // VLD1 777218893Sdim InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 778218893Sdim InstrStage<1, [A9_MUX0], 0>, 779218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 780221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 781221345Sdim InstrStage<1, [A9_NPipe], 0>, 782221345Sdim InstrStage<1, [A9_LSUnit]>], 783221345Sdim [1, 1]>, 784218893Sdim // VLD1x2 785218893Sdim InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 786218893Sdim InstrStage<1, [A9_MUX0], 0>, 787218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 788221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 789221345Sdim InstrStage<1, [A9_NPipe], 0>, 790221345Sdim InstrStage<1, [A9_LSUnit]>], 791221345Sdim [1, 1, 1]>, 792218893Sdim // VLD1x3 793218893Sdim InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 794218893Sdim InstrStage<1, [A9_MUX0], 0>, 795218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 796221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 797221345Sdim InstrStage<2, [A9_NPipe], 0>, 798221345Sdim InstrStage<2, [A9_LSUnit]>], 799221345Sdim [1, 1, 2, 1]>, 800218893Sdim // VLD1x4 801218893Sdim InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 802218893Sdim InstrStage<1, [A9_MUX0], 0>, 803218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 804221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 805221345Sdim InstrStage<2, [A9_NPipe], 0>, 806221345Sdim InstrStage<2, [A9_LSUnit]>], 807221345Sdim [1, 1, 2, 2, 1]>, 808218893Sdim // VLD1u 809218893Sdim InstrItinData<IIC_VLD1u, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 810218893Sdim InstrStage<1, [A9_MUX0], 0>, 811218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 812221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 813221345Sdim InstrStage<1, [A9_NPipe], 0>, 814221345Sdim InstrStage<1, [A9_LSUnit]>], 815221345Sdim [1, 2, 1]>, 816218893Sdim // VLD1x2u 817218893Sdim InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 818218893Sdim InstrStage<1, [A9_MUX0], 0>, 819218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 820221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 821221345Sdim InstrStage<1, [A9_NPipe], 0>, 822221345Sdim InstrStage<1, [A9_LSUnit]>], 823221345Sdim [1, 1, 2, 1]>, 824218893Sdim // VLD1x3u 825218893Sdim InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 826218893Sdim InstrStage<1, [A9_MUX0], 0>, 827218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 828221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 829221345Sdim InstrStage<2, [A9_NPipe], 0>, 830221345Sdim InstrStage<2, [A9_LSUnit]>], 831221345Sdim [1, 1, 2, 2, 1]>, 832218893Sdim // VLD1x4u 833218893Sdim InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 834218893Sdim InstrStage<1, [A9_MUX0], 0>, 835218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 836221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 837221345Sdim InstrStage<2, [A9_NPipe], 0>, 838221345Sdim InstrStage<2, [A9_LSUnit]>], 839221345Sdim [1, 1, 2, 2, 2, 1]>, 840207618Srdivacky // 841218893Sdim // VLD1ln 842218893Sdim InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 843218893Sdim InstrStage<1, [A9_MUX0], 0>, 844218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 845221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 846221345Sdim InstrStage<2, [A9_NPipe], 0>, 847221345Sdim InstrStage<2, [A9_LSUnit]>], 848221345Sdim [3, 1, 1, 1]>, 849218893Sdim // 850218893Sdim // VLD1lnu 851218893Sdim InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 852218893Sdim InstrStage<1, 
[A9_MUX0], 0>, 853218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 854221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 855221345Sdim InstrStage<2, [A9_NPipe], 0>, 856221345Sdim InstrStage<2, [A9_LSUnit]>], 857221345Sdim [3, 2, 1, 1, 1, 1]>, 858218893Sdim // 859218893Sdim // VLD1dup 860218893Sdim InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 861218893Sdim InstrStage<1, [A9_MUX0], 0>, 862218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 863221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 864221345Sdim InstrStage<1, [A9_NPipe], 0>, 865221345Sdim InstrStage<1, [A9_LSUnit]>], 866221345Sdim [2, 1]>, 867218893Sdim // 868218893Sdim // VLD1dupu 869218893Sdim InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 870218893Sdim InstrStage<1, [A9_MUX0], 0>, 871218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 872221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 873221345Sdim InstrStage<1, [A9_NPipe], 0>, 874221345Sdim InstrStage<1, [A9_LSUnit]>], 875221345Sdim [2, 2, 1, 1]>, 876218893Sdim // 877207618Srdivacky // VLD2 878218893Sdim InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 879218893Sdim InstrStage<1, [A9_MUX0], 0>, 880218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 881218893Sdim // Extra latency cycles since wbck is 7 cycles 882221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 883221345Sdim InstrStage<1, [A9_NPipe], 0>, 884221345Sdim InstrStage<1, [A9_LSUnit]>], 885221345Sdim [2, 2, 1]>, 886207618Srdivacky // 887218893Sdim // VLD2x2 888218893Sdim InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 889218893Sdim InstrStage<1, [A9_MUX0], 0>, 890218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 891221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 892221345Sdim InstrStage<2, [A9_NPipe], 0>, 893221345Sdim InstrStage<2, [A9_LSUnit]>], 894221345Sdim [2, 3, 2, 3, 1]>, 895218893Sdim // 896218893Sdim // VLD2ln 897218893Sdim InstrItinData<IIC_VLD2ln, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, 898218893Sdim InstrStage<1, [A9_MUX0], 0>, 899218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 900221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 901221345Sdim InstrStage<2, [A9_NPipe], 0>, 902221345Sdim InstrStage<2, [A9_LSUnit]>], 903221345Sdim [3, 3, 1, 1, 1, 1]>, 904218893Sdim // 905218893Sdim // VLD2u 906218893Sdim InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 907218893Sdim InstrStage<1, [A9_MUX0], 0>, 908218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 909218893Sdim // Extra latency cycles since wbck is 7 cycles 910221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 911221345Sdim InstrStage<1, [A9_NPipe], 0>, 912221345Sdim InstrStage<1, [A9_LSUnit]>], 913221345Sdim [2, 2, 2, 1, 1, 1]>, 914218893Sdim // 915218893Sdim // VLD2x2u 916218893Sdim InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 917218893Sdim InstrStage<1, [A9_MUX0], 0>, 918218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 919221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 920221345Sdim InstrStage<2, [A9_NPipe], 0>, 921221345Sdim InstrStage<2, [A9_LSUnit]>], 922221345Sdim [2, 3, 2, 3, 2, 1]>, 923218893Sdim // 924218893Sdim // VLD2lnu 925218893Sdim InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 926218893Sdim InstrStage<1, [A9_MUX0], 0>, 927218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 928221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 929221345Sdim InstrStage<2, [A9_NPipe], 0>, 930221345Sdim InstrStage<2, [A9_LSUnit]>], 931221345Sdim [3, 3, 2, 1, 1, 1, 1, 1]>, 932218893Sdim // 933218893Sdim // VLD2dup 934218893Sdim InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 935218893Sdim InstrStage<1, [A9_MUX0], 0>, 936218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 937221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 938221345Sdim InstrStage<1, [A9_NPipe], 0>, 939221345Sdim InstrStage<1, [A9_LSUnit]>], 940221345Sdim [2, 2, 1]>, 941218893Sdim // 
942218893Sdim // VLD2dupu 943218893Sdim InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 944218893Sdim InstrStage<1, [A9_MUX0], 0>, 945218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 946221345Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 947221345Sdim InstrStage<1, [A9_NPipe], 0>, 948221345Sdim InstrStage<1, [A9_LSUnit]>], 949221345Sdim [2, 2, 2, 1, 1]>, 950218893Sdim // 951207618Srdivacky // VLD3 952218893Sdim InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 953218893Sdim InstrStage<1, [A9_MUX0], 0>, 954218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 955221345Sdim InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 956221345Sdim InstrStage<3, [A9_NPipe], 0>, 957221345Sdim InstrStage<3, [A9_LSUnit]>], 958221345Sdim [3, 3, 4, 1]>, 959207618Srdivacky // 960218893Sdim // VLD3ln 961218893Sdim InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 962218893Sdim InstrStage<1, [A9_MUX0], 0>, 963218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 964218893Sdim InstrStage<11,[A9_DRegsVFP], 0, Reserved>, 965218893Sdim InstrStage<5, [A9_NPipe], 0>, 966218893Sdim InstrStage<5, [A9_LSUnit]>], 967218893Sdim [5, 5, 6, 1, 1, 1, 1, 2]>, 968218893Sdim // 969218893Sdim // VLD3u 970218893Sdim InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 971218893Sdim InstrStage<1, [A9_MUX0], 0>, 972218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 973221345Sdim InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 974221345Sdim InstrStage<3, [A9_NPipe], 0>, 975221345Sdim InstrStage<3, [A9_LSUnit]>], 976221345Sdim [3, 3, 4, 2, 1]>, 977218893Sdim // 978218893Sdim // VLD3lnu 979218893Sdim InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 980218893Sdim InstrStage<1, [A9_MUX0], 0>, 981218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 982218893Sdim InstrStage<11,[A9_DRegsVFP], 0, Reserved>, 983218893Sdim InstrStage<5, [A9_NPipe], 0>, 984218893Sdim InstrStage<5, [A9_LSUnit]>], 985218893Sdim [5, 5, 6, 2, 1, 1, 1, 1, 1, 
2]>, 986218893Sdim // 987218893Sdim // VLD3dup 988218893Sdim InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 989218893Sdim InstrStage<1, [A9_MUX0], 0>, 990218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 991218893Sdim InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 992218893Sdim InstrStage<3, [A9_NPipe], 0>, 993218893Sdim InstrStage<3, [A9_LSUnit]>], 994218893Sdim [3, 3, 4, 1]>, 995218893Sdim // 996218893Sdim // VLD3dupu 997218893Sdim InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 998218893Sdim InstrStage<1, [A9_MUX0], 0>, 999218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1000218893Sdim InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1001218893Sdim InstrStage<3, [A9_NPipe], 0>, 1002218893Sdim InstrStage<3, [A9_LSUnit]>], 1003218893Sdim [3, 3, 4, 2, 1, 1]>, 1004218893Sdim // 1005207618Srdivacky // VLD4 1006218893Sdim InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1007218893Sdim InstrStage<1, [A9_MUX0], 0>, 1008218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1009221345Sdim InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 1010221345Sdim InstrStage<3, [A9_NPipe], 0>, 1011221345Sdim InstrStage<3, [A9_LSUnit]>], 1012221345Sdim [3, 3, 4, 4, 1]>, 1013207618Srdivacky // 1014218893Sdim // VLD4ln 1015218893Sdim InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1016218893Sdim InstrStage<1, [A9_MUX0], 0>, 1017218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1018221345Sdim InstrStage<10,[A9_DRegsVFP], 0, Reserved>, 1019221345Sdim InstrStage<4, [A9_NPipe], 0>, 1020221345Sdim InstrStage<4, [A9_LSUnit]>], 1021221345Sdim [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, 1022207618Srdivacky // 1023218893Sdim // VLD4u 1024218893Sdim InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1025218893Sdim InstrStage<1, [A9_MUX0], 0>, 1026218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1027221345Sdim InstrStage<9,[A9_DRegsVFP], 0, Reserved>, 1028221345Sdim InstrStage<3, [A9_NPipe], 0>, 1029221345Sdim 
InstrStage<3, [A9_LSUnit]>], 1030221345Sdim [3, 3, 4, 4, 2, 1]>, 1031218893Sdim // 1032218893Sdim // VLD4lnu 1033218893Sdim InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1034218893Sdim InstrStage<1, [A9_MUX0], 0>, 1035218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1036221345Sdim InstrStage<10,[A9_DRegsVFP], 0, Reserved>, 1037221345Sdim InstrStage<4, [A9_NPipe], 0>, 1038221345Sdim InstrStage<4, [A9_LSUnit]>], 1039221345Sdim [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, 1040218893Sdim // 1041218893Sdim // VLD4dup 1042218893Sdim InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1043218893Sdim InstrStage<1, [A9_MUX0], 0>, 1044218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1045221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1046221345Sdim InstrStage<2, [A9_NPipe], 0>, 1047221345Sdim InstrStage<2, [A9_LSUnit]>], 1048221345Sdim [2, 2, 3, 3, 1]>, 1049218893Sdim // 1050218893Sdim // VLD4dupu 1051218893Sdim InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1052218893Sdim InstrStage<1, [A9_MUX0], 0>, 1053218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1054221345Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1055221345Sdim InstrStage<2, [A9_NPipe], 0>, 1056221345Sdim InstrStage<2, [A9_LSUnit]>], 1057221345Sdim [2, 2, 3, 3, 2, 1, 1]>, 1058218893Sdim // 1059218893Sdim // VST1 1060218893Sdim InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1061218893Sdim InstrStage<1, [A9_MUX0], 0>, 1062218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1063221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1064221345Sdim InstrStage<1, [A9_NPipe], 0>, 1065221345Sdim InstrStage<1, [A9_LSUnit]>], 1066218893Sdim [1, 1, 1]>, 1067218893Sdim // 1068218893Sdim // VST1x2 1069218893Sdim InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1070218893Sdim InstrStage<1, [A9_MUX0], 0>, 1071218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1072221345Sdim InstrStage<1, [A9_DRegsVFP], 0, 
Reserved>, 1073221345Sdim InstrStage<1, [A9_NPipe], 0>, 1074221345Sdim InstrStage<1, [A9_LSUnit]>], 1075218893Sdim [1, 1, 1, 1]>, 1076218893Sdim // 1077218893Sdim // VST1x3 1078218893Sdim InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1079218893Sdim InstrStage<1, [A9_MUX0], 0>, 1080218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1081221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1082221345Sdim InstrStage<2, [A9_NPipe], 0>, 1083221345Sdim InstrStage<2, [A9_LSUnit]>], 1084218893Sdim [1, 1, 1, 1, 2]>, 1085218893Sdim // 1086218893Sdim // VST1x4 1087218893Sdim InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1088218893Sdim InstrStage<1, [A9_MUX0], 0>, 1089218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1090221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1091221345Sdim InstrStage<2, [A9_NPipe], 0>, 1092221345Sdim InstrStage<2, [A9_LSUnit]>], 1093218893Sdim [1, 1, 1, 1, 2, 2]>, 1094218893Sdim // 1095218893Sdim // VST1u 1096218893Sdim InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1097218893Sdim InstrStage<1, [A9_MUX0], 0>, 1098218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1099221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1100221345Sdim InstrStage<1, [A9_NPipe], 0>, 1101221345Sdim InstrStage<1, [A9_LSUnit]>], 1102218893Sdim [2, 1, 1, 1, 1]>, 1103218893Sdim // 1104218893Sdim // VST1x2u 1105218893Sdim InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1106218893Sdim InstrStage<1, [A9_MUX0], 0>, 1107218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1108221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1109221345Sdim InstrStage<1, [A9_NPipe], 0>, 1110221345Sdim InstrStage<1, [A9_LSUnit]>], 1111218893Sdim [2, 1, 1, 1, 1, 1]>, 1112218893Sdim // 1113218893Sdim // VST1x3u 1114218893Sdim InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1115218893Sdim InstrStage<1, [A9_MUX0], 0>, 1116218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 
1117218893Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1118221345Sdim InstrStage<2, [A9_NPipe], 0>, 1119221345Sdim InstrStage<2, [A9_LSUnit]>], 1120218893Sdim [2, 1, 1, 1, 1, 1, 2]>, 1121218893Sdim // 1122218893Sdim // VST1x4u 1123218893Sdim InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1124218893Sdim InstrStage<1, [A9_MUX0], 0>, 1125218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1126221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1127221345Sdim InstrStage<2, [A9_NPipe], 0>, 1128221345Sdim InstrStage<2, [A9_LSUnit]>], 1129218893Sdim [2, 1, 1, 1, 1, 1, 2, 2]>, 1130218893Sdim // 1131218893Sdim // VST1ln 1132218893Sdim InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1133218893Sdim InstrStage<1, [A9_MUX0], 0>, 1134218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1135221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1136221345Sdim InstrStage<1, [A9_NPipe], 0>, 1137221345Sdim InstrStage<1, [A9_LSUnit]>], 1138218893Sdim [1, 1, 1]>, 1139218893Sdim // 1140218893Sdim // VST1lnu 1141218893Sdim InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1142218893Sdim InstrStage<1, [A9_MUX0], 0>, 1143218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1144221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1145221345Sdim InstrStage<1, [A9_NPipe], 0>, 1146221345Sdim InstrStage<1, [A9_LSUnit]>], 1147218893Sdim [2, 1, 1, 1, 1]>, 1148218893Sdim // 1149218893Sdim // VST2 1150218893Sdim InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1151218893Sdim InstrStage<1, [A9_MUX0], 0>, 1152218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1153221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1154221345Sdim InstrStage<1, [A9_NPipe], 0>, 1155221345Sdim InstrStage<1, [A9_LSUnit]>], 1156218893Sdim [1, 1, 1, 1]>, 1157218893Sdim // 1158218893Sdim // VST2x2 1159218893Sdim InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1160218893Sdim InstrStage<1, [A9_MUX0], 0>, 1161218893Sdim 
InstrStage<1, [A9_DRegsN], 0, Required>, 1162218893Sdim InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1163218893Sdim InstrStage<3, [A9_NPipe], 0>, 1164218893Sdim InstrStage<3, [A9_LSUnit]>], 1165218893Sdim [1, 1, 1, 1, 2, 2]>, 1166218893Sdim // 1167218893Sdim // VST2u 1168218893Sdim InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1169218893Sdim InstrStage<1, [A9_MUX0], 0>, 1170218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1171221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1172221345Sdim InstrStage<1, [A9_NPipe], 0>, 1173221345Sdim InstrStage<1, [A9_LSUnit]>], 1174218893Sdim [2, 1, 1, 1, 1, 1]>, 1175218893Sdim // 1176218893Sdim // VST2x2u 1177218893Sdim InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1178218893Sdim InstrStage<1, [A9_MUX0], 0>, 1179218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1180218893Sdim InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1181218893Sdim InstrStage<3, [A9_NPipe], 0>, 1182218893Sdim InstrStage<3, [A9_LSUnit]>], 1183218893Sdim [2, 1, 1, 1, 1, 1, 2, 2]>, 1184218893Sdim // 1185218893Sdim // VST2ln 1186218893Sdim InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1187218893Sdim InstrStage<1, [A9_MUX0], 0>, 1188218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1189221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1190221345Sdim InstrStage<1, [A9_NPipe], 0>, 1191221345Sdim InstrStage<1, [A9_LSUnit]>], 1192218893Sdim [1, 1, 1, 1]>, 1193218893Sdim // 1194218893Sdim // VST2lnu 1195218893Sdim InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1196218893Sdim InstrStage<1, [A9_MUX0], 0>, 1197218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1198221345Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1199221345Sdim InstrStage<1, [A9_NPipe], 0>, 1200221345Sdim InstrStage<1, [A9_LSUnit]>], 1201218893Sdim [2, 1, 1, 1, 1, 1]>, 1202218893Sdim // 1203218893Sdim // VST3 1204218893Sdim InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 
1205218893Sdim InstrStage<1, [A9_MUX0], 0>, 1206218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1207221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1208221345Sdim InstrStage<2, [A9_NPipe], 0>, 1209221345Sdim InstrStage<2, [A9_LSUnit]>], 1210218893Sdim [1, 1, 1, 1, 2]>, 1211218893Sdim // 1212218893Sdim // VST3u 1213218893Sdim InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1214218893Sdim InstrStage<1, [A9_MUX0], 0>, 1215218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1216221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1217221345Sdim InstrStage<2, [A9_NPipe], 0>, 1218221345Sdim InstrStage<2, [A9_LSUnit]>], 1219218893Sdim [2, 1, 1, 1, 1, 1, 2]>, 1220218893Sdim // 1221218893Sdim // VST3ln 1222218893Sdim InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1223218893Sdim InstrStage<1, [A9_MUX0], 0>, 1224218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1225218893Sdim InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1226218893Sdim InstrStage<3, [A9_NPipe], 0>, 1227218893Sdim InstrStage<3, [A9_LSUnit]>], 1228218893Sdim [1, 1, 1, 1, 2]>, 1229218893Sdim // 1230218893Sdim // VST3lnu 1231218893Sdim InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1232218893Sdim InstrStage<1, [A9_MUX0], 0>, 1233218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1234218893Sdim InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1235218893Sdim InstrStage<3, [A9_NPipe], 0>, 1236218893Sdim InstrStage<3, [A9_LSUnit]>], 1237218893Sdim [2, 1, 1, 1, 1, 1, 2]>, 1238218893Sdim // 1239218893Sdim // VST4 1240218893Sdim InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1241218893Sdim InstrStage<1, [A9_MUX0], 0>, 1242218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1243221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1244221345Sdim InstrStage<2, [A9_NPipe], 0>, 1245221345Sdim InstrStage<2, [A9_LSUnit]>], 1246218893Sdim [1, 1, 1, 1, 2, 2]>, 1247218893Sdim // 1248218893Sdim // VST4u 1249218893Sdim 
InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1250218893Sdim InstrStage<1, [A9_MUX0], 0>, 1251218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1252221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1253221345Sdim InstrStage<2, [A9_NPipe], 0>, 1254221345Sdim InstrStage<2, [A9_LSUnit]>], 1255218893Sdim [2, 1, 1, 1, 1, 1, 2, 2]>, 1256218893Sdim // 1257218893Sdim // VST4ln 1258218893Sdim InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1259218893Sdim InstrStage<1, [A9_MUX0], 0>, 1260218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1261221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1262221345Sdim InstrStage<2, [A9_NPipe], 0>, 1263221345Sdim InstrStage<2, [A9_LSUnit]>], 1264218893Sdim [1, 1, 1, 1, 2, 2]>, 1265218893Sdim // 1266218893Sdim // VST4lnu 1267218893Sdim InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1268218893Sdim InstrStage<1, [A9_MUX0], 0>, 1269218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1270221345Sdim InstrStage<2, [A9_DRegsVFP], 0, Reserved>, 1271221345Sdim InstrStage<2, [A9_NPipe], 0>, 1272221345Sdim InstrStage<2, [A9_LSUnit]>], 1273218893Sdim [2, 1, 1, 1, 1, 1, 2, 2]>, 1274218893Sdim 1275218893Sdim // 1276207618Srdivacky // Double-register Integer Unary 1277218893Sdim InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1278218893Sdim InstrStage<1, [A9_MUX0], 0>, 1279218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1280207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1281207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1282218893Sdim InstrStage<1, [A9_NPipe]>], 1283218893Sdim [4, 2]>, 1284207618Srdivacky // 1285207618Srdivacky // Quad-register Integer Unary 1286218893Sdim InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1287218893Sdim InstrStage<1, [A9_MUX0], 0>, 1288218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1289207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1290207618Srdivacky 
InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1291218893Sdim InstrStage<1, [A9_NPipe]>], 1292218893Sdim [4, 2]>, 1293207618Srdivacky // 1294207618Srdivacky // Double-register Integer Q-Unary 1295218893Sdim InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1296218893Sdim InstrStage<1, [A9_MUX0], 0>, 1297218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1298207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1299207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1300218893Sdim InstrStage<1, [A9_NPipe]>], 1301218893Sdim [4, 1]>, 1302207618Srdivacky // 1303207618Srdivacky // Quad-register Integer Q-Unary 1304218893Sdim InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1305218893Sdim InstrStage<1, [A9_MUX0], 0>, 1306218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1307207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1308207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1309218893Sdim InstrStage<1, [A9_NPipe]>], 1310218893Sdim [4, 1]>, 1311207618Srdivacky // 1312207618Srdivacky // Double-register Integer Binary 1313218893Sdim InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1314218893Sdim InstrStage<1, [A9_MUX0], 0>, 1315218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1316207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1317207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1318218893Sdim InstrStage<1, [A9_NPipe]>], 1319218893Sdim [3, 2, 2]>, 1320207618Srdivacky // 1321207618Srdivacky // Quad-register Integer Binary 1322218893Sdim InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1323218893Sdim InstrStage<1, [A9_MUX0], 0>, 1324218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1325207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1326207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1327218893Sdim InstrStage<1, [A9_NPipe]>], 1328218893Sdim [3, 2, 2]>, 1329207618Srdivacky // 1330207618Srdivacky //
Double-register Integer Subtract 1331218893Sdim InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1332218893Sdim InstrStage<1, [A9_MUX0], 0>, 1333218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1334207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1335207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1336218893Sdim InstrStage<1, [A9_NPipe]>], 1337218893Sdim [3, 2, 1]>, 1338207618Srdivacky // 1339207618Srdivacky // Quad-register Integer Subtract 1340218893Sdim InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1341218893Sdim InstrStage<1, [A9_MUX0], 0>, 1342218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1343207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1344207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1345218893Sdim InstrStage<1, [A9_NPipe]>], 1346218893Sdim [3, 2, 1]>, 1347207618Srdivacky // 1348207618Srdivacky // Double-register Integer Shift 1349218893Sdim InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1350218893Sdim InstrStage<1, [A9_MUX0], 0>, 1351218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1352207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1353207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1354218893Sdim InstrStage<1, [A9_NPipe]>], 1355218893Sdim [3, 1, 1]>, 1356207618Srdivacky // 1357207618Srdivacky // Quad-register Integer Shift 1358218893Sdim InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1359218893Sdim InstrStage<1, [A9_MUX0], 0>, 1360218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1361207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1362207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1363218893Sdim InstrStage<1, [A9_NPipe]>], 1364218893Sdim [3, 1, 1]>, 1365207618Srdivacky // 1366207618Srdivacky // Double-register Integer Shift (4 cycle) 1367218893Sdim InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1368218893Sdim InstrStage<1, 
[A9_MUX0], 0>, 1369218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1370207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1371207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1372218893Sdim InstrStage<1, [A9_NPipe]>], 1373218893Sdim [4, 1, 1]>, 1374207618Srdivacky // 1375207618Srdivacky // Quad-register Integer Shift (4 cycle) 1376218893Sdim InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1377218893Sdim InstrStage<1, [A9_MUX0], 0>, 1378218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1379207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1380207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1381218893Sdim InstrStage<1, [A9_NPipe]>], 1382218893Sdim [4, 1, 1]>, 1383207618Srdivacky // 1384207618Srdivacky // Double-register Integer Binary (4 cycle) 1385218893Sdim InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1386218893Sdim InstrStage<1, [A9_MUX0], 0>, 1387218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1388207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1389207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1390218893Sdim InstrStage<1, [A9_NPipe]>], 1391218893Sdim [4, 2, 2]>, 1392207618Srdivacky // 1393207618Srdivacky // Quad-register Integer Binary (4 cycle) 1394218893Sdim InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1395218893Sdim InstrStage<1, [A9_MUX0], 0>, 1396218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1397207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1398207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1399218893Sdim InstrStage<1, [A9_NPipe]>], 1400218893Sdim [4, 2, 2]>, 1401207618Srdivacky // 1402207618Srdivacky // Double-register Integer Subtract (4 cycle) 1403218893Sdim InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1404218893Sdim InstrStage<1, [A9_MUX0], 0>, 1405218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1406207618Srdivacky // Extra latency cycles 
since wbck is 6 cycles 1407207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1408218893Sdim InstrStage<1, [A9_NPipe]>], 1409218893Sdim [4, 2, 1]>, 1410207618Srdivacky // 1411207618Srdivacky // Quad-register Integer Subtract (4 cycle) 1412218893Sdim InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1413218893Sdim InstrStage<1, [A9_MUX0], 0>, 1414218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1415207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1416207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1417218893Sdim InstrStage<1, [A9_NPipe]>], 1418218893Sdim [4, 2, 1]>, 1419207618Srdivacky 1420207618Srdivacky // 1421207618Srdivacky // Double-register Integer Count 1422218893Sdim InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1423218893Sdim InstrStage<1, [A9_MUX0], 0>, 1424218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1425207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1426207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1427218893Sdim InstrStage<1, [A9_NPipe]>], 1428218893Sdim [3, 2, 2]>, 1429207618Srdivacky // 1430207618Srdivacky // Quad-register Integer Count 1431207618Srdivacky // Result written in N3, but that is relative to the last cycle of multicycle, 1432207618Srdivacky // so we use 4 for those cases 1433218893Sdim InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1434218893Sdim InstrStage<1, [A9_MUX0], 0>, 1435218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1436207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1437207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1438218893Sdim InstrStage<2, [A9_NPipe]>], 1439218893Sdim [4, 2, 2]>, 1440207618Srdivacky // 1441207618Srdivacky // Double-register Absolute Difference and Accumulate 1442218893Sdim InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1443218893Sdim InstrStage<1, [A9_MUX0], 0>, 1444218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 
1445207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1446207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1447218893Sdim InstrStage<1, [A9_NPipe]>], 1448218893Sdim [6, 3, 2, 1]>, 1449207618Srdivacky // 1450207618Srdivacky // Quad-register Absolute Difference and Accumulate 1451218893Sdim InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1452218893Sdim InstrStage<1, [A9_MUX0], 0>, 1453218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1454207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1455207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1456218893Sdim InstrStage<2, [A9_NPipe]>], 1457218893Sdim [6, 3, 2, 1]>, 1458207618Srdivacky // 1459207618Srdivacky // Double-register Integer Pair Add Long 1460218893Sdim InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1461218893Sdim InstrStage<1, [A9_MUX0], 0>, 1462218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1463207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1464207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1465218893Sdim InstrStage<1, [A9_NPipe]>], 1466218893Sdim [6, 3, 1]>, 1467207618Srdivacky // 1468207618Srdivacky // Quad-register Integer Pair Add Long 1469218893Sdim InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1470218893Sdim InstrStage<1, [A9_MUX0], 0>, 1471218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1472207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1473207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1474218893Sdim InstrStage<2, [A9_NPipe]>], 1475218893Sdim [6, 3, 1]>, 1476207618Srdivacky 1477207618Srdivacky // 1478207618Srdivacky // Double-register Integer Multiply (.8, .16) 1479218893Sdim InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1480218893Sdim InstrStage<1, [A9_MUX0], 0>, 1481218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1482207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1483207618Srdivacky 
InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1484218893Sdim InstrStage<1, [A9_NPipe]>], 1485218893Sdim [6, 2, 2]>, 1486207618Srdivacky // 1487207618Srdivacky // Quad-register Integer Multiply (.8, .16) 1488218893Sdim InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1489218893Sdim InstrStage<1, [A9_MUX0], 0>, 1490218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1491207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1492207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1493218893Sdim InstrStage<2, [A9_NPipe]>], 1494218893Sdim [7, 2, 2]>, 1495207618Srdivacky 1496207618Srdivacky // 1497207618Srdivacky // Double-register Integer Multiply (.32) 1498218893Sdim InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1499218893Sdim InstrStage<1, [A9_MUX0], 0>, 1500218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1501207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1502207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1503218893Sdim InstrStage<2, [A9_NPipe]>], 1504218893Sdim [7, 2, 1]>, 1505207618Srdivacky // 1506207618Srdivacky // Quad-register Integer Multiply (.32) 1507218893Sdim InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1508218893Sdim InstrStage<1, [A9_MUX0], 0>, 1509218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1510207618Srdivacky // Extra latency cycles since wbck is 9 cycles 1511207618Srdivacky InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1512218893Sdim InstrStage<4, [A9_NPipe]>], 1513218893Sdim [9, 2, 1]>, 1514207618Srdivacky // 1515207618Srdivacky // Double-register Integer Multiply-Accumulate (.8, .16) 1516218893Sdim InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1517218893Sdim InstrStage<1, [A9_MUX0], 0>, 1518218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1519207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1520207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1521218893Sdim InstrStage<1, 
[A9_NPipe]>], 1522218893Sdim [6, 3, 2, 2]>, 1523207618Srdivacky // 1524207618Srdivacky // Double-register Integer Multiply-Accumulate (.32) 1525218893Sdim InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1526218893Sdim InstrStage<1, [A9_MUX0], 0>, 1527218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1528207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1529207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1530218893Sdim InstrStage<2, [A9_NPipe]>], 1531218893Sdim [7, 3, 2, 1]>, 1532207618Srdivacky // 1533207618Srdivacky // Quad-register Integer Multiply-Accumulate (.8, .16) 1534218893Sdim InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1535218893Sdim InstrStage<1, [A9_MUX0], 0>, 1536218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1537207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1538207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1539218893Sdim InstrStage<2, [A9_NPipe]>], 1540218893Sdim [7, 3, 2, 2]>, 1541207618Srdivacky // 1542207618Srdivacky // Quad-register Integer Multiply-Accumulate (.32) 1543218893Sdim InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1544218893Sdim InstrStage<1, [A9_MUX0], 0>, 1545218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1546207618Srdivacky // Extra latency cycles since wbck is 9 cycles 1547207618Srdivacky InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1548218893Sdim InstrStage<4, [A9_NPipe]>], 1549218893Sdim [9, 3, 2, 1]>, 1550218893Sdim 1551207618Srdivacky // 1552218893Sdim // Move 1553218893Sdim InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1554218893Sdim InstrStage<1, [A9_MUX0], 0>, 1555218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1556218893Sdim InstrStage<1, [A9_DRegsVFP], 0, Reserved>, 1557218893Sdim InstrStage<1, [A9_NPipe]>], 1558218893Sdim [1,1]>, 1559218893Sdim // 1560207618Srdivacky // Move Immediate 1561218893Sdim InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, 
A9_Issue1], 0>, 1562218893Sdim InstrStage<1, [A9_MUX0], 0>, 1563218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1564207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1565207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1566218893Sdim InstrStage<1, [A9_NPipe]>], 1567218893Sdim [3]>, 1568207618Srdivacky // 1569207618Srdivacky // Double-register Permute Move 1570218893Sdim InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1571218893Sdim InstrStage<1, [A9_MUX0], 0>, 1572218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1573218893Sdim // Extra latency cycles since wbck is 6 cycles 1574218893Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1575218893Sdim InstrStage<1, [A9_NPipe]>], 1576218893Sdim [2, 1]>, 1577207618Srdivacky // 1578207618Srdivacky // Quad-register Permute Move 1579218893Sdim InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1580218893Sdim InstrStage<1, [A9_MUX0], 0>, 1581218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1582218893Sdim // Extra latency cycles since wbck is 6 cycles 1583218893Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1584218893Sdim InstrStage<1, [A9_NPipe]>], 1585218893Sdim [2, 1]>, 1586207618Srdivacky // 1587207618Srdivacky // Integer to Single-precision Move 1588218893Sdim InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1589218893Sdim InstrStage<1, [A9_MUX0], 0>, 1590218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1591207618Srdivacky InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1592218893Sdim InstrStage<1, [A9_NPipe]>], 1593218893Sdim [1, 1]>, 1594207618Srdivacky // 1595207618Srdivacky // Integer to Double-precision Move 1596218893Sdim InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1597218893Sdim InstrStage<1, [A9_MUX0], 0>, 1598218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1599207618Srdivacky InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1600218893Sdim InstrStage<1, [A9_NPipe]>], 1601218893Sdim [1, 1, 1]>, 
1602207618Srdivacky // 1603207618Srdivacky // Single-precision to Integer Move 1604218893Sdim InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1605218893Sdim InstrStage<1, [A9_MUX0], 0>, 1606218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1607207618Srdivacky InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1608218893Sdim InstrStage<1, [A9_NPipe]>], 1609218893Sdim [2, 1]>, 1610207618Srdivacky // 1611207618Srdivacky // Double-precision to Integer Move 1612218893Sdim InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1613218893Sdim InstrStage<1, [A9_MUX0], 0>, 1614218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1615207618Srdivacky InstrStage<3, [A9_DRegsVFP], 0, Reserved>, 1616218893Sdim InstrStage<1, [A9_NPipe]>], 1617218893Sdim [2, 2, 1]>, 1618207618Srdivacky // 1619207618Srdivacky // Integer to Lane Move 1620218893Sdim InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1621218893Sdim InstrStage<1, [A9_MUX0], 0>, 1622218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1623207618Srdivacky InstrStage<4, [A9_DRegsVFP], 0, Reserved>, 1624218893Sdim InstrStage<2, [A9_NPipe]>], 1625218893Sdim [3, 1, 1]>, 1626207618Srdivacky 1627207618Srdivacky // 1628218893Sdim // Vector narrow move 1629218893Sdim InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1630218893Sdim InstrStage<1, [A9_MUX0], 0>, 1631218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1632218893Sdim // Extra latency cycles since wbck is 6 cycles 1633218893Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1634218893Sdim InstrStage<1, [A9_NPipe]>], 1635218893Sdim [3, 1]>, 1636218893Sdim // 1637207618Srdivacky // Double-register FP Unary 1638218893Sdim InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1639218893Sdim InstrStage<1, [A9_MUX0], 0>, 1640218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1641207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1642207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, 
Reserved>, 1643218893Sdim InstrStage<1, [A9_NPipe]>], 1644218893Sdim [5, 2]>, 1645207618Srdivacky // 1646207618Srdivacky // Quad-register FP Unary 1647207618Srdivacky // Result written in N5, but that is relative to the last cycle of multicycle, 1648207618Srdivacky // so we use 6 for those cases 1649218893Sdim InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1650218893Sdim InstrStage<1, [A9_MUX0], 0>, 1651218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1652207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1653207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1654218893Sdim InstrStage<2, [A9_NPipe]>], 1655218893Sdim [6, 2]>, 1656207618Srdivacky // 1657207618Srdivacky // Double-register FP Binary 1658207618Srdivacky // FIXME: We're using this itin for many instructions and [2, 2] here is too 1659207618Srdivacky // optimistic. 1660218893Sdim InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1661218893Sdim InstrStage<1, [A9_MUX0], 0>, 1662218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1663218893Sdim // Extra latency cycles since wbck is 6 cycles 1664207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1665218893Sdim InstrStage<1, [A9_NPipe]>], 1666218893Sdim [5, 2, 2]>, 1667218893Sdim 1668207618Srdivacky // 1669218893Sdim // VPADD, etc. 
1670218893Sdim InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1671218893Sdim InstrStage<1, [A9_MUX0], 0>, 1672218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1673218893Sdim // Extra latency cycles since wbck is 6 cycles 1674218893Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1675218893Sdim InstrStage<1, [A9_NPipe]>], 1676218893Sdim [5, 1, 1]>, 1677218893Sdim // 1678218893Sdim // Double-register FP VMUL 1679218893Sdim InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1680218893Sdim InstrStage<1, [A9_MUX0], 0>, 1681218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1682218893Sdim // Extra latency cycles since wbck is 6 cycles 1683218893Sdim InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1684218893Sdim InstrStage<1, [A9_NPipe]>], 1685218893Sdim [5, 2, 1]>, 1686218893Sdim // 1687207618Srdivacky // Quad-register FP Binary 1688207618Srdivacky // Result written in N5, but that is relative to the last cycle of multicycle, 1689207618Srdivacky // so we use 6 for those cases 1690207618Srdivacky // FIXME: We're using this itin for many instructions and [2, 2] here is too 1691207618Srdivacky // optimistic. 
1692218893Sdim InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1693218893Sdim InstrStage<1, [A9_MUX0], 0>, 1694218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1695218893Sdim // Extra latency cycles since wbck is 7 cycles 1696207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1697218893Sdim InstrStage<2, [A9_NPipe]>], 1698218893Sdim [6, 2, 2]>, 1699207618Srdivacky // 1700218893Sdim // Quad-register FP VMUL 1701218893Sdim InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1702218893Sdim InstrStage<1, [A9_MUX0], 0>, 1703218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1704218893Sdim // Extra latency cycles since wbck is 7 cycles 1705218893Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1706218893Sdim InstrStage<1, [A9_NPipe]>], 1707218893Sdim [6, 2, 1]>, 1708218893Sdim // 1709207618Srdivacky // Double-register FP Multiple-Accumulate 1710218893Sdim InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1711218893Sdim InstrStage<1, [A9_MUX0], 0>, 1712218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1713207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1714207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1715218893Sdim InstrStage<2, [A9_NPipe]>], 1716218893Sdim [6, 3, 2, 1]>, 1717207618Srdivacky // 1718207618Srdivacky // Quad-register FP Multiple-Accumulate 1719207618Srdivacky // Result written in N9, but that is relative to the last cycle of multicycle, 1720207618Srdivacky // so we use 10 for those cases 1721218893Sdim InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1722218893Sdim InstrStage<1, [A9_MUX0], 0>, 1723218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1724207618Srdivacky // Extra latency cycles since wbck is 9 cycles 1725207618Srdivacky InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1726218893Sdim InstrStage<4, [A9_NPipe]>], 1727218893Sdim [8, 4, 2, 1]>, 1728207618Srdivacky // 1729235633Sdim // Double-register Fused FP Multiple-Accumulate 
1730235633Sdim InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1731235633Sdim InstrStage<1, [A9_MUX0], 0>, 1732235633Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1733235633Sdim // Extra latency cycles since wbck is 7 cycles 1734235633Sdim InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1735235633Sdim InstrStage<2, [A9_NPipe]>], 1736235633Sdim [6, 3, 2, 1]>, 1737235633Sdim // 1738235633Sdim // Quad-register Fused FP Multiple-Accumulate 1739235633Sdim // Result written in N9, but that is relative to the last cycle of multicycle, 1740235633Sdim // so we use 10 for those cases 1741235633Sdim InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1742235633Sdim InstrStage<1, [A9_MUX0], 0>, 1743235633Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1744235633Sdim // Extra latency cycles since wbck is 9 cycles 1745235633Sdim InstrStage<10, [A9_DRegsVFP], 0, Reserved>, 1746235633Sdim InstrStage<4, [A9_NPipe]>], 1747235633Sdim [8, 4, 2, 1]>, 1748235633Sdim // 1749207618Srdivacky // Double-register Reciprical Step 1750218893Sdim InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1751218893Sdim InstrStage<1, [A9_MUX0], 0>, 1752218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1753218893Sdim // Extra latency cycles since wbck is 10 cycles 1754218893Sdim InstrStage<11, [A9_DRegsVFP], 0, Reserved>, 1755218893Sdim InstrStage<1, [A9_NPipe]>], 1756218893Sdim [9, 2, 2]>, 1757207618Srdivacky // 1758207618Srdivacky // Quad-register Reciprical Step 1759218893Sdim InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1760218893Sdim InstrStage<1, [A9_MUX0], 0>, 1761218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1762218893Sdim // Extra latency cycles since wbck is 11 cycles 1763218893Sdim InstrStage<12, [A9_DRegsVFP], 0, Reserved>, 1764218893Sdim InstrStage<2, [A9_NPipe]>], 1765218893Sdim [10, 2, 2]>, 1766207618Srdivacky // 1767207618Srdivacky // Double-register Permute 1768218893Sdim InstrItinData<IIC_VPERMD, 
[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1769218893Sdim InstrStage<1, [A9_MUX0], 0>, 1770218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1771207618Srdivacky // Extra latency cycles since wbck is 6 cycles 1772207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1773218893Sdim InstrStage<1, [A9_NPipe]>], 1774218893Sdim [2, 2, 1, 1]>, 1775207618Srdivacky // 1776207618Srdivacky // Quad-register Permute 1777207618Srdivacky // Result written in N2, but that is relative to the last cycle of multicycle, 1778207618Srdivacky // so we use 3 for those cases 1779218893Sdim InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1780218893Sdim InstrStage<1, [A9_MUX0], 0>, 1781218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1782207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1783207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1784218893Sdim InstrStage<2, [A9_NPipe]>], 1785218893Sdim [3, 3, 1, 1]>, 1786207618Srdivacky // 1787207618Srdivacky // Quad-register Permute (3 cycle issue) 1788207618Srdivacky // Result written in N2, but that is relative to the last cycle of multicycle, 1789207618Srdivacky // so we use 4 for those cases 1790218893Sdim InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1791218893Sdim InstrStage<1, [A9_MUX0], 0>, 1792218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1793207618Srdivacky // Extra latency cycles since wbck is 8 cycles 1794207618Srdivacky InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1795218893Sdim InstrStage<3, [A9_NPipe]>], 1796218893Sdim [4, 4, 1, 1]>, 1797207618Srdivacky 1798207618Srdivacky // 1799207618Srdivacky // Double-register VEXT 1800218893Sdim InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1801218893Sdim InstrStage<1, [A9_MUX0], 0>, 1802218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1803218893Sdim // Extra latency cycles since wbck is 6 cycles 1804207618Srdivacky InstrStage<7, [A9_DRegsVFP], 0, Reserved>, 1805218893Sdim InstrStage<1, 
[A9_NPipe]>], 1806218893Sdim [2, 1, 1]>, 1807207618Srdivacky // 1808207618Srdivacky // Quad-register VEXT 1809218893Sdim InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1810218893Sdim InstrStage<1, [A9_MUX0], 0>, 1811218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1812218893Sdim // Extra latency cycles since wbck is 7 cycles 1813207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1814218893Sdim InstrStage<2, [A9_NPipe]>], 1815218893Sdim [3, 1, 2]>, 1816207618Srdivacky // 1817207618Srdivacky // VTB 1818218893Sdim InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1819218893Sdim InstrStage<1, [A9_MUX0], 0>, 1820218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1821207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1822207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1823218893Sdim InstrStage<2, [A9_NPipe]>], 1824218893Sdim [3, 2, 1]>, 1825218893Sdim InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1826218893Sdim InstrStage<1, [A9_MUX0], 0>, 1827218893Sdim InstrStage<2, [A9_DRegsN], 0, Required>, 1828207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1829207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1830218893Sdim InstrStage<2, [A9_NPipe]>], 1831218893Sdim [3, 2, 2, 1]>, 1832218893Sdim InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1833218893Sdim InstrStage<1, [A9_MUX0], 0>, 1834218893Sdim InstrStage<2, [A9_DRegsN], 0, Required>, 1835207618Srdivacky // Extra latency cycles since wbck is 8 cycles 1836207618Srdivacky InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1837218893Sdim InstrStage<3, [A9_NPipe]>], 1838218893Sdim [4, 2, 2, 3, 1]>, 1839218893Sdim InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1840218893Sdim InstrStage<1, [A9_MUX0], 0>, 1841218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1842207618Srdivacky // Extra latency cycles since wbck is 8 cycles 1843207618Srdivacky InstrStage<9, [A9_DRegsVFP], 0, 
Reserved>, 1844218893Sdim InstrStage<3, [A9_NPipe]>], 1845218893Sdim [4, 2, 2, 3, 3, 1]>, 1846207618Srdivacky // 1847207618Srdivacky // VTBX 1848218893Sdim InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1849218893Sdim InstrStage<1, [A9_MUX0], 0>, 1850218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1851207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1852207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1853218893Sdim InstrStage<2, [A9_NPipe]>], 1854218893Sdim [3, 1, 2, 1]>, 1855218893Sdim InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1856218893Sdim InstrStage<1, [A9_MUX0], 0>, 1857218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1858207618Srdivacky // Extra latency cycles since wbck is 7 cycles 1859207618Srdivacky InstrStage<8, [A9_DRegsVFP], 0, Reserved>, 1860218893Sdim InstrStage<2, [A9_NPipe]>], 1861218893Sdim [3, 1, 2, 2, 1]>, 1862218893Sdim InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1863218893Sdim InstrStage<1, [A9_MUX0], 0>, 1864218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1865207618Srdivacky // Extra latency cycles since wbck is 8 cycles 1866207618Srdivacky InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1867218893Sdim InstrStage<3, [A9_NPipe]>], 1868218893Sdim [4, 1, 2, 2, 3, 1]>, 1869218893Sdim InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, 1870218893Sdim InstrStage<1, [A9_MUX0], 0>, 1871218893Sdim InstrStage<1, [A9_DRegsN], 0, Required>, 1872207618Srdivacky // Extra latency cycles since wbck is 8 cycles 1873207618Srdivacky InstrStage<9, [A9_DRegsVFP], 0, Reserved>, 1874218893Sdim InstrStage<2, [A9_NPipe]>], 1875218893Sdim [4, 1, 2, 2, 3, 3, 1]> 1876207618Srdivacky]>; 1877245431Sdim 1878245431Sdim// ===---------------------------------------------------------------------===// 1879245431Sdim// The following definitions describe the simpler per-operand machine model. 
// This works with MachineScheduler and will eventually replace itineraries.

// Record type carrying an ordered list of write sequences. SchedModel is
// declared uninitialized ('?') here; each definition is expected to bind it,
// e.g. through an enclosing 'let SchedModel = ... in' scope.
class A9WriteLMOpsListType<list<WriteSequence> writes> {
  list<WriteSequence> Writes = writes;
  SchedMachineModel SchedModel = ?;
}

// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
  // 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  // Based on available renamed registers.
  let MicroOpBufferSize = 56;
  // Optimistic load latency assuming bypass. This is overridden by
  // OperandCycles if the Itineraries are queried instead.
  let LoadLatency = 2;
  // Based on estimate of pipeline depth.
  let MispredictPenalty = 8;

  // Keep the legacy itineraries attached to the model.
  let Itineraries = CortexA9Itineraries;
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.

let SchedModel = CortexA9Model in {

def A9UnitALU : ProcResource<2>;
// The multiplier is declared with A9UnitALU as its super-resource.
def A9UnitMul : ProcResource<1> {
  let Super = A9UnitALU;
}
def A9UnitAGU : ProcResource<1>;
def A9UnitLS : ProcResource<1>;
def A9UnitFP : ProcResource<1> {
  let BufferSize = 0;
}
def A9UnitB : ProcResource<1>;

//===----------------------------------------------------------------------===//
// Define scheduler read/write types with their resources and latency on A9.

// Consume an issue slot, but no processor resources. This is useful when all
// other writes associated with the operand have NumMicroOps = 0.
def A9WriteIssue : SchedWriteRes<[]> {
  let Latency = 0;
}

// Write an integer register.
def A9WriteI : SchedWriteRes<[A9UnitALU]>;
// Write an integer shifted-by register.
def A9WriteIsr : SchedWriteRes<[A9UnitALU]> {
  let Latency = 2;
}

// Basic ALU.
def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
// ALU with operand shifted by immediate. This one binds the target-wide
// WriteALUsi directly instead of introducing an A9-specific write type.
def : WriteRes<WriteALUsi, [A9UnitALU]> {
  let Latency = 2;
}
// ALU with operand shifted by register.
def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> {
  let Latency = 3;
}

// Multiplication. The *Hi writes set NumMicroOps = 0, so they add latency for
// the second result without counting as an extra micro-op.
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> {
  let Latency = 4;
}
def A9WriteMHi : SchedWriteRes<[A9UnitMul]> {
  let Latency = 5;
  let NumMicroOps = 0;
}
def A9WriteM16 : SchedWriteRes<[A9UnitMul]> {
  let Latency = 3;
}
def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> {
  let Latency = 4;
  let NumMicroOps = 0;
}

// Floating-point
// Only one FP or AGU instruction may issue per cycle. We model this
// by having FP instructions consume the AGU resource.
// Every write below occupies both A9UnitFP and A9UnitAGU; only the latency
// differs from one definition to the next.
def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
}
def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 1;
}
def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 5;
}
def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 6;
}
def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 8;
}
def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
}
def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 15;
}
def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 25;
}
def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 17;
}
def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 32;
}

// NEON has an odd mix of latencies. Simply name the write types by latency.
// (There is no A9WriteV8: the series goes 1-7, then 9 and 10.)
def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 1;
}
def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 2;
}
def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 3;
}
def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
}
def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 5;
}
def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 6;
}
def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 7;
}
def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
}
def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 10;
}

// Reserve A9UnitFP for 2 consecutive cycles.
// ResourceCycles runs parallel to the resource list, so both entries are
// spelled out: 2 cycles on A9UnitFP and 1 cycle on A9UnitAGU.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
  let ResourceCycles = [2, 1];
}
def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
  let ResourceCycles = [2, 1];
}

// Branches don't have a def operand but still consume resources.
def A9WriteB : SchedWriteRes<[A9UnitB]>;

// Address generation. Uses the AGU but is not counted as a micro-op.
def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> {
  let NumMicroOps = 0;
}

// Load Integer.
def A9WriteL : SchedWriteRes<[A9UnitLS]> {
  let Latency = 3;
}
// Load the upper 32-bits using the same micro-op.
def A9WriteLHi : SchedWriteRes<[]> {
  let Latency = 3;
  let NumMicroOps = 0;
}
// Offset shifted by register.
def A9WriteLsi : SchedWriteRes<[A9UnitLS]> {
  let Latency = 4;
}
// Load (and zero extend) a byte.
def A9WriteLb : SchedWriteRes<[A9UnitLS]> {
  let Latency = 4;
}
def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> {
  let Latency = 5;
}

// Load or Store Float, aligned.
def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 1;
}

// Store Integer.
def A9WriteS : SchedWriteRes<[A9UnitLS]>;

//===----------------------------------------------------------------------===//
// Define resources dynamically for load multiple variants.

// Define helpers for extra latency without consuming resources.
// A9WriteCycle1 has no resources and no micro-ops; it only contributes one
// cycle of latency. A9WriteCycle2-8 repeat it N times.
def A9WriteCycle1 : SchedWriteRes<[]> {
  let Latency = 1;
  let NumMicroOps = 0;
}
foreach NumCycles = 2-8 in {
def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles

// Define address generation sequences and predicates for 8 flavors of LDMs.
foreach NumAddr = 1-8 in {

// Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
// latency for instructions that generate multiple loads or stores.
def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;

// Define a predicate to select the LDM based on number of memory addresses.
// The address count is rounded up to a number of pairs before comparing.
def A9LMAdr#NumAddr#Pred :
  SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;

} // foreach NumAddr

// Fall-back for unknown LDMs.
def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;

// LDM/VLDM/VLDn address generation latency & resources.
// Dynamically select the A9WriteAdrN sequence using a predicate.
def A9WriteLMAdr : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
  SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
  SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
  SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
  SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
  SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
  SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
  SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
  // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
  SchedVar<A9LMUnknownPred, [A9WriteAdr2]>
]>;

// Define LDM Resources.
// The LDM write types below take no issue resource, so they can be
// combined with other writes like WriteB.
// A9WriteLMLo takes a single LS resource and 2 cycles.
def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
// Assuming aligned access, the upper half of each pair is free with
// the same latency.
def A9WriteLMHi : SchedWriteRes<[]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
// Each A9WriteL#N variant adds N cycles of latency without consuming
// additional resources.
foreach NumAddr = 1-8 in {
def A9WriteL#NumAddr : WriteSequence<
  [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
def A9WriteL#NumAddr#Hi : WriteSequence<
  [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
}

//===----------------------------------------------------------------------===//
// LDM: Load multiple into 32-bit integer registers.

// Lo/Hi write pairs in load order; A9WriteLM slices a prefix of this list.
def A9WriteLMOpsList : A9WriteLMOpsListType<[
  A9WriteL1, A9WriteL1Hi,
  A9WriteL2, A9WriteL2Hi,
  A9WriteL3, A9WriteL3Hi,
  A9WriteL4, A9WriteL4Hi,
  A9WriteL5, A9WriteL5Hi,
  A9WriteL6, A9WriteL6Hi,
  A9WriteL7, A9WriteL7Hi,
  A9WriteL8, A9WriteL8Hi
]>;

// A9WriteLM variants expand into a pair of writes for each 64-bit
// value loaded. When the number of registers is odd, the last
// A9WriteLnHi is naturally ignored because the instruction has no
// following def operands. These variants take no issue resource, so
// they may need to be part of a WriteSequence that includes A9WriteIssue.
// Variadic LDM write: one Lo/Hi pair per selected address count.
def A9WriteLM : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
  SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
  SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
  SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
  SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
  SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
  SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
  SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources.
  SchedVar<A9LMUnknownPred, [
    A9WriteL1,   A9WriteL1Hi,
    A9WriteL2,   A9WriteL2Hi,
    A9WriteL3Hi, A9WriteL3Hi,
    A9WriteL4Hi, A9WriteL4Hi,
    A9WriteL5Hi, A9WriteL5Hi,
    A9WriteL6Hi, A9WriteL6Hi,
    A9WriteL7Hi, A9WriteL7Hi,
    A9WriteL8Hi, A9WriteL8Hi]>
]> {
  let Variadic = 1;
}

//===----------------------------------------------------------------------===//
// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.

// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources,
// so it can be used in WriteSequences for single-issue instructions that
// encapsulate multiple loads.
def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 1;
  let NumMicroOps = 0;
}

foreach NumAddr = 1-8 in {

// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;

// A9WriteLfp1-8 definitions are statically expanded into a sequence of
// A9WriteLfpOps with additive latency that takes a single issue slot.
// Used directly to describe NEON VLDn.
def A9WriteLfp#NumAddr : WriteSequence<
  [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

// A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
// permuting loaded values.
// NOTE(review): the sequence prepends A9WriteF, not a 1-cycle write --
// confirm that the "a cycle of latency" wording matches the intent.
def A9WriteLfp#NumAddr#Mov : WriteSequence<
  [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

} // foreach NumAddr

// Define VLDM/VSTM PreRA resources.
// A9WriteLMfpPreRA are dynamically expanded into the correct
// A9WriteLfp1-8 sequence based on a predicate. This supports the
// preRA VLDM variants in which all 64-bit loads are written to the
// same tuple of either single or double precision registers.
def A9WriteLMfpPreRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
  SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
  SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
  SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
  SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
  SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
  SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
  SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
  // For unknown VLDM/VSTM PreRA, assume 2xS registers.
  SchedVar<A9LMUnknownPred, [A9WriteLfp2]>
]>;

// Define VLDM/VSTM PostRA Resources.
// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
// Zero-latency write on A9UnitLS and A9UnitFP; NumMicroOps is left at its
// default.
def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 0;
}

foreach NumAddr = 1-8 in {

// Each A9WriteLMfp#N variant adds N cycles of latency without consuming
// additional resources.
def A9WriteLMfp#NumAddr : WriteSequence<
  [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

// Assuming aligned access, the upper half of each pair is free with
// the same latency.
def A9WriteLMfp#NumAddr#Hi : WriteSequence<
  [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

} // foreach NumAddr

// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
// pair of writes for each 64-bit data loaded. When the number of
// registers is odd, the last WriteLMfpnHi is naturally ignored because
// the instruction has no following def operands.

// Backing list for A9WriteLMfpPostRA. Indices 0-7 hold the writes that
// consume resources, indices 8-22 hold the "Hi" writes; the trailing
// comments give the index of each row so the slices below can be checked.
def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<[
  A9WriteLMfp1,   A9WriteLMfp2,     // 0-1
  A9WriteLMfp3,   A9WriteLMfp4,     // 2-3
  A9WriteLMfp5,   A9WriteLMfp6,     // 4-5
  A9WriteLMfp7,   A9WriteLMfp8,     // 6-7
  A9WriteLMfp1Hi,                   // 8-8
  A9WriteLMfp2Hi, A9WriteLMfp2Hi,   // 9-10
  A9WriteLMfp3Hi, A9WriteLMfp3Hi,   // 11-12
  A9WriteLMfp4Hi, A9WriteLMfp4Hi,   // 13-14
  A9WriteLMfp5Hi, A9WriteLMfp5Hi,   // 15-16
  A9WriteLMfp6Hi, A9WriteLMfp6Hi,   // 17-18
  A9WriteLMfp7Hi, A9WriteLMfp7Hi,   // 19-20
  A9WriteLMfp8Hi, A9WriteLMfp8Hi    // 21-22
]>;

def A9WriteLMfpPostRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
  SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
  SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
  SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
  SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
  SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
  SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
  SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources. We are optimizing for the case
  // where the operands are DPRs, and this determines the first eight
  // types. The remaining eight types are filled to cover the case
  // where the operands are SPRs.
  SchedVar<A9LMUnknownPred, [
    A9WriteLMfp1,   A9WriteLMfp2,
    A9WriteLMfp3Hi, A9WriteLMfp4Hi,
    A9WriteLMfp5Hi, A9WriteLMfp6Hi,
    A9WriteLMfp7Hi, A9WriteLMfp8Hi,
    A9WriteLMfp5Hi, A9WriteLMfp5Hi,
    A9WriteLMfp6Hi, A9WriteLMfp6Hi,
    A9WriteLMfp7Hi, A9WriteLMfp7Hi,
    A9WriteLMfp8Hi, A9WriteLMfp8Hi]>
]> {
  let Variadic = 1;
}

// Distinguish between our multiple MI-level forms of the same
// VLDM/VSTM instructions.
def A9PreRA : SchedPredicate<
  "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
def A9PostRA : SchedPredicate<
  "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;

// VLDM represents all destination registers as a single register
// tuple, unlike LDM. So the number of write operands is not variadic.
def A9WriteLMfp : SchedWriteVariant<[
  SchedVar<A9PreRA,  [A9WriteLMfpPreRA]>,
  SchedVar<A9PostRA, [A9WriteLMfpPostRA]>
]>;

//===----------------------------------------------------------------------===//
// Resources for other (non LDM/VLDM) Variants.

// These mov immediate writers are unconditionally expanded with
// additive latency.
def A9WriteI2   : WriteSequence<[A9WriteI, A9WriteI]>;
def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;

// Some ALU operations can read loaded integer values one cycle early.
// Read advance of 1 cycle, applied only when the producer is one of the
// listed integer load writes.
def A9ReadALU : SchedReadAdvance<1, [
  A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
  A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
  A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
  A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
  A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi
]>;

// Read types for operands that are unconditionally read in cycle N
// after the instruction issues, decreases producer latency by N-1.
def A9Read2 : SchedReadAdvance<1>;
def A9Read3 : SchedReadAdvance<2>;
def A9Read4 : SchedReadAdvance<3>;

//===----------------------------------------------------------------------===//
// Map itinerary classes to scheduler read/write resources per operand.
//
// For ARM, we piggyback scheduler resources on the Itinerary classes
// to avoid perturbing the existing instruction definitions.

// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.

// Integer moves.
def : ItinRW<[WriteALU],
             [IIC_iMOVi, IIC_iMOVr, IIC_iMOVsi,
              IIC_iMVNi, IIC_iMVNsi,
              IIC_iCMOVi, IIC_iCMOVr, IIC_iCMOVsi]>;
def : ItinRW<[WriteALU, A9ReadALU], [IIC_iMVNr]>;
def : ItinRW<[A9WriteIsr], [IIC_iMOVsr, IIC_iMVNsr, IIC_iCMOVsr]>;

def : ItinRW<[A9WriteI2],   [IIC_iMOVix2, IIC_iCMOVix2]>;
def : ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
def : ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;

// Integer ALU, bitwise, compare and extend operations.
def : ItinRW<[WriteALU],
             [IIC_iBITi, IIC_iBITr, IIC_iUNAr, IIC_iTSTi, IIC_iTSTr]>;
def : ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
def : ItinRW<[WriteALU, A9ReadALU, A9ReadALU], [IIC_iALUr, IIC_iCMPr]>;
def : ItinRW<[WriteALUsi],
             [IIC_iBITsi, IIC_iUNAsi, IIC_iEXTr, IIC_iTSTsi]>;
def : ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
def : ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
def : ItinRW<[A9WriteALUsr],
             [IIC_iBITsr, IIC_iTSTsr, IIC_iEXTAr, IIC_iEXTAsr]>;
def : ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr, IIC_iCMPsr]>;

// A9WriteMHi is ignored for MUL32.
def : ItinRW<[A9WriteM, A9WriteMHi],
             [IIC_iMUL32, IIC_iMAC32, IIC_iMUL64, IIC_iMAC64]>;
// FIXME: SMLALxx needs itin classes
def : ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16, IIC_iMAC16]>;

// TODO: For floating-point ops, we model the pipeline forwarding
// latencies here. WAW latencies are sometimes longer.

// Floating-point itinerary classes.
def : ItinRW<[A9WriteFMov],
             [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
              IIC_fpUNA32, IIC_fpUNA64,
              IIC_fpCMP32, IIC_fpCMP64]>;
def : ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
def : ItinRW<[A9WriteF],
             [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
              IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
              IIC_fpALU32, IIC_fpALU64]>;
def : ItinRW<[A9WriteFMulS],  [IIC_fpMUL32]>;
def : ItinRW<[A9WriteFMulD],  [IIC_fpMUL64]>;
def : ItinRW<[A9WriteFMAS],   [IIC_fpMAC32]>;
def : ItinRW<[A9WriteFMAD],   [IIC_fpMAC64]>;
def : ItinRW<[A9WriteFDivS],  [IIC_fpDIV32]>;
def : ItinRW<[A9WriteFDivD],  [IIC_fpDIV64]>;
def : ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
def : ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;

def : ItinRW<[A9WriteB], [IIC_Br]>;

// A9 PLD is processed in a dedicated unit.
def : ItinRW<[], [IIC_Preload]>;

// Note: We must assume that loads are aligned, since the machine
// model cannot know this statically and A9 ignores alignment hints.

// A9WriteAdr consumes AGU regardless of address writeback, but its
// latency is only relevant for users of an updated address.
// Integer loads: the first write is the loaded value, the last is the
// address generation write.
def : ItinRW<[A9WriteL, A9WriteAdr],
             [IIC_iLoad_i, IIC_iLoad_r, IIC_iLoad_iu, IIC_iLoad_ru]>;
def : ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si, IIC_iLoad_siu]>;
def : ItinRW<[A9WriteLb, A9WriteAdr2],
             [IIC_iLoad_bh_i, IIC_iLoad_bh_r,
              IIC_iLoad_bh_iu, IIC_iLoad_bh_ru]>;
def : ItinRW<[A9WriteLbsi, A9WriteAdr2],
             [IIC_iLoad_bh_si, IIC_iLoad_bh_siu]>;
def : ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr],
             [IIC_iLoad_d_i, IIC_iLoad_d_r, IIC_iLoad_d_ru]>;
// Store either has no def operands, or the one def for address writeback.
def : ItinRW<[A9WriteAdr, A9WriteS],
             [IIC_iStore_i, IIC_iStore_r,
              IIC_iStore_iu, IIC_iStore_ru,
              IIC_iStore_d_i, IIC_iStore_d_r,
              IIC_iStore_d_ru]>;
def : ItinRW<[A9WriteAdr2, A9WriteS],
             [IIC_iStore_si, IIC_iStore_siu,
              IIC_iStore_bh_i, IIC_iStore_bh_r,
              IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
def : ItinRW<[A9WriteAdr3, A9WriteS],
             [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;

// A9WriteLM will be expanded into a separate write for each def
// operand. Address generation consumes resources, but A9WriteLMAdr
// is listed after all def operands, so has no effective latency.
//
// Note: A9WriteLM expands into an even number of def operands. The
// actual number of def operands may be less by one.
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue],
             [IIC_iLoad_m, IIC_iPop]>;

// Load multiple with address writeback has an extra def operand in
// front of the loaded registers.
//
// Reuse the load-multiple variants for store-multiple because the
// resources are identical. For stores only the address writeback
// has a def operand so the WriteL latencies are unused.
def : ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue],
             [IIC_iLoad_mu, IIC_iStore_m, IIC_iStore_mu]>;
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB],
             [IIC_iLoad_mBr, IIC_iPop_Br]>;
def : ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;

// VFP loads and stores.
def : ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;

def : ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
def : ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
def : ItinRW<[A9WriteAdr, A9WriteLSfp],
             [IIC_fpStore32, IIC_fpStore64,
              IIC_fpStore_m, IIC_fpStore_mu]>;

// Note: Unlike VLDM, VLD1 expects the writeback operand after the
// normal writes.
// NEON vector loads: loaded-value write first, address write second.
def : ItinRW<[A9WriteLfp1, A9WriteAdr1],
             [IIC_VLD1, IIC_VLD1u,
              IIC_VLD1x2, IIC_VLD1x2u]>;
def : ItinRW<[A9WriteLfp2, A9WriteAdr2],
             [IIC_VLD1x3, IIC_VLD1x3u,
              IIC_VLD1x4, IIC_VLD1x4u,
              IIC_VLD4dup, IIC_VLD4dupu]>;
def : ItinRW<[A9WriteLfp1Mov, A9WriteAdr1],
             [IIC_VLD1dup, IIC_VLD1dupu,
              IIC_VLD2, IIC_VLD2u,
              IIC_VLD2dup, IIC_VLD2dupu]>;
def : ItinRW<[A9WriteLfp2Mov, A9WriteAdr1],
             [IIC_VLD1ln, IIC_VLD1lnu,
              IIC_VLD2x2, IIC_VLD2x2u,
              IIC_VLD2ln, IIC_VLD2lnu]>;
def : ItinRW<[A9WriteLfp3Mov, A9WriteAdr3],
             [IIC_VLD3, IIC_VLD3u,
              IIC_VLD3dup, IIC_VLD3dupu]>;
def : ItinRW<[A9WriteLfp4Mov, A9WriteAdr4],
             [IIC_VLD4, IIC_VLD4u,
              IIC_VLD4ln, IIC_VLD4lnu]>;
def : ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;

// Vector stores use similar resources to vector loads, so use the
// same write types. The address write must be first for stores with
// address writeback.
def : ItinRW<[A9WriteAdr1, A9WriteLfp1],
             [IIC_VST1, IIC_VST1u,
              IIC_VST1x2, IIC_VST1x2u,
              IIC_VST1ln, IIC_VST1lnu,
              IIC_VST2, IIC_VST2u,
              IIC_VST2x2, IIC_VST2x2u,
              IIC_VST2ln, IIC_VST2lnu]>;
def : ItinRW<[A9WriteAdr2, A9WriteLfp2],
             [IIC_VST1x3, IIC_VST1x3u,
              IIC_VST1x4, IIC_VST1x4u,
              IIC_VST3, IIC_VST3u,
              IIC_VST3ln, IIC_VST3lnu,
              IIC_VST4, IIC_VST4u,
              IIC_VST4ln, IIC_VST4lnu]>;

// NEON moves.
// Move itinerary classes, grouped by write latency.
def : ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
def : ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
def : ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;

// NEON integer arithmetic
//
// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
def : ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
// VSUB/VMVN/VCLSD/VCLZD/VCNTD
def : ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
// ...
// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
def : ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
def : ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
// VQNEG/VQABS
def : ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
// VABS
def : ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
// VPADD/VPADDL are mapped later under IIC_SHLi.
// ...
// VCLSQ/VCLZQ/VCNTQ, takes two cycles.
def : ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
// VMOVimm/VMVNimm/VORRimm/VBICimm
def : ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
def : ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
def : ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;

// NEON integer multiply
//
// Note: these don't quite match the timing docs, but they do match
// the original A9 itinerary.
// 16-bit element multiplies use single-cycle writes; 32-bit element
// multiplies use the A9Write2V* writes.
def : ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
def : ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
def : ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
def : ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
def : ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
def : ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
def : ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
def : ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;

// NEON integer shift
// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
def : ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
def : ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;

// NEON permute
def : ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
def : ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
             [IIC_VPERMQ3, IIC_VEXTQ]>;
def : ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
def : ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3],
             [IIC_VTBX3]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTBX4]>;

// NEON floating-point
def : ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
def : ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
def : ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
def : ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
def : ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
def : ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
def : ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
def : ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;

// Map SchedRWs that are identical for cortexa9 to existing resources.
def : SchedAlias<WriteALU, A9WriteALU>;
def : SchedAlias<WriteALUsr, A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU, A9ReadALU>;
def : SchedAlias<ReadALUsr, A9ReadALU>;
// Per-instruction mappings for the bitwise operations, selected by
// opcode-name regex.
def : InstRW<[WriteALU],
             (instregex "ANDri", "ORRri", "EORri", "BICri",
                        "ANDrr", "ORRrr", "EORrr", "BICrr")>;
def : InstRW<[WriteALUsi],
             (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
def : InstRW<[WriteALUsr],
             (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;


def : SchedAlias<WriteCMP, A9WriteALU>;
def : SchedAlias<WriteCMPsi, A9WriteALU>;
def : SchedAlias<WriteCMPsr, A9WriteALU>;

def : InstRW<[A9WriteIsr],
             (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", "MOVCCsr")>;
def : InstRW<[WriteALU, A9ReadALU], (instregex "MVNr")>;
def : InstRW<[A9WriteI2],
             (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn")>;
def : InstRW<[A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
def : InstRW<[A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;

def : InstRW<[WriteALU], (instregex "SEL")>;

def : InstRW<[WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;

// Per-instruction mappings for multiplies, selected by opcode-name regex.
// Multiplies with a single 32-bit result.
def : InstRW<[A9WriteM],
             (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5",
                        "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
// Multiplies with a second write (A9WriteMHi).
def : InstRW<[A9WriteM, A9WriteMHi],
             (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$",
                        "UMLAL", "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5",
                        "SMLALBB", "SMLALBT", "SMLALTB", "SMLALTT")>;
// FIXME: These instructions used to have NoItinerary. Just copied the one
// from above.
// "SMLSLDX" was previously misspelled "SMLLDX", which names no instruction.
def : InstRW<[A9WriteM, A9WriteMHi],
             (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD",
                        "SMLSDX", "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX",
                        "SMUSD", "SMUSDX")>;

def : InstRW<[A9WriteM16, A9WriteM16Hi],
             (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT",
                        "SMULWB", "SMULWT")>;
def : InstRW<[A9WriteM16, A9WriteM16Hi],
             (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT",
                        "SMLAWB", "SMLAWT")>;

// Integer loads. Word loads use A9WriteL/A9WriteLsi; byte and halfword
// loads use A9WriteLb/A9WriteLbsi.
def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
def : InstRW<[A9WriteLb],
             (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH",
                        "PICLDRSB", "LDRH", "LDRSH", "LDRSB")>;
// Byte load with shifted-register offset. This entry used to repeat
// "LDRrs", which the A9WriteLsi mapping above already claims.
def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>;

def : WriteRes<WriteDiv, []> { let Latency = 0; }

def : WriteRes<WriteBr, [A9UnitB]>;
def : WriteRes<WriteBrL, [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
def : WriteRes<WritePreLd, []>;
def : SchedAlias<WriteCvtFP, A9WriteF>;
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model