;; Scheduling description for UltraSPARC-I/II.
;; Copyright (C) 2002-2015 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; UltraSPARC-I and II are quad-issue processors.  Interesting features
;; to note:
;;
;; - Buffered loads: they can queue waiting for the actual data until
;;   an instruction actually tries to reference the destination register
;;   as an input.
;; - Two integer units.  Only one of them can do shifts, and the other
;;   is the only one which may do condition code setting instructions.
;;   Complicating things further, a shift may go only into the first
;;   slot in a dispatched group.  And if you have a non-condition code
;;   setting instruction and one that does set the condition codes, the
;;   former must be issued first in order for both of them to issue.
;; - Stores can issue before the value being stored is available.  As long
;;   as the input data becomes ready before the store is to move out of the
;;   store buffer, it will not cause a stall.
;; - Branches may issue in the same cycle as an instruction setting the
;;   condition codes being tested by that branch.  This does not apply
;;   to floating point, only integer.

;; Two automata are declared.  The FP divider and FP multiplier units are
;; placed in ultrasparc_0; every other unit is placed in ultrasparc_1.
(define_automaton "ultrasparc_0,ultrasparc_1")

(define_cpu_unit "us1_fdivider,us1_fpm" "ultrasparc_0")
(define_cpu_unit "us1_fpa,us1_load_writeback" "ultrasparc_1")
(define_cpu_unit "us1_fps_0,us1_fps_1,us1_fpd_0,us1_fpd_1" "ultrasparc_1")
(define_cpu_unit "us1_slot0,us1_slot1,us1_slot2,us1_slot3" "ultrasparc_1")
(define_cpu_unit "us1_ieu0,us1_ieu1,us1_cti,us1_lsu" "ultrasparc_1")

;; Issue slot helpers:
;;   us1_slot012      - any one of the first three slots
;;   us1_slotany      - any one of the four slots
;;   us1_single_issue - all four slots at once, i.e. nothing else can be
;;                      placed in the same group
(define_reservation "us1_slot012" "(us1_slot0 | us1_slot1 | us1_slot2)")
(define_reservation "us1_slotany" "(us1_slot0 | us1_slot1 | us1_slot2 | us1_slot3)")
(define_reservation "us1_single_issue" "us1_slot0 + us1_slot1 + us1_slot2 + us1_slot3")

;; Either one of the two single-precision (resp. double-precision)
;; pseudo-units.
(define_reservation "us1_fp_single" "(us1_fps_0 | us1_fps_1)")
(define_reservation "us1_fp_double" "(us1_fpd_0 | us1_fpd_1)")

;; This is a simplified representation of the issue at hand.
;; For most cases, going from one FP precision type insn to another
;; just breaks up the insn group.  However for some cases, such
;; a situation causes the second insn to stall 2 more cycles.
(exclusion_set "us1_fps_0,us1_fps_1" "us1_fpd_0,us1_fpd_1")

;; If we have to schedule an ieu1 specific instruction and we want
;; to reserve the ieu0 unit as well, we must reserve it first.  So for
;; example we could not schedule this sequence:
;;	COMPARE		IEU1
;;	IALU		IEU0
;; but we could schedule them together like this:
;;	IALU		IEU0
;;	COMPARE		IEU1
;; This basically requires that ieu0 is reserved before ieu1 when
;; it is required that both be reserved.
(absence_set "us1_ieu0" "us1_ieu1")

;; This defines the slotting order.  Most IEU instructions can only
;; execute in the first three slots, FPU and branches can go into
;; any slot.  We represent instructions which "break the group"
;; as requiring reservation of us1_slot0.
;; A lower-numbered slot may only be reserved while no higher-numbered
;; slot is reserved.
(absence_set "us1_slot0" "us1_slot1,us1_slot2,us1_slot3")
(absence_set "us1_slot1" "us1_slot2,us1_slot3")
(absence_set "us1_slot2" "us1_slot3")

;; Instructions that reserve us1_single_issue and therefore issue alone.
(define_insn_reservation "us1_single" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "multi,savew,flushw,iflush,trap,gsr"))
  "us1_single_issue")

;; Plain integer ALU operations: either integer unit, one of slots 0-2.
(define_insn_reservation "us1_simple_ieuN" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "ialu"))
  "(us1_ieu0 | us1_ieu1) + us1_slot012")

;; Shifts are restricted to ieu0.
(define_insn_reservation "us1_simple_ieu0" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "shift"))
  "us1_ieu0 + us1_slot012")

;; Compares and the edge/array instructions are restricted to ieu1.
(define_insn_reservation "us1_simple_ieu1" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "compare,edge,edgen,array"))
  "us1_ieu1 + us1_slot012")

(define_insn_reservation "us1_ialuX" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "ialuX"))
  "us1_single_issue")

;; Conditional moves issue alone and have a latency of 2.
(define_insn_reservation "us1_cmove" 2
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "cmove"))
  "us1_single_issue, nothing")

(define_insn_reservation "us1_imul" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "imul"))
  "us1_single_issue")

(define_insn_reservation "us1_idiv" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "idiv"))
  "us1_single_issue")

;; For loads, the "delayed return mode" behavior of the chip
;; is represented using the us1_load_writeback resource.
;; Unsigned and FP loads: latency 2, write-back unit used in the cycle
;; after issue.
(define_insn_reservation "us1_load" 2
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "load,fpload"))
  "us1_lsu + us1_slot012, us1_load_writeback")

;; Sign-extending loads: latency 3, write-back unit used one cycle later
;; than for us1_load.
(define_insn_reservation "us1_load_signed" 3
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "sload"))
  "us1_lsu + us1_slot012, nothing, us1_load_writeback")

(define_insn_reservation "us1_store" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "store,fpstore"))
  "us1_lsu + us1_slot012")

;; Conditional branches may use any slot.
(define_insn_reservation "us1_branch" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "branch"))
  "us1_cti + us1_slotany")

;; Calls and unconditional branches additionally reserve ieu1 and
;; must take slot 0.
(define_insn_reservation "us1_call_jmpl" 1
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
  "us1_cti + us1_ieu1 + us1_slot0")

;; Floating point reservations.  Each FP insn type has a single and a
;; double precision variant that differ only in the precision
;; pseudo-unit (us1_fp_single vs. us1_fp_double) they reserve.

(define_insn_reservation "us1_fmov_single" 1
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fpmove"))
       (eq_attr "fptype" "single"))
  "us1_fpa + us1_fp_single + us1_slotany")

(define_insn_reservation "us1_fmov_double" 1
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fpmove"))
       (eq_attr "fptype" "double"))
  "us1_fpa + us1_fp_double + us1_slotany")

(define_insn_reservation "us1_fcmov_single" 2
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fpcmove,fpcrmove"))
       (eq_attr "fptype" "single"))
  "us1_fpa + us1_fp_single + us1_slotany, nothing")

(define_insn_reservation "us1_fcmov_double" 2
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fpcmove,fpcrmove"))
       (eq_attr "fptype" "double"))
  "us1_fpa + us1_fp_double + us1_slotany, nothing")

(define_insn_reservation "us1_faddsub_single" 4
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fp"))
       (eq_attr "fptype" "single"))
  "us1_fpa + us1_fp_single + us1_slotany, nothing*3")

(define_insn_reservation "us1_faddsub_double" 4
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fp"))
       (eq_attr "fptype" "double"))
  "us1_fpa + us1_fp_double + us1_slotany, nothing*3")

(define_insn_reservation "us1_fpcmp_single" 1
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fpcmp"))
       (eq_attr "fptype" "single"))
  "us1_fpa + us1_fp_single + us1_slotany")

(define_insn_reservation "us1_fpcmp_double" 1
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fpcmp"))
       (eq_attr "fptype" "double"))
  "us1_fpa + us1_fp_double + us1_slotany")

;; Multiplies use the us1_fpm unit instead of us1_fpa.
(define_insn_reservation "us1_fmult_single" 4
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fpmul"))
       (eq_attr "fptype" "single"))
  "us1_fpm + us1_fp_single + us1_slotany, nothing*3")

(define_insn_reservation "us1_fmult_double" 4
  (and (and (eq_attr "cpu" "ultrasparc")
            (eq_attr "type" "fpmul"))
       (eq_attr "fptype" "double"))
  "us1_fpm + us1_fp_double + us1_slotany, nothing*3")

;; This is actually in theory dangerous, because it is possible
;; for the chip to prematurely dispatch the dependent instruction
;; in the G stage, resulting in a 9 cycle stall.  However I have never
;; been able to trigger this case myself even with hand written code,
;; so it must require some rare complicated pipeline state.
(define_bypass 3
  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double"
  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")

;; Floating point divide and square root use the multiplier unit
;; for final rounding 3 cycles before the divide/sqrt is complete.

;; Single precision divide/sqrt: latency 13.  The divider is reserved for
;; 12 consecutive cycles (1 + 8 + 1 + 2); us1_fpm is reserved in the
;; first of them and again in the tenth.  Issue requires slot 0.
(define_insn_reservation "us1_fdivs"
  13
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "fpdivs,fpsqrts"))
  "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*8, (us1_fpm + us1_fdivider), us1_fdivider*2"
  )

;; Latency is one cycle shorter when the consumer is an FP add/sub
;; or multiply.
(define_bypass
  12
  "us1_fdivs"
  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")

;; Double precision divide/sqrt: latency 23.  The divider is reserved for
;; 22 consecutive cycles (1 + 18 + 1 + 2); us1_fpm is reserved in the
;; first of them and again in the twentieth.  Issue requires slot 0.
(define_insn_reservation "us1_fdivd"
  23
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "fpdivd,fpsqrtd"))
  "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*18, (us1_fpm + us1_fdivider), us1_fdivider*2"
  )

;; Latency is one cycle shorter when the consumer is an FP add/sub
;; or multiply.
(define_bypass
  22
  "us1_fdivd"
  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")

;; Any store may multi issue with the insn creating the source
;; data as long as that creating insn is not an FPU div/sqrt.
;; We need a special guard function because this bypass does
;; not apply to the address inputs of the store.
(define_bypass 0
  "us1_simple_ieuN,us1_simple_ieu1,us1_simple_ieu0,us1_faddsub_single,us1_faddsub_double,us1_fmov_single,us1_fmov_double,us1_fcmov_single,us1_fcmov_double,us1_fmult_single,us1_fmult_double"
  "us1_store"
  "store_data_bypass_p")

;; An integer branch may execute in the same cycle as the compare
;; creating the condition codes.
;; Zero latency from an ieu1-only insn (us1_simple_ieu1) to a
;; conditional branch.
(define_bypass 0 "us1_simple_ieu1" "us1_branch")

;; VIS scheduling
;;
;; The fga/visl/vismv types reserve us1_fpa; the fgm_pack/fgm_mul and
;; pdist types reserve us1_fpm.  As with the ordinary FP insns, the single
;; and double variants differ only in the precision pseudo-unit used.

(define_insn_reservation "us1_fga_single"
  2
  (and (and
         (eq_attr "cpu" "ultrasparc")
         (eq_attr "type" "fga,visl,vismv"))
       (eq_attr "fptype" "single"))
  "us1_fpa + us1_fp_single + us1_slotany, nothing")

;; Back-to-back single precision fga insns see a latency of 1 instead of 2.
(define_bypass 1 "us1_fga_single" "us1_fga_single")

(define_insn_reservation "us1_fga_double"
  2
  (and (and
         (eq_attr "cpu" "ultrasparc")
         (eq_attr "type" "fga,visl,vismv"))
       (eq_attr "fptype" "double"))
  "us1_fpa + us1_fp_double + us1_slotany, nothing")

;; Back-to-back double precision fga insns see a latency of 1 instead of 2.
(define_bypass 1 "us1_fga_double" "us1_fga_double")

(define_insn_reservation "us1_fgm_single"
  4
  (and (and
         (eq_attr "cpu" "ultrasparc")
         (eq_attr "type" "fgm_pack,fgm_mul"))
       (eq_attr "fptype" "single"))
  "us1_fpm + us1_fp_single + us1_slotany, nothing*3")

;; Latency 3 instead of 4 when feeding a single precision fga insn.
(define_bypass 3 "us1_fgm_single" "us1_fga_single")

(define_insn_reservation "us1_fgm_double"
  4
  (and (and
         (eq_attr "cpu" "ultrasparc")
         (eq_attr "type" "fgm_pack,fgm_mul"))
       (eq_attr "fptype" "double"))
  "us1_fpm + us1_fp_double + us1_slotany, nothing*3")

;; Latency 3 instead of 4 when feeding a double precision fga insn.
(define_bypass 3 "us1_fgm_double" "us1_fga_double")

;; pdist has no fptype test; it always reserves the double precision
;; pseudo-unit.
(define_insn_reservation "us1_pdist"
  4
  (and (eq_attr "cpu" "ultrasparc")
    (eq_attr "type" "pdist"))
  "us1_fpm + us1_fp_double + us1_slotany, nothing*3")

;; Latency 3 into any fga insn, and 1 into another pdist.
(define_bypass 3 "us1_pdist" "us1_fga_double,us1_fga_single")
(define_bypass 1 "us1_pdist" "us1_pdist")