;; Scheduling description for IBM POWER10 processor.
;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
;;
;; This is a clone of power9.md.  It is intended to be a placeholder until a
;; real scheduler model can be contributed.
;; The original power9.md was contributed by Pat Haugen (pthaugen@us.ibm.com).

;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; This file was cloned from power9.md; it does not (yet) describe the actual
;; POWER10 processor.
; Automata used by this description.  Every cpu unit declared below names
; one of these automata as its second operand.
(define_automaton "power10dsp,power10lsu,power10vsu,power10fpdiv,power10misc")

(define_cpu_unit "lsu0_power10,lsu1_power10,lsu2_power10,lsu3_power10" "power10lsu")
(define_cpu_unit "vsu0_power10,vsu1_power10,vsu2_power10,vsu3_power10" "power10vsu")
; Two vector permute units, part of vsu
(define_cpu_unit "prm0_power10,prm1_power10" "power10vsu")
; Two fixed point divide units, not pipelined
(define_cpu_unit "fx_div0_power10,fx_div1_power10" "power10misc")
(define_cpu_unit "bru_power10,cryptu_power10,dfu_power10" "power10misc")
; Create a false unit for use by non-pipelined FP div/sqrt
(define_cpu_unit "fp_div0_power10,fp_div1_power10,fp_div2_power10,fp_div3_power10"
                 "power10fpdiv")


; Dispatch-port units; see the restrictions described below.
(define_cpu_unit "x0_power10,x1_power10,xa0_power10,xa1_power10,
                  x2_power10,x3_power10,xb0_power10,xb1_power10,
                  br0_power10,br1_power10" "power10dsp")


; Dispatch port reservations
;
; The processor can dispatch a maximum of 6 iops per cycle with the following
; general restrictions (other restrictions also apply):
;   1) At most 2 iops per execution slice
;   2) At most 2 iops to the branch unit
; Note that insn position in a dispatch group of 6 insns does not imply which
; execution slice the insn is routed to.  The units are used to infer the
; conflicts that exist (i.e. an 'even' requirement will preclude dispatch
; with 2 insns with 'superslice' requirement).

; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
; are listed as separate units to allow those insns that preclude its use to
; still be scheduled two to a superslice while reserving the 3rd slot.  The
; same applies for xb0/xb1.
; Third dispatch slot of each superslice, expressed as a pair of units.
(define_reservation "DU_xa_power10" "xa0_power10+xa1_power10")
(define_reservation "DU_xb_power10" "xb0_power10+xb1_power10")

; Any execution slice dispatch
(define_reservation "DU_any_power10"
                    "x0_power10|x1_power10|DU_xa_power10|x2_power10|x3_power10|
                     DU_xb_power10")

; Even slice, actually takes even/odd slots
(define_reservation "DU_even_power10" "x0_power10+x1_power10|x2_power10+x3_power10")

; Slice plus 3rd slot
(define_reservation "DU_slice_3_power10"
                    "x0_power10+xa0_power10|x1_power10+xa1_power10|
                     x2_power10+xb0_power10|x3_power10+xb1_power10")

; Superslice
(define_reservation "DU_super_power10"
                    "x0_power10+x1_power10|x2_power10+x3_power10")

; 2-way cracked
(define_reservation "DU_C2_power10" "x0_power10+x1_power10|
                                     x1_power10+DU_xa_power10|
                                     x1_power10+x2_power10|
                                     DU_xa_power10+x2_power10|
                                     x2_power10+x3_power10|
                                     x3_power10+DU_xb_power10")

; 2-way cracked plus 3rd slot
(define_reservation "DU_C2_3_power10" "x0_power10+x1_power10+xa0_power10|
                                       x1_power10+x2_power10+xa1_power10|
                                       x2_power10+x3_power10+xb0_power10")

; 3-way cracked (consumes whole decode/dispatch cycle)
(define_reservation "DU_C3_power10"
                    "x0_power10+x1_power10+xa0_power10+xa1_power10+x2_power10+
                     x3_power10+xb0_power10+xb1_power10+br0_power10+br1_power10")

; Branch ports
(define_reservation "DU_branch_power10" "br0_power10|br1_power10")


; Execution unit reservations
(define_reservation "LSU_power10"
                    "lsu0_power10|lsu1_power10|lsu2_power10|lsu3_power10")

(define_reservation "LSU_pair_power10"
                    "lsu0_power10+lsu1_power10|lsu1_power10+lsu2_power10|
                     lsu2_power10+lsu3_power10|lsu3_power10+lsu0_power10")

(define_reservation "VSU_power10"
                    "vsu0_power10|vsu1_power10|vsu2_power10|vsu3_power10")

(define_reservation "VSU_super_power10"
                    "vsu0_power10+vsu1_power10|vsu2_power10+vsu3_power10")

(define_reservation "VSU_PRM_power10"
                    "prm0_power10|prm1_power10")

; Define the reservation to be used by FP div/sqrt which allows other insns
; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
; Note that the number of cycles blocked varies depending on insn, but we
; just use the same number for all in order to keep the number of DFA states
; reasonable.
(define_reservation "FP_DIV_power10"
                    "fp_div0_power10*8|fp_div1_power10*8|fp_div2_power10*8|
                     fp_div3_power10*8")
(define_reservation "VEC_DIV_power10"
                    "fp_div0_power10*8+fp_div1_power10*8|
                     fp_div2_power10*8+fp_div3_power10*8")


; LS Unit
;
; Operands of each define_insn_reservation: name, default latency, the
; attribute condition selecting the insns, and the unit reservation.
(define_insn_reservation "power10-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LSU_power10")

(define_insn_reservation "power10-load-update" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_power10,LSU_power10+VSU_power10")

(define_insn_reservation "power10-load-ext" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power10"))
  "DU_C2_power10,LSU_power10")

(define_insn_reservation "power10-load-ext-update" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C3_power10,LSU_power10+VSU_power10")

(define_insn_reservation "power10-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,LSU_power10")

(define_insn_reservation "power10-fpload-update-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

; SFmode loads are cracked and have additional 2 cycles over DFmode
(define_insn_reservation "power10-fpload-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10")

(define_insn_reservation "power10-fpload-update-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10"))
  "DU_C3_power10,LSU_power10+VSU_power10")

(define_insn_reservation "power10-vecload" 5
  (and (eq_attr "type" "vecload")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LSU_pair_power10")

; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
(define_insn_reservation "power10-store" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,LSU_power10")

(define_insn_reservation "power10-store-indexed" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,LSU_power10")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power10-store-update" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power10-store-update-indexed" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

(define_insn_reservation "power10-fpstore" 0
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,LSU_power10")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power10-fpstore-update" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

(define_insn_reservation "power10-vecstore" 0
  (and (eq_attr "type" "vecstore")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,LSU_pair_power10")

(define_insn_reservation "power10-larx" 4
  (and (eq_attr "type" "load_l")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LSU_power10")

(define_insn_reservation "power10-stcx" 2
  (and (eq_attr "type" "store_c")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,LSU_power10+VSU_power10")

(define_insn_reservation "power10-sync" 4
  (and (eq_attr "type" "sync,isync")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,LSU_power10")


; VSU Execution Unit

; Fixed point ops

; Most ALU insns are simple 2 cycle, including record form
(define_insn_reservation "power10-alu" 2
  (and (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")
; 5 cycle CR latency
(define_bypass 5 "power10-alu"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

; Rotate/shift prevent use of third slot
(define_insn_reservation "power10-rot" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

; Record form rotate/shift are cracked
(define_insn_reservation "power10-cracked-alu" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,VSU_power10")
; 7 cycle CR latency
(define_bypass 7 "power10-cracked-alu"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

(define_insn_reservation "power10-alu2" 3
  (and (eq_attr "type" "cntlz,popcnt,trap")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")
; 6 cycle CR latency
(define_bypass 6 "power10-alu2"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

(define_insn_reservation "power10-cmp" 2
  (and (eq_attr "type" "cmp")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")


; Treat 'two' and 'three' types as 2 or 3 way cracked
(define_insn_reservation "power10-two" 4
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "power10"))
  "DU_C2_power10,VSU_power10")

(define_insn_reservation "power10-three" 6
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "power10"))
  "DU_C3_power10,VSU_power10")

(define_insn_reservation "power10-mul" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

(define_insn_reservation "power10-mul-compare" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10"))
  "DU_C2_3_power10,VSU_power10")
; 10 cycle CR latency
(define_bypass 10 "power10-mul-compare"
                  "power10-crlogical,power10-mfcr,power10-mfcrf")

; Fixed point divides reserve the divide units for a minimum of 8 cycles
(define_insn_reservation "power10-idiv" 16
  (and (eq_attr "type" "div")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")

(define_insn_reservation "power10-ldiv" 24
  (and (eq_attr "type" "div")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")

(define_insn_reservation "power10-crlogical" 2
  (and (eq_attr "type" "cr_logical")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")

(define_insn_reservation "power10-mfcrf" 2
  (and (eq_attr "type" "mfcrf")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")

(define_insn_reservation "power10-mfcr" 6
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "power10"))
  "DU_C3_power10,VSU_power10")

; Should differentiate between 1 cr field and > 1 since target of > 1 cr
; is cracked
(define_insn_reservation "power10-mtcr" 2
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")

; Move to LR/CTR are executed in VSU
(define_insn_reservation "power10-mtjmpr" 5
  (and (eq_attr "type" "mtjmpr")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")

; Floating point/Vector ops
(define_insn_reservation "power10-fpsimple" 2
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

(define_insn_reservation "power10-fp" 5
  (and (eq_attr "type" "fp,dmul")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

(define_insn_reservation "power10-fpcompare" 3
  (and (eq_attr "type" "fpcompare")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
; div/sqrt insns, but for the most part do not block pipelined ops.
(define_insn_reservation "power10-sdiv" 22
  (and (eq_attr "type" "sdiv")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")

(define_insn_reservation "power10-ddiv" 27
  (and (eq_attr "type" "ddiv")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")

; Note: this reservation is named "sqrt" but matches insn type "ssqrt".
(define_insn_reservation "power10-sqrt" 26
  (and (eq_attr "type" "ssqrt")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")

(define_insn_reservation "power10-dsqrt" 36
  (and (eq_attr "type" "dsqrt")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")

(define_insn_reservation "power10-vec-2cyc" 2
  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

(define_insn_reservation "power10-veccmp" 3
  (and (eq_attr "type" "veccmp")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

(define_insn_reservation "power10-vecsimple" 3
  (and (eq_attr "type" "vecsimple")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

(define_insn_reservation "power10-vecnormal" 7
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

; Quad-precision FP ops, execute in DFU
(define_insn_reservation "power10-qp" 12
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,dfu_power10")

(define_insn_reservation "power10-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_PRM_power10")

(define_insn_reservation "power10-veccomplex" 7
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10")

(define_insn_reservation "power10-vecfdiv" 24
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10,VEC_DIV_power10")

(define_insn_reservation "power10-vecdiv" 27
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,VSU_super_power10,VEC_DIV_power10")

; Use 8 for DFU reservation on QP div/mul to limit DFA state size
(define_insn_reservation "power10-qpdiv" 56
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,dfu_power10*8")

(define_insn_reservation "power10-qpmul" 24
  (and (eq_attr "type" "qmul")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,dfu_power10*8")

(define_insn_reservation "power10-mffgpr" 2
  (and (eq_attr "type" "mffgpr")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")

(define_insn_reservation "power10-mftgpr" 2
  (and (eq_attr "type" "mftgpr")
       (eq_attr "cpu" "power10"))
  "DU_slice_3_power10,VSU_power10")


; Branch Unit
; Move from LR/CTR are executed in BRU but consume a writeback port from an
; execution slice.
(define_insn_reservation "power10-mfjmpr" 6
  (and (eq_attr "type" "mfjmpr")
       (eq_attr "cpu" "power10"))
  "DU_branch_power10,bru_power10+VSU_power10")

; Branch is 2 cycles
(define_insn_reservation "power10-branch" 2
  (and (eq_attr "type" "jmpreg,branch")
       (eq_attr "cpu" "power10"))
  "DU_branch_power10,bru_power10")


; Crypto Unit
(define_insn_reservation "power10-crypto" 6
  (and (eq_attr "type" "crypto")
       (eq_attr "cpu" "power10"))
  "DU_super_power10,cryptu_power10")


; HTM Unit
(define_insn_reservation "power10-htm" 4
  (and (eq_attr "type" "htm")
       (eq_attr "cpu" "power10"))
  "DU_C2_power10,LSU_power10")

(define_insn_reservation "power10-htm-simple" 2
  (and (eq_attr "type" "htmsimple")
       (eq_attr "cpu" "power10"))
  "DU_any_power10,VSU_power10")


; DFP Unit
(define_insn_reservation "power10-dfp" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "cpu" "power10"))
  "DU_even_power10,dfu_power10")