;; Scheduling description for IBM POWER9 processor.
;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
;;
;; Contributed by Pat Haugen (pthaugen@us.ibm.com).

;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

; One automaton per group of units declared below: dispatch ports (dsp),
; load/store units (lsu), vector/scalar units (vsu), the FP divide
; pseudo-units (fpdiv) and everything else (misc).
(define_automaton "power9dsp,power9lsu,power9vsu,power9fpdiv,power9misc")

(define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu")
(define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu")
; Two vector permute units, part of vsu
(define_cpu_unit "prm0_power9,prm1_power9" "power9vsu")
; Two fixed point divide units, not pipelined
(define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")
(define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")
; Create a false unit for use by non-pipelined FP div/sqrt
(define_cpu_unit "fp_div0_power9,fp_div1_power9,fp_div2_power9,fp_div3_power9"
                 "power9fpdiv")


; Dispatch ports: four slice ports (x0-x3), the third-slot ports of the two
; superslices (xa0/xa1 and xb0/xb1) and two branch ports (br0/br1).
(define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9,
                  x2_power9,x3_power9,xb0_power9,xb1_power9,
                  br0_power9,br1_power9" "power9dsp")


; Dispatch port reservations
;
; Power9 can dispatch a maximum of 6 iops per cycle with the following
; general restrictions (other restrictions also apply):
;   1) At most 2 iops per execution slice
;   2) At most 2 iops to the branch unit
; Note that insn position in a dispatch group of 6 insns does not imply which
; execution slice the insn is routed to. The units are used to infer the
; conflicts that exist (e.g. an 'even' requirement will preclude dispatch
; with 2 insns with 'superslice' requirement).
;
; Reservation strings use the automaton regexp syntax: "," advances to the
; next cycle, "+" reserves units in the same cycle, "|" separates
; alternatives and "*N" repeats a reservation for N cycles.

; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
; are listed as separate units to allow those insns that preclude its use to
; still be scheduled two to a superslice while reserving the 3rd slot. The
; same applies for xb0/xb1.
(define_reservation "DU_xa_power9" "xa0_power9+xa1_power9")
(define_reservation "DU_xb_power9" "xb0_power9+xb1_power9")

; Any execution slice dispatch
(define_reservation "DU_any_power9"
                    "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9|
                     DU_xb_power9")

; Even slice, actually takes even/odd slots
(define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9")

; Slice plus 3rd slot
(define_reservation "DU_slice_3_power9"
                    "x0_power9+xa0_power9|x1_power9+xa1_power9|
                     x2_power9+xb0_power9|x3_power9+xb1_power9")

; Superslice
(define_reservation "DU_super_power9"
                    "x0_power9+x1_power9|x2_power9+x3_power9")

; 2-way cracked
(define_reservation "DU_C2_power9" "x0_power9+x1_power9|
                                    x1_power9+DU_xa_power9|
                                    x1_power9+x2_power9|
                                    DU_xa_power9+x2_power9|
                                    x2_power9+x3_power9|
                                    x3_power9+DU_xb_power9")

; 2-way cracked plus 3rd slot
(define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9|
                                      x1_power9+x2_power9+xa1_power9|
                                      x2_power9+x3_power9+xb0_power9")

; 3-way cracked (consumes whole decode/dispatch cycle)
(define_reservation "DU_C3_power9"
                    "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+
                     x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9")

; Branch ports
(define_reservation "DU_branch_power9" "br0_power9|br1_power9")


; Execution unit reservations
(define_reservation "LSU_power9"
                    "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9")

; Two LSUs reserved together (any adjacent pair, wrapping from 3 back to 0).
(define_reservation "LSU_pair_power9"
                    "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9|
                     lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9")

(define_reservation "VSU_power9"
                    "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9")

(define_reservation "VSU_super_power9"
                    "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9")

(define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9")

; Define the reservation to be used by FP div/sqrt which allows other insns
; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
; Note that the number of cycles blocked varies depending on insn, but we
; just use the same number for all in order to keep the number of DFA states
; reasonable.
(define_reservation "FP_DIV_power9"
                    "fp_div0_power9*8|fp_div1_power9*8|fp_div2_power9*8|
                     fp_div3_power9*8")
(define_reservation "VEC_DIV_power9"
                    "fp_div0_power9*8+fp_div1_power9*8|
                     fp_div2_power9*8+fp_div3_power9*8")


; In each define_insn_reservation below the number after the name is the
; default latency in cycles.  The reservation string names the dispatch
; ports used in the first cycle and, after the comma, the execution unit(s)
; reserved in the following cycle.

; LS Unit
(define_insn_reservation "power9-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")

(define_insn_reservation "power9-load-update" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9+VSU_power9")

(define_insn_reservation "power9-load-ext" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9")

(define_insn_reservation "power9-load-ext-update" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,LSU_power9+VSU_power9")

(define_insn_reservation "power9-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

(define_insn_reservation "power9-fpload-update-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; SFmode loads are cracked and have additional 2 cycles over DFmode
(define_insn_reservation "power9-fpload-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9")

(define_insn_reservation "power9-fpload-update-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,LSU_power9+VSU_power9")

(define_insn_reservation "power9-vecload" 5
  (and (eq_attr "type" "vecload")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_pair_power9")

; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
(define_insn_reservation "power9-store" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

(define_insn_reservation "power9-store-indexed" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-store-update" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-store-update-indexed" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

(define_insn_reservation "power9-fpstore" 0
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-fpstore-update" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

(define_insn_reservation "power9-vecstore" 0
  (and (eq_attr "type" "vecstore")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,LSU_pair_power9")

; Store forwarding latency is 6
; The glob patterns select every reservation whose name contains "store"
; as producer and every one whose name contains "load" as consumer.
(define_bypass 6 "power9-*store*" "power9-*load*")

(define_insn_reservation "power9-larx" 4
  (and (eq_attr "type" "load_l")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")

(define_insn_reservation "power9-stcx" 2
  (and (eq_attr "type" "store_c")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

(define_insn_reservation "power9-sync" 4
  (and (eq_attr "type" "sync,isync")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")


; VSU Execution Unit

; Fixed point ops

; Most ALU insns are simple 2 cycle, including record form
(define_insn_reservation "power9-alu" 2
  (and (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; 5 cycle CR latency
(define_bypass 5 "power9-alu"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")

; Rotate/shift prevent use of third slot
(define_insn_reservation "power9-rot" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; Record form rotate/shift are cracked
(define_insn_reservation "power9-cracked-alu" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,VSU_power9")
; 7 cycle CR latency
(define_bypass 7 "power9-cracked-alu"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")

(define_insn_reservation "power9-alu2" 3
  (and (eq_attr "type" "cntlz,popcnt,trap")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; 6 cycle CR latency
(define_bypass 6 "power9-alu2"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")

(define_insn_reservation "power9-cmp" 2
  (and (eq_attr "type" "cmp")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")


; Treat 'two' and 'three' types as 2 or 3 way cracked
(define_insn_reservation "power9-two" 4
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,VSU_power9")

(define_insn_reservation "power9-three" 6
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,VSU_power9")

(define_insn_reservation "power9-mul" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-mul-compare" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,VSU_power9")
; 10 cycle CR latency
(define_bypass 10 "power9-mul-compare"
                  "power9-crlogical,power9-mfcr,power9-mfcrf")

; Fixed point divides reserve the divide units for a minimum of 8 cycles
(define_insn_reservation "power9-idiv" 16
  (and (eq_attr "type" "div")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")

(define_insn_reservation "power9-ldiv" 24
  (and (eq_attr "type" "div")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")

(define_insn_reservation "power9-crlogical" 2
  (and (eq_attr "type" "cr_logical")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Move from a single CR field: any dispatch port, then one VSU slice.
(define_insn_reservation "power9-mfcrf" 2
  (and (eq_attr "type" "mfcrf")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; mfcr is dispatched with the 3-way cracked reservation, so it takes the
; whole dispatch cycle.
(define_insn_reservation "power9-mfcr" 6
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,VSU_power9")

; Should differentiate between 1 cr field and > 1 since target of > 1 cr
; is cracked
(define_insn_reservation "power9-mtcr" 2
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Move to LR/CTR are executed in VSU
(define_insn_reservation "power9-mtjmpr" 5
  (and (eq_attr "type" "mtjmpr")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Floating point/Vector ops
(define_insn_reservation "power9-fpsimple" 2
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-fp" 5
  (and (eq_attr "type" "fp,dmul")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-fpcompare" 3
  (and (eq_attr "type" "fpcompare")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; FP div/sqrt are executed in the VSU slices. They are not pipelined wrt other
; div/sqrt insns, but for the most part do not block pipelined ops.
; Scalar FP div/sqrt: a dispatch slice plus 3rd slot, one cycle on a VSU
; slice, then one of the fp_div pseudo-units is held via FP_DIV_power9.
(define_insn_reservation "power9-sdiv" 22
  (and (eq_attr "type" "sdiv")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

(define_insn_reservation "power9-ddiv" 27
  (and (eq_attr "type" "ddiv")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

(define_insn_reservation "power9-sqrt" 26
  (and (eq_attr "type" "ssqrt")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

(define_insn_reservation "power9-dsqrt" 36
  (and (eq_attr "type" "dsqrt")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

; Vector ops are dispatched to a superslice and reserve a pair of VSUs.
(define_insn_reservation "power9-vec-2cyc" 2
  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

(define_insn_reservation "power9-veccmp" 3
  (and (eq_attr "type" "veccmp")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

(define_insn_reservation "power9-vecsimple" 3
  (and (eq_attr "type" "vecsimple")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

(define_insn_reservation "power9-vecnormal" 7
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; Quad-precision FP ops, execute in DFU
(define_insn_reservation "power9-qp" 12
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,dfu_power9")

(define_insn_reservation "power9-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_PRM_power9")

(define_insn_reservation "power9-veccomplex" 7
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; Vector divides additionally hold a pair of fp_div pseudo-units via
; VEC_DIV_power9.
(define_insn_reservation "power9-vecfdiv" 24
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9,VEC_DIV_power9")

(define_insn_reservation "power9-vecdiv" 27
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9,VEC_DIV_power9")

; Use 8 for DFU reservation on QP div/mul to limit DFA state size
(define_insn_reservation "power9-qpdiv" 56
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,dfu_power9*8")

(define_insn_reservation "power9-qpmul" 24
  (and (eq_attr "type" "qmul")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,dfu_power9*8")

(define_insn_reservation "power9-mffgpr" 2
  (and (eq_attr "type" "mffgpr")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-mftgpr" 2
  (and (eq_attr "type" "mftgpr")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")


; Branch Unit
; Move from LR/CTR are executed in BRU but consume a writeback port from an
; execution slice.
; mfjmpr is dispatched on a branch port and reserves the BRU together with
; one VSU slice in the same cycle.
(define_insn_reservation "power9-mfjmpr" 6
  (and (eq_attr "type" "mfjmpr")
       (eq_attr "cpu" "power9"))
  "DU_branch_power9,bru_power9+VSU_power9")

; Branch is 2 cycles
(define_insn_reservation "power9-branch" 2
  (and (eq_attr "type" "jmpreg,branch")
       (eq_attr "cpu" "power9"))
  "DU_branch_power9,bru_power9")


; Crypto Unit
(define_insn_reservation "power9-crypto" 6
  (and (eq_attr "type" "crypto")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,cryptu_power9")


; HTM Unit
(define_insn_reservation "power9-htm" 4
  (and (eq_attr "type" "htm")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9")

(define_insn_reservation "power9-htm-simple" 2
  (and (eq_attr "type" "htmsimple")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")


; DFP Unit
(define_insn_reservation "power9-dfp" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,dfu_power9")
