;; Copyright (C) 2012-2020 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;;
;; AMD bdver3 and bdver4 Scheduling
;;
;; The bdver3 and bdver4 cores contain three pipelined FP units and two
;; integer units.
;; Fetching and decoding logic is different from previous fam15
;; processors.  Fetching is done every two cycles rather than every cycle
;; and two decode units are available.  The decode units therefore decode
;; four instructions in two cycles.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.
;;
;; bdver3 and bdver4 belong to fam15 processors.  We use the same insn
;; attribute that was used for the bdver1 decoding scheme.

;; One automaton per resource class: decoders, integer execution units,
;; load/store ports, FP pipes and address generation units.
(define_automaton "bdver3,bdver3_ieu,bdver3_load,bdver3_fp,bdver3_agu")

;; The four decoder slots, all living in the "bdver3" automaton.
(define_cpu_unit
  "bdver3-decode0,bdver3-decode1,bdver3-decode2,bdver3-decode3"
  "bdver3")

;; Double decoded instructions take two cycles whereas
;; direct instructions take one cycle.
;; Vectorpath instructions are single issue instructions.
;; So, we engage all units for vector instructions.
;; A vector-path instruction reserves all four decoders in the same cycle.
(define_reservation "bdver3-vector"
  "bdver3-decode0+bdver3-decode1+bdver3-decode2+bdver3-decode3")

;; Direct instructions can be issued to any of the four decoders.
(define_reservation "bdver3-direct"
  "(bdver3-decode0|bdver3-decode1|bdver3-decode2|bdver3-decode3)")

;; Double instructions take two cycles to decode: the same decoder is
;; held for two consecutive cycles.
(define_reservation "bdver3-double" "(bdver3-decode0,bdver3-decode0)|
  (bdver3-decode1,bdver3-decode1)| (bdver3-decode2,bdver3-decode2)|
  (bdver3-decode3,bdver3-decode3)")

;; Two integer execution units; "bdver3-ieu" picks either one.
(define_cpu_unit "bdver3-ieu0,bdver3-ieu1" "bdver3_ieu")
(define_reservation "bdver3-ieu" "(bdver3-ieu0|bdver3-ieu1)")

;; Two address generation units; "bdver3-agu" picks either one.
(define_cpu_unit "bdver3-agu0,bdver3-agu1" "bdver3_agu")
(define_reservation "bdver3-agu" "(bdver3-agu0|bdver3-agu1)")

;; Two load ports.  A load first takes an AGU, then one load port,
;; followed by one cycle in which nothing is reserved.
(define_cpu_unit "bdver3-load0,bdver3-load1" "bdver3_load")
(define_reservation "bdver3-load" "bdver3-agu,
  (bdver3-load0|bdver3-load1),nothing")
;; 128bit SSE instructions issue two loads at once.
(define_reservation "bdver3-load2" "bdver3-agu,
  (bdver3-load0+bdver3-load1),nothing")

;; Stores are modelled on the same two ports as loads.
(define_reservation "bdver3-store" "(bdver3-load0 | bdver3-load1)")
;; 128bit SSE instructions issue two stores at once.
(define_reservation "bdver3-store2" "(bdver3-load0+bdver3-load1)")

;; Vectorpath (microcoded) instructions are single issue instructions.
;; So, they occupy all the integer units.
(define_reservation "bdver3-ivector" "bdver3-ieu0+bdver3-ieu1+
  bdver3-agu0+bdver3-agu1+
  bdver3-load0+bdver3-load1")

;; FP scheduling stage: three cycles during which no unit is reserved.
(define_reservation "bdver3-fpsched" "nothing,nothing,nothing")

;; The floating point loads.
(define_reservation "bdver3-fpload" "(bdver3-fpsched + bdver3-load)")
(define_reservation "bdver3-fpload2" "(bdver3-fpsched + bdver3-load2)")

;; Three FP units.
(define_cpu_unit "bdver3-ffma0,bdver3-ffma1,bdver3-fpsto" "bdver3_fp")

;; A vector-path FP instruction holds every FP pipe and both load ports.
(define_reservation "bdver3-fvector" "bdver3-ffma0+bdver3-ffma1+
  bdver3-fpsto+bdver3-load0+
  bdver3-load1")

;; Named FP resources, each mapped onto one or two of the three pipes.
(define_reservation "bdver3-ffma" "(bdver3-ffma0 | bdver3-ffma1)")
(define_reservation "bdver3-fcvt" "bdver3-ffma0")
(define_reservation "bdver3-fmma" "bdver3-ffma0")
(define_reservation "bdver3-fxbar" "bdver3-ffma1")
(define_reservation "bdver3-fmal" "(bdver3-ffma0 | bdver3-fpsto)")
(define_reservation "bdver3-fsto" "bdver3-fpsto")
(define_reservation "bdver3-fpshuf" "bdver3-fpsto")

;; Jump instructions are executed in the branch unit completely
;; transparent to us.
(define_insn_reservation "bdver3_call" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "call,callv"))
  "bdver3-double,(bdver3-agu | bdver3-ieu),nothing")

;; PUSH mem is double path.
;; NOTE(review): the reservation below decodes as bdver3-direct, not
;; bdver3-double -- confirm which one is intended.
(define_insn_reservation "bdver3_push" 1
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "push"))
  "bdver3-direct,bdver3-ieu,bdver3-store")

;; POP r16/mem are double path.
;; NOTE(review): the reservation below decodes as bdver3-direct, not
;; bdver3-double -- confirm which one is intended.
(define_insn_reservation "bdver3_pop" 1
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "pop"))
  "bdver3-direct,bdver3-ivector")

;; LEAVE no latency info so far, assume same with amdfam10.
(define_insn_reservation "bdver3_leave" 3
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "leave"))
  "bdver3-vector,bdver3-ivector")

;; LEA executes in AGU unit with 1 cycle latency on BDVER3.
;; NOTE(review): the reservation below uses bdver3-ieu rather than
;; bdver3-agu -- confirm against the hardware documentation.
(define_insn_reservation "bdver3_lea" 1
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "lea"))
  "bdver3-direct,bdver3-ieu")

;; MUL executes in special multiplier unit attached to IEU1.
;; Integer multiplies are pinned to bdver3-ieu1.  The DI-mode variants
;; come before the generic ones and carry a longer latency.
(define_insn_reservation "bdver3_imul_DI" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "bdver3-direct,bdver3-ieu1")

(define_insn_reservation "bdver3_imul" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "bdver3-direct,bdver3-ieu1")

;; Multiplies with a memory operand add a load in front of the multiply.
(define_insn_reservation "bdver3_imul_mem_DI" 10
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "bdver3-direct,bdver3-load,bdver3-ieu1")

(define_insn_reservation "bdver3_imul_mem" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "bdver3-direct,bdver3-load,bdver3-ieu1")

;; String instructions touching memory are vector path and occupy all
;; the integer units.
(define_insn_reservation "bdver3_str" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "bdver3-vector,bdver3-load,bdver3-ivector")

;; Integer instructions.
;; Register-only direct-path integer instruction: any IEU or AGU.
(define_insn_reservation "bdver3_idirect" 1
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver3-direct,(bdver3-ieu|bdver3-agu)")

;; Register-only vector-path integer instruction.
(define_insn_reservation "bdver3_ivector" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver3-vector,bdver3-ivector")

;; Plain integer move from memory: load only, no IEU stage.
(define_insn_reservation "bdver3_idirect_loadmov" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-load")

;; Load-and-operate integer instruction.
(define_insn_reservation "bdver3_idirect_load" 5
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-load,bdver3-ieu")

;; Plain integer move to memory.
(define_insn_reservation "bdver3_idirect_movstore" 5
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "bdver3-direct,bdver3-ieu,bdver3-store")

;; Read-modify-write integer instruction.
(define_insn_reservation "bdver3_idirect_both" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver3-direct,bdver3-load,
  bdver3-ieu,bdver3-store,
  bdver3-store")

;; Operate-and-store integer instruction: IEU and AGU are taken together.
(define_insn_reservation "bdver3_idirect_store" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver3-direct,(bdver3-ieu+bdver3-agu),
  bdver3-store")

;; BDVER3 floating point units.
;; x87 loads and stores.  The XF-mode variants are vector path.
(define_insn_reservation "bdver3_fldxf" 13
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "bdver3-vector,bdver3-fpload2,bdver3-fvector*9")

(define_insn_reservation "bdver3_fld" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

(define_insn_reservation "bdver3_fstxf" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "bdver3-vector,(bdver3-fpsched+bdver3-agu),(bdver3-store2+(bdver3-fvector*6))")

(define_insn_reservation "bdver3_fst" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)")

(define_insn_reservation "bdver3_fist" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fistp,fisttp"))
  "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)")

;; Remaining fmov insns (those not matched by the memory forms above).
(define_insn_reservation "bdver3_fmov_bdver3" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fmov"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; x87 arithmetic.
(define_insn_reservation "bdver3_fadd_load" 10
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

(define_insn_reservation "bdver3_fadd" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fop"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

(define_insn_reservation "bdver3_fmul_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "bdver3-double,bdver3-fpload,bdver3-ffma")

(define_insn_reservation "bdver3_fmul" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fmul"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

(define_insn_reservation "bdver3_fsgn" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fsgn"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

(define_insn_reservation "bdver3_fdiv_load" 42
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

(define_insn_reservation "bdver3_fdiv" 42
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fdiv"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; Only the memory-load form of fpspc is described here.
(define_insn_reservation "bdver3_fpspc_load" 143
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "bdver3-vector,bdver3-fpload,bdver3-fvector")

(define_insn_reservation "bdver3_fcmov_load" 17
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "bdver3-vector,bdver3-fpload,bdver3-fvector")

(define_insn_reservation "bdver3_fcmov" 15
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fcmov"))
  "bdver3-vector,bdver3-fpsched,bdver3-fvector")

;; x87 compares.  The double-decoded forms come before the generic ones.
(define_insn_reservation "bdver3_fcomi_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "bdver1_decode" "double")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)")

(define_insn_reservation "bdver3_fcomi" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "bdver1_decode" "double")
            (eq_attr "type" "fcmp")))
  "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)")

(define_insn_reservation "bdver3_fcom_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

(define_insn_reservation "bdver3_fcom" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fcmp"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

(define_insn_reservation "bdver3_fxch" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "fxch"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; SSE loads.
(define_insn_reservation "bdver3_ssevector_avx128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "movu" "1")
                      (and (eq_attr "mode" "V4SF,V2DF")
                           (eq_attr "memory" "load"))))))
  "bdver3-direct,bdver3-fpload")

(define_insn_reservation "bdver3_ssevector_avx256_unaligned_load" 5
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V8SF,V4DF")
                      (eq_attr "memory" "load")))))
  "bdver3-double,bdver3-fpload")

(define_insn_reservation "bdver3_ssevector_sse128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V4SF,V2DF")
                      (eq_attr "memory" "load")))))
  "bdver3-direct,bdver3-fpload,bdver3-fmal")

(define_insn_reservation "bdver3_ssevector_avx128_load" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "bdver3-direct,bdver3-fpload,bdver3-fmal")

(define_insn_reservation "bdver3_ssevector_avx256_load" 5
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,bdver3-fmal")

(define_insn_reservation "bdver3_ssevector_sse128_load" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload")

(define_insn_reservation "bdver3_ssescalar_movq_load" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,bdver3-fmal")

(define_insn_reservation "bdver3_ssescalar_vmovss_load" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "bdver3-direct,bdver3-fpload")

(define_insn_reservation "bdver3_ssescalar_sse128_load" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload, bdver3-ffma")

;; Any other MMX/SSE move from memory.
(define_insn_reservation "bdver3_mmxsse_load" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload, bdver3-fmal")

;; SSE stores.
(define_insn_reservation "bdver3_sse_store_avx256" 5
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "store,both"))))
  "bdver3-double,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)")

(define_insn_reservation "bdver3_sse_store" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "bdver3-direct,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)")

;; Any other MMX/SSE move to memory.
(define_insn_reservation "bdver3_mmxsse_store_short" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "bdver3-direct,bdver3-fpsched,(bdver3-fsto+bdver3-store)")

;; Register moves.
;; Register-to-register SSE/MMX moves.
(define_insn_reservation "bdver3_ssevector_avx256" 3
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,bdver3-fmal")

(define_insn_reservation "bdver3_movss_movsd" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

(define_insn_reservation "bdver3_mmxssemov" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "none")))
  "bdver3-direct,bdver3-fpsched,bdver3-fmal")

;; SSE logs.
(define_insn_reservation "bdver3_sselog_load_256" 7
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sselog,sselog1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,bdver3-fmal")

(define_insn_reservation "bdver3_sselog_256" 3
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "mode" "V8SF")))
  "bdver3-double,bdver3-fpsched,bdver3-fmal")

(define_insn_reservation "bdver3_sselog_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-fxbar")

(define_insn_reservation "bdver3_sselog" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "sselog,sselog1"))
  "bdver3-direct,bdver3-fpsched,bdver3-fxbar")

;; SSE Shuffles
(define_insn_reservation "bdver3_sseshuf_load_256" 7
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseshuf,sseshuf1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,bdver3-fpshuf")

(define_insn_reservation "bdver3_sseshuf_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseshuf,sseshuf1")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-fpshuf")

;; Covers both shuffle types, like the other three shuffle reservations
;; and like bdver3_sselog_256.  With only "sseshuf" listed, a V8SF
;; "sseshuf1" register shuffle was picked up by the 128-bit
;; bdver3_sseshuf reservation (direct decode, latency 2) even though its
;; load form is treated as a double-decoded 256-bit operation.
(define_insn_reservation "bdver3_sseshuf_256" 3
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseshuf,sseshuf1")
            (eq_attr "mode" "V8SF")))
  "bdver3-double,bdver3-fpsched,bdver3-fpshuf")

(define_insn_reservation "bdver3_sseshuf" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "sseshuf,sseshuf1"))
  "bdver3-direct,bdver3-fpsched,bdver3-fpshuf")

;; PCMP actually executes in FMAL.
;; NOTE(review): the two ssecmp reservations below use bdver3-ffma, not
;; bdver3-fmal -- confirm which resource is intended.
(define_insn_reservation "bdver3_ssecmp_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

(define_insn_reservation "bdver3_ssecmp" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "ssecmp"))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

(define_insn_reservation "bdver3_ssecomi_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)")

(define_insn_reservation "bdver3_ssecomi" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (eq_attr "type" "ssecomi"))
  "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)")

;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.

;; 256 bit conversion.
;; Matches ssecvt insns where operand 0 or operand 1 has one of the
;; 256-bit modes V4DF, V8SF or V8SI.  These are vector path.
(define_insn_reservation "bdver3_vcvtX2Y_avx256_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver3-vector,bdver3-fpload,bdver3-fvector")

(define_insn_reservation "bdver3_vcvtX2Y_avx256" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver3-vector,bdver3-fpsched,bdver3-fvector")

;; CVTSS2SD, CVTSD2SS.
(define_insn_reservation "bdver3_ssecvt_cvtss2sd_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,bdver3-fcvt")

(define_insn_reservation "bdver3_ssecvt_cvtss2sd" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver3-direct,bdver3-fpsched,bdver3-fcvt")

;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
(define_insn_reservation "bdver3_sseicvt_cvtsi2sd_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,bdver3-fcvt")

;; The register form is double decoded; its last stage is either idle
;; or on the convert pipe.
(define_insn_reservation "bdver3_sseicvt_cvtsi2sd" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,(nothing | bdver3-fcvt)")

;; CVTPD2PS.
(define_insn_reservation "bdver3_ssecvt_cvtpd2ps_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")

(define_insn_reservation "bdver3_ssecvt_cvtpd2ps" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")

;; CVTPI2PS, CVTDQ2PS.
(define_insn_reservation "bdver3_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver3-direct,bdver3-fpload,bdver3-fcvt")

(define_insn_reservation "bdver3_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver3-direct,bdver3-fpsched,bdver3-fcvt")

;; CVTDQ2PD.
(define_insn_reservation "bdver3_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")

(define_insn_reservation "bdver3_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")

;; CVTPS2PD, CVTPI2PD.
(define_insn_reservation "bdver3_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)")

;; Register form.  The memory attribute must be "none" here: with "load"
;; the condition was identical to bdver3_ssecvt_cvtps2pd_load above, so
;; register-to-register conversions never matched this reservation.
;; Every other load/register pair in this file uses "load"/"none".
(define_insn_reservation "bdver3_ssecvt_cvtps2pd" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)")

;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ,
;; CVTTSS2SI, CVTTSS2SIQ.
(define_insn_reservation "bdver3_ssecvt_cvtsX2si_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fsto)")

(define_insn_reservation "bdver3_ssecvt_cvtsX2si" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fsto)")

;; CVTPD2PI, CVTTPD2PI.
(define_insn_reservation "bdver3_ssecvt_cvtpd2pi_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)")

(define_insn_reservation "bdver3_ssecvt_cvtpd2pi" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)")

;; CVTPD2DQ, CVTTPD2DQ.
(define_insn_reservation "bdver3_ssecvt_cvtpd2dq_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)")

(define_insn_reservation "bdver3_ssecvt_cvtpd2dq" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)")

;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
(define_insn_reservation "bdver3_ssecvt_cvtps2pi_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver3-direct,bdver3-fpload,bdver3-fcvt")

(define_insn_reservation "bdver3_ssecvt_cvtps2pi" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver3-direct,bdver3-fpsched,bdver3-fcvt")

;; SSE MUL, ADD, and MULADD.
;; The 256-bit (V8SF/V4DF) forms are double decoded.
(define_insn_reservation "bdver3_ssemuladd_load_256" 11
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,bdver3-ffma")

(define_insn_reservation "bdver3_ssemuladd_256" 7
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,bdver3-ffma")

(define_insn_reservation "bdver3_ssemuladd_load" 10
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-ffma")

(define_insn_reservation "bdver3_ssemuladd" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "none")))
  "bdver3-direct,bdver3-fpsched,bdver3-ffma")

;; SSE integer multiply and add.
(define_insn_reservation "bdver3_sseimul_load" 8
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-fmma")

(define_insn_reservation "bdver3_sseimul" 4
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "none")))
  "bdver3-direct,bdver3-fpsched,bdver3-fmma")

(define_insn_reservation "bdver3_sseiadd_load" 6
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "load")))
  "bdver3-direct,bdver3-fpload,bdver3-fmal")

(define_insn_reservation "bdver3_sseiadd" 2
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "none")))
  "bdver3-direct,bdver3-fpsched,bdver3-fmal")

;; SSE DIV: no throughput information (assume same as amdfam10).
;; Every divide holds one of the two FMA pipes for 17 cycles.
(define_insn_reservation "bdver3_ssediv_double_load_256" 27
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")

(define_insn_reservation "bdver3_ssediv_double_256" 27
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")

(define_insn_reservation "bdver3_ssediv_single_load_256" 27
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")

(define_insn_reservation "bdver3_ssediv_single_256" 24
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "none"))))
  "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")

(define_insn_reservation "bdver3_ssediv_double_load" 27
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")

(define_insn_reservation "bdver3_ssediv_double" 27
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "none"))))
  "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")

(define_insn_reservation "bdver3_ssediv_single_load" 27
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "load"))))
  "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)")

(define_insn_reservation "bdver3_ssediv_single" 24
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "none"))))
  "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)")

;; TI-mode SSE insert.
(define_insn_reservation "bdver3_sseins" 3
  (and (eq_attr "cpu" "bdver3,bdver4")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "bdver3-direct,bdver3-fpsched,bdver3-fxbar")