;; Copyright (C) 2002-2020 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;;
;; AMD Athlon Scheduling
;;
;; The Athlon contains three pipelined FP units, three integer units and
;; three address generation units.
;;
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of the K6 might be
;; an issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to the 72 entry instruction
;; control unit, which passes them to the specialized integer (18 entry)
;; and fp (36 entry) schedulers.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.

;; Decoder path taken by an insn on Athlon/K8.  The listed insn types,
;; pushes whose operand 1 is a memory operand, and XFmode fmov loads and
;; stores are vector decoded; everything else defaults to direct.
(define_attr "athlon_decode" "direct,vector,double"
  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
           (const_string "vector")
         (and (eq_attr "type" "push")
              (match_operand 1 "memory_operand"))
           (const_string "vector")
         (and (eq_attr "type" "fmov")
              (and (eq_attr "memory" "load,store")
                   (eq_attr "mode" "XF")))
           (const_string "vector")]
        (const_string "direct")))

;; Decoder path taken by an insn on AMDFAM10.  The default given here is
;; direct.
(define_attr "amdfam10_decode" "direct,vector,double"
  (const_string "direct"))
;;
;;           decode0 decode1 decode2
;;                 \    |   /
;;    instruction control unit (72 entry scheduler)
;;                |                        |
;;      integer scheduler (18)         stack map
;;     /  |    |    |    |   \        stack rename
;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
;;    |  agu0  |   agu1      agu2    register file
;;    |      \ |    |       /         |     |     |
;;     \      /\    |     /         fadd  fmul  fstore
;;       \  /    \  |   /           fadd  fmul  fstore
;;       imul  load/store (2x)      fadd  fmul  fstore

(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")

;; The three direct decoders and the vector decoder, all in the "athlon"
;; automaton.
(define_cpu_unit "athlon-decode0,athlon-decode1,athlon-decode2,athlon-decodev"
                 "athlon")
;; A double decoded instruction may take 2 cycles to decode when it uses
;; decoder2 in one cycle and decoder0 in the next (this is needed to allow
;; a throughput of 1.5 double decoded instructions per cycle).
;;
;; To avoid a dependence between the reservation of the decoder and the
;; other units, the decoder is modeled as a two stage, fully pipelined
;; unit, and only a double decoded instruction may occupy a unit in the
;; first cycle.  With this scheme alone, however, two double instructions
;; could be issued in cycle 0.
;;
;; Avoid this by using a presence set requiring decoder0 to be allocated
;; too.  Vector decoded instructions then cannot be issued when modeled as
;; consuming decoder0+decoder1+decoder2.  That is solved by a specialized
;; vector decoder unit and an exclusion set.
;; decode2 requires decode0; the vector decoder excludes all three direct
;; decoders.
(presence_set "athlon-decode2" "athlon-decode0")
(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")

;; Decoder reservations.  All but the first alternative of athlon-double
;; start with an idle cycle.
(define_reservation "athlon-vector" "nothing,athlon-decodev")
(define_reservation "athlon-direct0" "nothing,athlon-decode0")
(define_reservation "athlon-direct" "nothing,
                                     (athlon-decode0 | athlon-decode1
                                     | athlon-decode2)")
;; A double instruction behaves like two direct instructions.
(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
                                     | (nothing,(athlon-decode0 + athlon-decode1))
                                     | (nothing,(athlon-decode1 + athlon-decode2)))")

;; Modeling the AGU and IEU units results in extremely large automata,
;; and in this approximation they are hardly ever filled.  Only the IEU
;; units could be, as the issue rate is 3 and the AGU unit is always used
;; first in the insn reservations.  Skip the models.

;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
(define_reservation "athlon-ieu" "nothing")
(define_cpu_unit "athlon-ieu0" "athlon")
;(define_cpu_unit "athlon-agu0" "athlon_agu")
;(define_cpu_unit "athlon-agu1" "athlon_agu")
;(define_cpu_unit "athlon-agu2" "athlon_agu")
;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
(define_reservation "athlon-agu" "nothing")

(define_cpu_unit "athlon-mult" "athlon_mult")

;; Two load/store units; a load goes through the AGU first.
(define_cpu_unit "athlon-load0,athlon-load1" "athlon_load")
(define_reservation "athlon-load" "athlon-agu,
                                   (athlon-load0 | athlon-load1),nothing")
;; 128bit SSE instructions issue two loads at once
(define_reservation "athlon-load2" "athlon-agu,
                                    (athlon-load0 + athlon-load1),nothing")

;; Stores reserve the same load0/load1 units.
(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
;; 128bit SSE instructions issue two stores at once
(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")


;; FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
;; Compensate for the difference as on Athlon, because that results in
;; significantly smaller automata.
(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
;; Floating point loads: the FP scheduling delay in parallel with the load.
;; The k8 variants are currently defined identically to the Athlon ones.
(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")


;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd,athlon-fmul,athlon-fstore" "athlon_fp")
(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")

;; Vector operations usually consume many of the pipes.
;; Reserves all three FP pipes at once.
(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")


;; Jump instructions are executed in the branch unit, completely
;; transparently to us.
(define_insn_reservation "athlon_branch" 0
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "ibr"))
  "athlon-direct,athlon-ieu")
;; Calls are vector decoded on Athlon/K8 and double decoded on AMDFAM10.
(define_insn_reservation "athlon_call" 0
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "call,callv"))
  "athlon-vector,athlon-ieu")
(define_insn_reservation "athlon_call_amdfam10" 0
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "call,callv"))
  "athlon-double,athlon-ieu")

;; Latency of push operation is 3 cycles, but the ESP value is available
;; earlier.
(define_insn_reservation "athlon_push" 2
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "push"))
  "athlon-direct,athlon-agu,athlon-store")
;; Pop has one reservation per cpu.  athlon_pop is restricted to "athlon":
;; it previously tested "athlon,k8", and since the first matching
;; reservation is the one used, it shadowed athlon_pop_k8 and left that
;; reservation unreachable.  athlon_leave / athlon_leave_k8 below already
;; split the cpus the same way.
(define_insn_reservation "athlon_pop" 4
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "pop"))
  "athlon-vector,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_pop_k8" 3
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "pop"))
  "athlon-double,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_pop_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "pop"))
  "athlon-direct,(athlon-ieu+athlon-load)")
;; Leave is vector decoded on Athlon and double decoded on K8/AMDFAM10.
(define_insn_reservation "athlon_leave" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "leave"))
  "athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
  (and (eq_attr "cpu" "k8,amdfam10")
       (eq_attr "type" "leave"))
  "athlon-double,(athlon-ieu+athlon-load)")

;; Lea executes in AGU unit with 2 cycles latency.
(define_insn_reservation "athlon_lea" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")
;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
(define_insn_reservation "athlon_lea_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")

;; Mul executes in a special multiplier unit attached to IEU0.
;; Register forms (memory "none,unknown") come first.
(define_insn_reservation "athlon_imul" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_k8" 3
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
;; NOTE(review): athlon_imul_k8 above already accepts amdfam10 imul with
;; memory "none,unknown" in any mode, so this HImode reservation looks
;; unreachable if earlier reservations take precedence -- confirm whether
;; it should be moved ahead of athlon_imul_k8.
(define_insn_reservation "athlon_imul_amdfam10_HI" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "HI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
;; Multiplies with a memory source (memory "load,both").
(define_insn_reservation "athlon_imul_mem" 8
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8" 6
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")

;; Idiv cannot execute in parallel with other instructions.  Treating it
;; as a short latency vector instruction is a good approximation that
;; keeps the scheduler from trying too hard to hide its latency by
;; overlapping it with other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code.
;; The same heuristic is used for amdfam10 as for K8.

(define_insn_reservation "athlon_idiv" 6
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
(define_insn_reservation "athlon_idiv_mem" 9
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
;; The parallelism of string instructions is not documented.  Model it the
;; same way as idiv to create smaller automata.  This probably does not
;; matter much.
;; The same heuristic as for idiv is used for amdfam10 as for K8.
(define_insn_reservation "athlon_str" 6
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "athlon-vector,athlon-load,athlon-ieu0*6")

;; Generic integer insns without memory access, selected by decode type.
;; Athlon/K8 test "athlon_decode"; AMDFAM10 tests "amdfam10_decode".
(define_insn_reservation "athlon_idirect" 1
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_idirect_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_ivector" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")

;; Plain integer load move: no IEU stage is reserved.
(define_insn_reservation "athlon_idirect_loadmov" 3
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-load")

;; Integer insns with a memory source.
(define_insn_reservation "athlon_idirect_load" 4
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_idirect_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_ivector_load" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")

;; Plain integer store move.
(define_insn_reservation "athlon_idirect_movstore" 1
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "athlon-direct,athlon-agu,athlon-store")

;; Integer insns that both load and store (memory "both").
(define_insn_reservation "athlon_idirect_both" 4
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
   athlon-ieu,athlon-store,
   athlon-store")
(define_insn_reservation "athlon_idirect_both_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
   athlon-ieu,athlon-store,
   athlon-store")

(define_insn_reservation "athlon_ivector_both" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
   athlon-ieu,
   athlon-ieu,
   athlon-store")
(define_insn_reservation "athlon_ivector_both_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
   athlon-ieu,
   athlon-ieu,
   athlon-store")

;; Integer insns with a memory destination (memory "store").
(define_insn_reservation "athlon_idirect_store" 1
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
   athlon-store")
(define_insn_reservation "athlon_idirect_store_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
   athlon-store")

(define_insn_reservation "athlon_ivector_store" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
   athlon-store")
(define_insn_reservation "athlon_ivector_store_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
   athlon-store")

;; Athlon floating point unit.
;; XFmode loads are vector decoded and hold all FP pipes for 9 cycles.
(define_insn_reservation "athlon_fldxf" 12
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
;; Assume superforwarding to take place so effective latency of fany op is 0.
(define_insn_reservation "athlon_fld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_fld_k8" 2
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; x87 stores.  The XFmode forms are vector decoded, issue two stores and
;; hold all FP pipes for several cycles.
(define_insn_reservation "athlon_fstxf" 10
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
(define_insn_reservation "athlon_fst" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fist" 4
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "fistp,fisttp"))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; Remaining fmov insns (the memory forms are matched above).
(define_insn_reservation "athlon_fmov" 2
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "fmov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")

;; FP add class (type "fop"), executed in the fadd pipe.
(define_insn_reservation "athlon_fadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "fop"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; FP multiply and sign operations, executed in the fmul pipe.
(define_insn_reservation "athlon_fmul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "fmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fsgn" 2
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "fsgn"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; FP divide also reserves the fmul pipe.
(define_insn_reservation "athlon_fdiv_load" 24
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fdiv" 24
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
  (and (eq_attr "cpu" "k8,amdfam10")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; Special FP functions (type "fpspc") and fcmov are vector decoded and
;; reserve all three FP pipes.
(define_insn_reservation "athlon_fpspc_load" 103
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "fpspc"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load" 7
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fcmov" 7
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load_k8" 17
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
  (and (eq_attr "cpu" "k8,amdfam10")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded but uses only one pipe.
(define_insn_reservation "athlon_fcomi_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "athlon_decode" "vector")
            (eq_attr "type" "fcmp")))
  "athlon-vector,athlon-fpsched,athlon-fadd")
;; Remaining FP compares (type "fcmp") are direct decoded.
(define_insn_reservation "athlon_fcom_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (eq_attr "type" "fcmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; Never seen by the scheduler because we still don't do post reg-stack
;; scheduling.
;(define_insn_reservation "athlon_fxch" 2
;  (and (eq_attr "cpu" "athlon,k8,amdfam10")
;       (eq_attr "type" "fxch"))
;  "athlon-direct,athlon-fpsched,athlon-fany")

;; Athlon handles MMX operations in the FPU unit with shorter latencies.

;; MMX/SSE loads.  The forms whose operand 1 is a DFmode memory operand
;; are listed first, then the 128-bit modes, then SF/DI, then any other
;; load.
(define_insn_reservation "athlon_movlpd_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_movlpd_load_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_movaps_load_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
(define_insn_reservation "athlon_movaps_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
(define_insn_reservation "athlon_movss_load" 1
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,(athlon-fany*2)")
(define_insn_reservation "athlon_movss_load_k8" 1
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
(define_insn_reservation "athlon_mmxsseld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_mmxsseld_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; On AMDFAM10 all generated double, single and integer packed and scalar
;; SSEx data loads are direct path, have a latency of 2 and do not use
;; any FP execution units.  There are no separate entries for movlpx/movhpx
;; loads, which are direct path, latency of 4 and use the FADD/FMUL FP
;; execution units, as they will not be generated.
(define_insn_reservation "athlon_sseld_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8")
;; On AMDFAM10 generated MMX data loads are direct path, latency of 4
;; and can use any FP execution unit.
(define_insn_reservation "athlon_mmxld_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "mmxmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8, athlon-fany")

;; 128-bit MMX/SSE stores: vector decoded on Athlon, double decoded on K8.
;; athlon_mmxssest previously tested cpu "k8", exactly the same condition
;; as athlon_mmxssest_k8.  That left the _k8 reservation unreachable and
;; gave Athlon no 128-bit entry at all (it fell through to
;; athlon_mmxssest_short).  It now tests "athlon", matching the
;; athlon_movaps / athlon_movaps_k8 pair further down.
(define_insn_reservation "athlon_mmxssest" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
(define_insn_reservation "athlon_mmxssest_k8" 3
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
;; All other MMX/SSE stores on Athlon/K8.
(define_insn_reservation "athlon_mmxssest_short" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; On AMDFAM10 all generated double, single and integer packed SSEx data
;; stores are double path, have a latency of 2 and use the FSTORE FP
;; execution unit.  There are no separate entries for movupx/movdqu, which
;; are vector path, latency of 3 and use the FSTORE*2 FP execution unit,
;; as they will not be generated.
(define_insn_reservation "athlon_ssest_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
;; On AMDFAM10 all generated double, single and integer scalar SSEx and
;; MMX data stores are direct path, have a latency of 2 and use the
;; FSTORE FP execution unit.
(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")

;; Register-to-register MMX/SSE moves; 128-bit modes first.
(define_insn_reservation "athlon_movaps_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
(define_insn_reservation "athlon_movaps" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
(define_insn_reservation "athlon_mmxssemov" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "mmxmov,ssemov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")

;; MMX multiplies use the fmul pipe; other MMX-unit insns use fadd or fmul.
(define_insn_reservation "athlon_mmxmul_load" 4
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "mmxmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_mmxmul" 3
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "mmxmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_mmx_load" 3
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "unit" "mmx")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-faddmul")
(define_insn_reservation "athlon_mmx" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "unit" "mmx"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; SSE operations are handled by the i387 unit as well.  For scalar
;; operations the latency is the same as for i387 operations.

(define_insn_reservation "athlon_sselog_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
(define_insn_reservation "athlon_sselog_load_k8" 5
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
(define_insn_reservation "athlon_sselog_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
(define_insn_reservation "athlon_sselog" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
  "athlon-vector,athlon-fpsched,athlon-fmul*2")
(define_insn_reservation "athlon_sselog_k8" 3
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
  "athlon-double,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_sselog_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
  "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")

;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
;; Compares (type ssecmp).  The first three reservations are limited to the
;; listed modes; the "ssecmpvector" ones after them have no mode test.
;; NOTE(review): this layout presumably relies on an earlier matching
;; reservation taking precedence -- confirm before reordering.
(define_insn_reservation "athlon_ssecmp_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_ssecmp_load_k8" 4
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI,TI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecmp" 2
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "mode" "SF,DF,DI,TI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; Remaining ssecmp instructions (no mode test), one reservation per CPU,
;; first the memory-load forms and then the forms with no "memory" test.
(define_insn_reservation "athlon_ssecmpvector_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssecmp"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector_k8" 3
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "ssecmp"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssecmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; Instructions of type ssecomi: memory-load forms first, then the forms
;; with no "memory" test.
(define_insn_reservation "athlon_ssecomi_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_ssecomi_load_k8" 6
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecomi" 4
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "ssecomi"))
  "athlon-vector,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_ssecomi_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10
;; NOTE(review): athlon_ssecomi above tests the same "ssecomi" type as this
;; reservation, so the remark above may be stale -- confirm.
       (eq_attr "type" "ssecomi"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; Additions (types sseadd and sseadd1).  The first three reservations are
;; limited to modes SF, DF and DI; the "sseaddvector" ones have no mode test.
(define_insn_reservation "athlon_sseadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd,sseadd1")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load_k8" 6
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "sseadd,sseadd1")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseadd" 4
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "sseadd,sseadd1")
            (eq_attr "mode" "SF,DF,DI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd,sseadd1")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_load_k8" 7
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sseadd,sseadd1")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseadd,sseadd1")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sseadd,sseadd1"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_k8" 5
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "sseadd,sseadd1"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sseadd,sseadd1"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.  Assume that the mode is always set to the
;; destination one and athlon_decode is set to the K8 versions.
;; cvtss2sd
;; Selected as type ssecvt with mode DF.  The athlon/k8 reservations key on
;; the athlon_decode attribute, the amdfam10 ones on amdfam10_decode.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; The two reservations below have no "memory" test.
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (eq_attr "mode" "DF"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")
(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (eq_attr "mode" "DF"))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; cvtps2pd.  Model it the same way as the other double decoded FP conversions.
;; cvtps2pd reservations: type ssecvt with mode V2DF, V4SF or TI.  The
;; athlon/k8 entries require athlon_decode "double", the amdfam10 entries
;; amdfam10_decode "direct".
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")
;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath)
;; cvtsi2sd has throughput 1 and is executed in store unit with latency of 6
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2ss mem, reg is doublepath
;; Memory-load sseicvt conversions with mode SF or DF that are decoded as
;; "double" (athlon_decode for athlon/k8, amdfam10_decode for amdfam10).
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fpload,(athlon-fstore*2)")
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsi2ss reg, reg is doublepath
;; NOTE(review): the reservations below test for "vector" decode, not
;; "double" as the comment above says -- confirm which is intended.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; cvtpd2ps with a memory load: type ssecvt with mode V4SF, V2DF or TI.
;; NOTE(review): the athlon/k8 reservation tests athlon_decode "vector" but
;; reserves athlon-double -- confirm this is intended.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
;; ??? Why is it faster than cvtsd2ss?
;; NOTE(review): the reservations below test memory "none" and use latency
;; 8 and 7, which does not match the "mem,reg ... latency 10" remark above.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector*2")
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
;; NOTE(review): the athlon/k8 reservation below tests athlon_decode
;; "vector", not "double" -- confirm against the comment above.
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,athlon-fstore")
(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")


;; Multiplies (type ssemul) with mode SF or DF and a memory load.
(define_insn_reservation "athlon_ssemul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_ssemul_load_k8" 6
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
;; Multiplies (type ssemul) with mode SF or DF and no "memory" test.
(define_insn_reservation "athlon_ssemul" 4
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; Remaining ssemul instructions (no mode test), one reservation per CPU,
;; first the memory-load forms and then the forms with no "memory" test.
;; NOTE(review): these follow the SF/DF entries, presumably because an
;; earlier matching reservation takes precedence -- confirm before reordering.
(define_insn_reservation "athlon_ssemulvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
(define_insn_reservation "athlon_ssemulvector_load_k8" 7
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_ssemulvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssemul"))
  "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
(define_insn_reservation "athlon_ssemulvector_k8" 5
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "ssemul"))
  "athlon-double,athlon-fpsched,(athlon-fmul*2)")
(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssemul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; divsd timings.  divss is faster
;; Divides (type ssediv): the first three reservations are limited to modes
;; SF and DF; the "ssedivvector" ones after them have no mode test.
(define_insn_reservation "athlon_ssediv_load" 20
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul*17")
(define_insn_reservation "athlon_ssediv_load_k8" 22
  (and (eq_attr "cpu" "k8,amdfam10")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")
(define_insn_reservation "athlon_ssediv" 20
  (and (eq_attr "cpu" "athlon,k8,amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul*17")
(define_insn_reservation "athlon_ssedivvector_load" 39
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,athlon-fmul*34")
;; NOTE(review): latency 35 here is lower than the 39 used by
;; athlon_ssedivvector_k8 below, which has no memory load -- confirm.
(define_insn_reservation "athlon_ssedivvector_load_k8" 35
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,athlon-fmul*34")
(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")
;; The three reservations below go from the decoder reservation straight to
;; athlon-fmul, with no athlon-fpsched step in between.
(define_insn_reservation "athlon_ssedivvector" 39
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssediv"))
  "athlon-vector,athlon-fmul*34")
(define_insn_reservation "athlon_ssedivvector_k8" 39
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "ssediv"))
  "athlon-double,athlon-fmul*34")
(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssediv"))
  "athlon-direct,athlon-fmul*17")
;; Instructions of type sseins with mode TI, amdfam10 only.
(define_insn_reservation "athlon_sseins_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "athlon-vector,athlon-fpsched,athlon-faddmul")