;; AMD Athlon Scheduling
;;
;; The Athlon does contain three pipelined FP units, three integer units and
;; three address generation units.
;;
;; The predecode logic is determining boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of K6 might be issue
;; here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instructions decoders and only one VectorPath decoder
;; is available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to 72 entry instruction control
;; unit, that passes it to the specialized integer (18 entry) and fp (36 entry)
;; schedulers.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.

;; Decode path used by the Athlon/K8 reservations below.  Everything that is
;; not explicitly listed here is treated as DirectPath.
(define_attr "athlon_decode" "direct,vector,double"
  (cond
    [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
       (const_string "vector")
     ;; Push with a memory operand.
     (and (eq_attr "type" "push")
          (match_operand 1 "memory_operand" ""))
       (const_string "vector")
     ;; XFmode x87 loads and stores.
     (and (eq_attr "type" "fmov")
          (and (eq_attr "memory" "load,store")
               (eq_attr "mode" "XF")))
       (const_string "vector")]
    (const_string "direct")))

;; Decode path used by the AMDFAM10 reservations; the default is DirectPath.
(define_attr "amdfam10_decode" "direct,vector,double"
  (const_string "direct"))
;;
;;           decode0 decode1 decode2
;;                 \    |   /
;;    instruction control unit (72 entry scheduler)
;;                |                        |
;;      integer scheduler (18)         stack map
;;     /  |    |    |    |   \        stack rename
;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
;;    |  agu0  |   agu1      agu2    register file
;;    |      \ |    |       /         |     |     |
;;     \      /\    |     /         fadd  fmul  fstore
;;       \  /    \  |   /           fadd  fmul  fstore
;;       imul  load/store (2x)      fadd  fmul  fstore

(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")

;; Three DirectPath decoders plus the dedicated VectorPath decoder unit.
(define_cpu_unit
  "athlon-decode0,athlon-decode1,athlon-decode2,athlon-decodev"
  "athlon")

;; Model the fact that double decoded instruction may take 2 cycles
;; to decode when decoder2 and decoder0 in next cycle
;; is used (this is needed to allow throughput of 1.5 double decoded
;; instructions per cycle).
;;
;; In order to avoid dependence between reservation of decoder
;; and other units, we model decoder as two stage fully pipelined unit
;; and only double decoded instruction may occupy unit in the first cycle.
;; With this scheme however two double instructions can be issued cycle0.
;;
;; Avoid this by using presence set requiring decoder0 to be allocated
;; too.  Vector decoded instructions then can't be issued when
;; modeled as consuming decoder0+decoder1+decoder2.
;; We solve that by specialized vector decoder unit and exclusion set.
(presence_set "athlon-decode2" "athlon-decode0")
(exclusion_set "athlon-decodev"
               "athlon-decode0,athlon-decode1,athlon-decode2")

(define_reservation "athlon-vector" "nothing,athlon-decodev")
(define_reservation "athlon-direct0" "nothing,athlon-decode0")
(define_reservation "athlon-direct" "nothing,
                                     (athlon-decode0 | athlon-decode1
                                     | athlon-decode2)")
;; Double instructions behave like two direct instructions.
(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
                                     | (nothing,(athlon-decode0 + athlon-decode1))
                                     | (nothing,(athlon-decode1 + athlon-decode2)))")

;; Agu and ieu unit results in extremely large automatons and
;; in our approximation they are hardly filled in.  Only ieu
;; unit can, as issue rate is 3 and agu unit is always used
;; first in the insn reservations.  Skip the models.
;; Full ieu model, intentionally left disabled (see the note above):
;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
;; In its place "athlon-ieu" reserves nothing; only ieu0 exists as a unit.
(define_reservation "athlon-ieu" "nothing")
(define_cpu_unit "athlon-ieu0" "athlon")

;; Full agu model, intentionally left disabled as well:
;(define_cpu_unit "athlon-agu0" "athlon_agu")
;(define_cpu_unit "athlon-agu1" "athlon_agu")
;(define_cpu_unit "athlon-agu2" "athlon_agu")
;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
(define_reservation "athlon-agu" "nothing")

;; The multiplier lives in its own automaton.
(define_cpu_unit "athlon-mult" "athlon_mult")

;; Two load/store ports, shared by loads and stores.
(define_cpu_unit "athlon-load0,athlon-load1" "athlon_load")

(define_reservation "athlon-load" "athlon-agu,
                                   (athlon-load0 | athlon-load1),nothing")
;; 128bit SSE instructions issue two loads at once
(define_reservation "athlon-load2" "athlon-agu,
                                   (athlon-load0 + athlon-load1),nothing")

(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
;; 128bit SSE instructions issue two stores at once
(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")


;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
;; Compensate the difference for Athlon because it results in significantly
;; smaller automata.
(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
;; The floating point loads.
(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")


;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd" "athlon_fp")
(define_cpu_unit "athlon-fmul" "athlon_fp")
(define_cpu_unit "athlon-fstore" "athlon_fp")
(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")

;; Vector operations usually consume many of pipes.
(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")


;; Jump instructions are executed in the branch unit completely transparent
;; to us.
(define_insn_reservation "athlon_branch" 0
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "ibr"))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_call" 0
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "call,callv"))
  "athlon-vector,athlon-ieu")
(define_insn_reservation "athlon_call_amdfam10" 0
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "call,callv"))
  "athlon-double,athlon-ieu")

;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "push"))
  "athlon-direct,athlon-agu,athlon-store")
;; Pop is described once per CPU family.  The Athlon entry is restricted to
;; "athlon" so that its condition no longer overlaps athlon_pop_k8; when it
;; also listed k8 and generic64 the K8 entry below could never be selected.
(define_insn_reservation "athlon_pop" 4
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "pop"))
  "athlon-vector,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_pop_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "pop"))
  "athlon-double,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_pop_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "pop"))
  "athlon-direct,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "leave"))
  "athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "leave"))
  "athlon-double,(athlon-ieu+athlon-load)")

;; Lea executes in AGU unit with 2 cycles latency.
(define_insn_reservation "athlon_lea" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")
;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
(define_insn_reservation "athlon_lea_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")

;; Mul executes in special multiplier unit attached to IEU0
(define_insn_reservation "athlon_imul" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
;; Register imul on K8 and later.  The mode-specific entries come before the
;; generic athlon_imul_k8 because the generic condition also matches them;
;; like the rest of this file, this assumes the earlier of two matching
;; reservations is the one selected.  athlon_imul_amdfam10_HI used to follow
;; the generic entry, which left it shadowed for every AMDFAM10 imul.
(define_insn_reservation "athlon_imul_k8_DI" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_amdfam10_HI" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "HI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_k8" 3
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
;; Imul with a memory operand.
(define_insn_reservation "athlon_imul_mem" 8
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")

;; Idiv cannot execute in parallel with other instructions.  Dealing with it
;; as with short latency vector instruction is good approximation avoiding
;; scheduler from trying too hard to hide its latency by overlap with
;; other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code
;; Using the same heuristics for amdfam10 as K8 with idiv

(define_insn_reservation "athlon_idiv" 6
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
(define_insn_reservation "athlon_idiv_mem" 9
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
;; The parallelism of string instructions is not documented.  Model it same way
;; as idiv to create smaller automata.  This probably does not matter much.
;; Using the same heuristics for amdfam10 as K8 with idiv
(define_insn_reservation "athlon_str" 6
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "athlon-vector,athlon-load,athlon-ieu0*6")

;; Generic integer instructions without a memory operand.  Athlon/K8 select
;; on athlon_decode, AMDFAM10 on amdfam10_decode.
(define_insn_reservation "athlon_idirect" 1
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_idirect_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_ivector" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")

;; Plain integer load move: no ieu stage is reserved.
(define_insn_reservation "athlon_idirect_loadmov" 3
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-load")

;; Generic integer instructions with a load operand.
(define_insn_reservation "athlon_idirect_load" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_idirect_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_ivector_load" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")

;; Plain integer store move.
(define_insn_reservation "athlon_idirect_movstore" 1
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "athlon-direct,athlon-agu,athlon-store")

;; Generic integer read-modify-write instructions.
(define_insn_reservation "athlon_idirect_both" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
   athlon-ieu,athlon-store,
   athlon-store")
;; AMDFAM10 counterpart of the direct-path read-modify-write reservation.
(define_insn_reservation "athlon_idirect_both_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
   athlon-ieu,athlon-store,
   athlon-store")

;; Vector-path read-modify-write integer instructions.
(define_insn_reservation "athlon_ivector_both" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
   athlon-ieu,
   athlon-ieu,
   athlon-store")
(define_insn_reservation "athlon_ivector_both_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
   athlon-ieu,
   athlon-ieu,
   athlon-store")

;; Integer instructions with a store operand only.
(define_insn_reservation "athlon_idirect_store" 1
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
   athlon-store")
(define_insn_reservation "athlon_idirect_store_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
   athlon-store")

(define_insn_reservation "athlon_ivector_store" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
   athlon-store")
(define_insn_reservation "athlon_ivector_store_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
   athlon-store")

;; Athlon floating point unit
;; XFmode loads are listed ahead of the generic fmov load entries.
(define_insn_reservation "athlon_fldxf" 12
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
;; Assume superforwarding to take place so effective latency of fany op is 0.
(define_insn_reservation "athlon_fld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_fld_k8" 2
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; x87 stores; XFmode variants first, then the generic store entries.
(define_insn_reservation "athlon_fstxf" 10
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
(define_insn_reservation "athlon_fst" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fist" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fistp,fisttp"))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; Remaining fmov instructions (no load/store entry above matched).
(define_insn_reservation "athlon_fmov" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fmov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")

;; x87 add-class operations (type "fop") use the fadd pipe.
(define_insn_reservation "athlon_fadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fop"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; x87 multiply, sign and divide operations use the fmul pipe.
(define_insn_reservation "athlon_fmul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fsgn" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fsgn"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load" 24
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fdiv" 24
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; Special x87 operations (type "fpspc") and fcmov occupy all three FP pipes.
(define_insn_reservation "athlon_fpspc_load" 103
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fpspc"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load" 7
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fcmov" 7
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load_k8" 17
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded but uses only one pipe.
;; Vector-decoded compares (fcomi) are listed before the generic fcmp
;; entries further down, whose conditions would also match them.
(define_insn_reservation "athlon_fcomi_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "athlon_decode" "vector")
            (eq_attr "type" "fcmp")))
  "athlon-vector,athlon-fpsched,athlon-fadd")

;; Remaining (direct-path) x87 compares.
(define_insn_reservation "athlon_fcom_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fcmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; Never seen by the scheduler because we still don't do post reg-stack
;; scheduling.
548;(define_insn_reservation "athlon_fxch" 2 549; (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10") 550; (eq_attr "type" "fxch")) 551; "athlon-direct,athlon-fpsched,athlon-fany") 552 553;; Athlon handle MMX operations in the FPU unit with shorter latencies 554 555(define_insn_reservation "athlon_movlpd_load" 0 556 (and (eq_attr "cpu" "athlon") 557 (and (eq_attr "type" "ssemov") 558 (match_operand:DF 1 "memory_operand" ""))) 559 "athlon-direct,athlon-fpload,athlon-fany") 560(define_insn_reservation "athlon_movlpd_load_k8" 2 561 (and (eq_attr "cpu" "k8") 562 (and (eq_attr "type" "ssemov") 563 (match_operand:DF 1 "memory_operand" ""))) 564 "athlon-direct,athlon-fploadk8,athlon-fstore") 565(define_insn_reservation "athlon_movsd_load_generic64" 2 566 (and (eq_attr "cpu" "generic64") 567 (and (eq_attr "type" "ssemov") 568 (match_operand:DF 1 "memory_operand" ""))) 569 "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)") 570(define_insn_reservation "athlon_movaps_load_k8" 2 571 (and (eq_attr "cpu" "k8,generic64") 572 (and (eq_attr "type" "ssemov") 573 (and (eq_attr "mode" "V4SF,V2DF,TI") 574 (eq_attr "memory" "load")))) 575 "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore") 576(define_insn_reservation "athlon_movaps_load" 0 577 (and (eq_attr "cpu" "athlon") 578 (and (eq_attr "type" "ssemov") 579 (and (eq_attr "mode" "V4SF,V2DF,TI") 580 (eq_attr "memory" "load")))) 581 "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)") 582(define_insn_reservation "athlon_movss_load" 1 583 (and (eq_attr "cpu" "athlon") 584 (and (eq_attr "type" "ssemov") 585 (and (eq_attr "mode" "SF,DI") 586 (eq_attr "memory" "load")))) 587 "athlon-vector,athlon-fpload,(athlon-fany*2)") 588(define_insn_reservation "athlon_movss_load_k8" 1 589 (and (eq_attr "cpu" "k8,generic64") 590 (and (eq_attr "type" "ssemov") 591 (and (eq_attr "mode" "SF,DI") 592 (eq_attr "memory" "load")))) 593 "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)") 594(define_insn_reservation 
"athlon_mmxsseld" 0 595 (and (eq_attr "cpu" "athlon") 596 (and (eq_attr "type" "mmxmov,ssemov") 597 (eq_attr "memory" "load"))) 598 "athlon-direct,athlon-fpload,athlon-fany") 599(define_insn_reservation "athlon_mmxsseld_k8" 2 600 (and (eq_attr "cpu" "k8,generic64") 601 (and (eq_attr "type" "mmxmov,ssemov") 602 (eq_attr "memory" "load"))) 603 "athlon-direct,athlon-fploadk8,athlon-fstore") 604;; On AMDFAM10 all double, single and integer packed and scalar SSEx data 605;; loads generated are direct path, latency of 2 and do not use any FP 606;; executions units. No seperate entries for movlpx/movhpx loads, which 607;; are direct path, latency of 4 and use the FADD/FMUL FP execution units, 608;; as they will not be generated. 609(define_insn_reservation "athlon_sseld_amdfam10" 2 610 (and (eq_attr "cpu" "amdfam10") 611 (and (eq_attr "type" "ssemov") 612 (eq_attr "memory" "load"))) 613 "athlon-direct,athlon-fploadk8") 614;; On AMDFAM10 MMX data loads generated are direct path, latency of 4 615;; and can use any FP executions units 616(define_insn_reservation "athlon_mmxld_amdfam10" 4 617 (and (eq_attr "cpu" "amdfam10") 618 (and (eq_attr "type" "mmxmov") 619 (eq_attr "memory" "load"))) 620 "athlon-direct,athlon-fploadk8, athlon-fany") 621(define_insn_reservation "athlon_mmxssest" 3 622 (and (eq_attr "cpu" "k8,generic64") 623 (and (eq_attr "type" "mmxmov,ssemov") 624 (and (eq_attr "mode" "V4SF,V2DF,TI") 625 (eq_attr "memory" "store,both")))) 626 "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)") 627(define_insn_reservation "athlon_mmxssest_k8" 3 628 (and (eq_attr "cpu" "k8,generic64") 629 (and (eq_attr "type" "mmxmov,ssemov") 630 (and (eq_attr "mode" "V4SF,V2DF,TI") 631 (eq_attr "memory" "store,both")))) 632 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)") 633(define_insn_reservation "athlon_mmxssest_short" 2 634 (and (eq_attr "cpu" "athlon,k8,generic64") 635 (and (eq_attr "type" "mmxmov,ssemov") 636 (eq_attr 
"memory" "store,both"))) 637 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") 638;; On AMDFAM10 all double, single and integer packed SSEx data stores 639;; generated are all double path, latency of 2 and use the FSTORE FP 640;; execution unit. No entries seperate for movupx/movdqu, which are 641;; vector path, latency of 3 and use the FSTORE*2 FP execution unit, 642;; as they will not be generated. 643(define_insn_reservation "athlon_ssest_amdfam10" 2 644 (and (eq_attr "cpu" "amdfam10") 645 (and (eq_attr "type" "ssemov") 646 (and (eq_attr "mode" "V4SF,V2DF,TI") 647 (eq_attr "memory" "store,both")))) 648 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)") 649;; On AMDFAM10 all double, single and integer scalar SSEx and MMX 650;; data stores generated are all direct path, latency of 2 and use 651;; the FSTORE FP execution unit 652(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2 653 (and (eq_attr "cpu" "amdfam10") 654 (and (eq_attr "type" "mmxmov,ssemov") 655 (eq_attr "memory" "store,both"))) 656 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)") 657(define_insn_reservation "athlon_movaps_k8" 2 658 (and (eq_attr "cpu" "k8,generic64") 659 (and (eq_attr "type" "ssemov") 660 (eq_attr "mode" "V4SF,V2DF,TI"))) 661 "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))") 662(define_insn_reservation "athlon_movaps" 2 663 (and (eq_attr "cpu" "athlon") 664 (and (eq_attr "type" "ssemov") 665 (eq_attr "mode" "V4SF,V2DF,TI"))) 666 "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)") 667(define_insn_reservation "athlon_mmxssemov" 2 668 (and (eq_attr "cpu" "athlon,k8,generic64") 669 (eq_attr "type" "mmxmov,ssemov")) 670 "athlon-direct,athlon-fpsched,athlon-faddmul") 671(define_insn_reservation "athlon_mmxmul_load" 4 672 (and (eq_attr "cpu" "athlon,k8,generic64") 673 (and (eq_attr "type" "mmxmul") 674 (eq_attr "memory" "load"))) 675 
"athlon-direct,athlon-fpload,athlon-fmul") 676(define_insn_reservation "athlon_mmxmul" 3 677 (and (eq_attr "cpu" "athlon,k8,generic64") 678 (eq_attr "type" "mmxmul")) 679 "athlon-direct,athlon-fpsched,athlon-fmul") 680(define_insn_reservation "athlon_mmx_load" 3 681 (and (eq_attr "cpu" "athlon,k8,generic64") 682 (and (eq_attr "unit" "mmx") 683 (eq_attr "memory" "load"))) 684 "athlon-direct,athlon-fpload,athlon-faddmul") 685(define_insn_reservation "athlon_mmx" 2 686 (and (eq_attr "cpu" "athlon,k8,generic64") 687 (eq_attr "unit" "mmx")) 688 "athlon-direct,athlon-fpsched,athlon-faddmul") 689;; SSE operations are handled by the i387 unit as well. The latency 690;; is same as for i387 operations for scalar operations 691 692(define_insn_reservation "athlon_sselog_load" 3 693 (and (eq_attr "cpu" "athlon") 694 (and (eq_attr "type" "sselog,sselog1") 695 (eq_attr "memory" "load"))) 696 "athlon-vector,athlon-fpload2,(athlon-fmul*2)") 697(define_insn_reservation "athlon_sselog_load_k8" 5 698 (and (eq_attr "cpu" "k8,generic64") 699 (and (eq_attr "type" "sselog,sselog1") 700 (eq_attr "memory" "load"))) 701 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") 702(define_insn_reservation "athlon_sselog_load_amdfam10" 4 703 (and (eq_attr "cpu" "amdfam10") 704 (and (eq_attr "type" "sselog,sselog1") 705 (eq_attr "memory" "load"))) 706 "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)") 707(define_insn_reservation "athlon_sselog" 3 708 (and (eq_attr "cpu" "athlon") 709 (eq_attr "type" "sselog,sselog1")) 710 "athlon-vector,athlon-fpsched,athlon-fmul*2") 711(define_insn_reservation "athlon_sselog_k8" 3 712 (and (eq_attr "cpu" "k8,generic64") 713 (eq_attr "type" "sselog,sselog1")) 714 "athlon-double,athlon-fpsched,athlon-fmul") 715(define_insn_reservation "athlon_sselog_amdfam10" 2 716 (and (eq_attr "cpu" "amdfam10") 717 (eq_attr "type" "sselog,sselog1")) 718 "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)") 719 720;; ??? 
;; pcmp executes in addmul, probably not worthwhile to bother about that.

;; SSE compares restricted to the listed modes: direct decode, one pass
;; through the FADD pipe.
(define_insn_reservation "athlon_ssecmp_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_ssecmp_load_k8" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI,TI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecmp" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "mode" "SF,DF,DI,TI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; SSE compares in any other mode.  These conditions carry no mode test,
;; so they must stay after the mode-restricted reservations above.
(define_insn_reservation "athlon_ssecmpvector_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecmpvector" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssecmp"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssecmp"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssecmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; comiss/comisd style compares (type ssecomi), load forms.
(define_insn_reservation "athlon_ssecomi_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

;; ssecomi, all remaining (non-load) forms.
;; This reservation must test type "ssecomi".  It used to test "ssecmp",
;; which the ssecmp reservations above already claim for every mode on
;; these CPUs, so non-load ssecomi insns were left without any
;; reservation in this section.
(define_insn_reservation "athlon_ssecomi" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "ssecomi"))
  "athlon-vector,athlon-fpsched,athlon-fadd")

;; Same insn class as athlon_ssecomi, with the amdfam10 latency and
;; direct decode.
(define_insn_reservation "athlon_ssecomi_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssecomi"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; SSE add restricted to modes SF, DF and DI: direct decode, one pass
;; through the FADD pipe.
(define_insn_reservation "athlon_sseadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_sseadd_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_sseadd" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "sseadd")
            (eq_attr "mode" "SF,DF,DI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; SSE add in any other mode.  These conditions carry no mode test, so
;; they must stay after the mode-restricted reservations above.
(define_insn_reservation "athlon_sseaddvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_load_k8" 7
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_sseaddvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sseadd"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "sseadd"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sseadd"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.  Assume that the mode is always set to
;; the destination one and athlon_decode is set to the K8 versions.
;; cvtss2sd
;; Selected by type ssecvt with destination mode DF; the decode-class
;; attribute separates the forms.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; No memory test here, so these two pick up every remaining DF-mode
;; ssecvt insn with the given decode class.
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (eq_attr "mode" "DF"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (eq_attr "mode" "DF"))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")

;; cvtps2pd.  Model it the same way as the other double-decoded FP
;; conversions.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")

(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; No memory test: these cover the remaining cvtps2pd forms.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")

;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath).
;; cvtsi2sd has throughput 1 and is executed in the store unit with a
;; latency of 6.
;; NOTE(review): unlike its neighbours, the cpu list below has no
;; "generic64" -- confirm whether that omission is intentional.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2ss mem, reg is
;; doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fpload,(athlon-fstore*2)")

(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*2)")

(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2ss reg, reg is doublepath
;; NOTE(review): the reservations below select the "vector" decode class,
;; not "double" -- confirm which one the comment above should name.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")

(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*3)")

(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,(athlon-fvector*3)")

(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")

;; cvtpd2ps, load form.
;; NOTE(review): the condition selects the "vector" decode class while the
;; reservation starts with athlon-double -- confirm this is intended.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
;; ??? Why is it faster than cvtsd2ss?
;; NOTE(review): the reservations that follow match memory "none" and use
;; latencies 8 and 7 -- confirm which form the figures above refer to.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector*2")

(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")

;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
;; (The "secvt" spelling in the next two names is kept as is: the name is
;; the reservation's identifier.)
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fploadk8,athlon-fvector")

(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")

;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector")

(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")

;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 7 on amdfam10
;; NOTE(review): the reservation below matches memory "none", so the
;; comment above presumably means the reg,reg form -- confirm.
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")


;; SSE multiply restricted to modes SF and DF: direct decode, one pass
;; through the FMUL pipe.
(define_insn_reservation "athlon_ssemul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul")

(define_insn_reservation "athlon_ssemul_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul")

(define_insn_reservation "athlon_ssemul" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; SSE multiply in any other mode.  These conditions carry no mode test,
;; so they must stay after the SF/DF reservations above.
(define_insn_reservation "athlon_ssemulvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_load_k8" 7
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")

(define_insn_reservation "athlon_ssemulvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssemul"))
  "athlon-vector,athlon-fpsched,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssemul"))
  "athlon-double,athlon-fpsched,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssemul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; divsd timings.
;; divss is faster
;; SSE divide restricted to modes SF and DF: the FMUL pipe is held for
;; 17 cycles.
(define_insn_reservation "athlon_ssediv_load" 20
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul*17")

(define_insn_reservation "athlon_ssediv_load_k8" 22
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")

(define_insn_reservation "athlon_ssediv" 20
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul*17")

;; SSE divide in any other mode.  These conditions carry no mode test, so
;; they must stay after the SF/DF reservations above.  On athlon and k8
;; the FMUL pipe is held for 34 cycles.
(define_insn_reservation "athlon_ssedivvector_load" 39
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_load_k8" 35
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")

;; The three non-load reservations below go from decode straight to the
;; FMUL pipe; no athlon-fpsched stage is listed.
(define_insn_reservation "athlon_ssedivvector" 39
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssediv"))
  "athlon-vector,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_k8" 39
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssediv"))
  "athlon-double,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssediv"))
  "athlon-direct,athlon-fmul*17")

;; SSE insert (type sseins) in TI mode, amdfam10 only.
(define_insn_reservation "athlon_sseins_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "athlon-vector,athlon-fpsched,athlon-faddmul")