;; AMD Athlon Scheduling
;;
;; The Athlon contains three pipelined FP units, three integer units and
;; three address generation units.
;;
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of K6 might be an
;; issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to the 72 entry instruction
;; control unit, which passes them on to the specialized integer (18 entry)
;; and fp (36 entry) schedulers.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.

;; Default decode path of an instruction: "vector" for the listed types,
;; for pushes of a memory operand and for XFmode fmov loads/stores;
;; "direct" for everything else.  The default never yields "double".
(define_attr "athlon_decode" "direct,vector,double"
  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
           (const_string "vector")
         (and (eq_attr "type" "push")
              (match_operand 1 "memory_operand" ""))
           (const_string "vector")
         (and (eq_attr "type" "fmov")
              (and (eq_attr "memory" "load,store")
                   (eq_attr "mode" "XF")))
           (const_string "vector")]
        (const_string "direct")))

;;
;;           decode0 decode1 decode2
;;                 \    |   /
;;    instruction control unit (72 entry scheduler)
;;                |                        |
;;      integer scheduler (18)         stack map
;;     /  |    |    |    |   \        stack rename
;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
;;    |  agu0  |   agu1      agu2    register file
;;    |      \ |    |       /         |     |     |
;;     \      /\    |     /         fadd  fmul  fstore
;;       \  /    \  |   /           fadd  fmul  fstore
;;       imul  load/store (2x)      fadd  fmul  fstore

(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
(define_cpu_unit "athlon-decode0" "athlon")
(define_cpu_unit "athlon-decode1" "athlon")
(define_cpu_unit "athlon-decode2" "athlon")
(define_cpu_unit "athlon-decodev" "athlon")

;; Model the fact that a double decoded instruction may take 2 cycles
;; to decode when decoder2 and, in the next cycle, decoder0 are used
;; (this is needed to allow throughput of 1.5 double decoded
;; instructions per cycle).
;;
;; In order to avoid dependence between the reservation of the decoder
;; and other units, we model the decoder as a two stage fully pipelined
;; unit where only a double decoded instruction may occupy the unit in the
;; first cycle.  With this scheme, however, two double instructions could
;; be issued in cycle 0.
;;
;; Avoid this by using a presence set requiring decoder0 to be allocated
;; too.  Vector decoded instructions then can't be issued when
;; modeled as consuming decoder0+decoder1+decoder2.
;; We solve that with a specialized vector decoder unit and an exclusion set.
(presence_set "athlon-decode2" "athlon-decode0")
(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")

(define_reservation "athlon-vector" "nothing,athlon-decodev")
(define_reservation "athlon-direct0" "nothing,athlon-decode0")
(define_reservation "athlon-direct"
  "nothing, (athlon-decode0 | athlon-decode1 | athlon-decode2)")
;; A double instruction behaves like two direct instructions.
(define_reservation "athlon-double"
  "((athlon-decode2, athlon-decode0) | (nothing,(athlon-decode0 + athlon-decode1)) | (nothing,(athlon-decode1 + athlon-decode2)))")

;; Modeling the agu and ieu units results in extremely large automata, and
;; in our approximation they are hardly ever filled.  Only the ieu
;; unit can be, as the issue rate is 3 and the agu unit is always used
;; first in the insn reservations.  Skip the models.

;; Detailed ieu model, kept disabled; "athlon-ieu" reserves nothing.
;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
(define_reservation "athlon-ieu" "nothing")
;; ieu0 is still a real unit (in the main "athlon" automaton).
(define_cpu_unit "athlon-ieu0" "athlon")
;; Detailed agu model, kept disabled; "athlon-agu" reserves nothing.
;(define_cpu_unit "athlon-agu0" "athlon_agu")
;(define_cpu_unit "athlon-agu1" "athlon_agu")
;(define_cpu_unit "athlon-agu2" "athlon_agu")
;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
(define_reservation "athlon-agu" "nothing")

;; The integer multiplier lives in its own automaton.
(define_cpu_unit "athlon-mult" "athlon_mult")

;; Two load/store ports; a plain load takes either one of them.
(define_cpu_unit "athlon-load0" "athlon_load")
(define_cpu_unit "athlon-load1" "athlon_load")
(define_reservation "athlon-load"
  "athlon-agu, (athlon-load0 | athlon-load1),nothing")
;; 128bit SSE instructions issue two loads at once
(define_reservation "athlon-load2"
  "athlon-agu, (athlon-load0 + athlon-load1),nothing")

;; Stores reserve the same two ports as loads.
(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
;; 128bit SSE instructions issue two stores at once
(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")


;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
;; Compensate the difference for Athlon because it results in significantly
;; smaller automata.
(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
;; The floating point loads.
;; FP load reservations: the FP scheduler delay overlapped with the load.
;; The K8 variants currently expand to the same resources as the Athlon ones.
(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")


;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd" "athlon_fp")
(define_cpu_unit "athlon-fmul" "athlon_fp")
(define_cpu_unit "athlon-fstore" "athlon_fp")
(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
(define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)")

;; Vector operations usually consume many of the pipes.
(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")


;; NOTE: where several reservations below have overlapping conditions, the
;; more specific one is listed first and the generic one last (for example
;; athlon_fldxf before athlon_fld before athlon_fmov); keep that order.

;; Jump instructions are executed in the branch unit completely transparent to us
(define_insn_reservation "athlon_branch" 0
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "ibr"))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_call" 0
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "call,callv"))
  "athlon-vector,athlon-ieu")

;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "push"))
  "athlon-direct,athlon-agu,athlon-store")
;; Athlon only.  This condition used to include k8 as well, which made the
;; athlon_pop_k8 entry right below unreachable; pop is now split per cpu the
;; same way leave is.
(define_insn_reservation "athlon_pop" 4
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "pop"))
  "athlon-vector,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_pop_k8" 3
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "pop"))
  "athlon-double,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "leave"))
  "athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "leave"))
  "athlon-double,(athlon-ieu+athlon-load)")

;; Lea executes in AGU unit with 2 cycles latency.
(define_insn_reservation "athlon_lea" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")

;; Mul executes in special multiplier unit attached to IEU0
(define_insn_reservation "athlon_imul" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_k8" 3
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
(define_insn_reservation "athlon_imul_mem" 8
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8" 6
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")

;; Idiv cannot execute in parallel with other instructions.  Dealing with it
;; as with a short latency vector instruction is a good approximation that
;; keeps the scheduler from trying too hard to hide its latency by overlap
;; with other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code

(define_insn_reservation "athlon_idiv" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
(define_insn_reservation "athlon_idiv_mem" 9
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
;; The parallelism of string instructions is not documented.  Model it same way
;; as idiv to create smaller automata.  This probably does not matter much.
(define_insn_reservation "athlon_str" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "athlon-vector,athlon-load,athlon-ieu0*6")

;; Generic integer instructions, split by decode path and memory usage.
(define_insn_reservation "athlon_idirect" 1
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_ivector" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_idirect_loadmov" 3
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-load")
(define_insn_reservation "athlon_idirect_load" 4
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_ivector_load" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_idirect_movstore" 1
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "athlon-direct,athlon-agu,athlon-store")
(define_insn_reservation "athlon_idirect_both" 4
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load, athlon-ieu,athlon-store, athlon-store")
(define_insn_reservation "athlon_ivector_both" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load, athlon-ieu, athlon-ieu, athlon-store")
(define_insn_reservation "athlon_idirect_store" 1
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu), athlon-store")
(define_insn_reservation "athlon_ivector_store" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu, athlon-store")

;; Athlon floating point unit
(define_insn_reservation "athlon_fldxf" 12
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
;; Assume superforwarding to take place so effective latency of fany op is 0.
(define_insn_reservation "athlon_fld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_fld_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_fstxf" 10
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
(define_insn_reservation "athlon_fst" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fist" 4
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "fistp,fisttp"))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; Remaining fmov forms (those not matched by the load/store entries above).
(define_insn_reservation "athlon_fmov" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "fmov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; x87 arithmetic.  For each operation the memory-operand forms (per cpu)
;; come first, followed by the register form.
(define_insn_reservation "athlon_fadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "fop"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

(define_insn_reservation "athlon_fmul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "fmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; Sign operations go through the fmul pipe.
(define_insn_reservation "athlon_fsgn" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "fsgn"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; Division also reserves the fmul pipe; latencies differ per cpu.
(define_insn_reservation "athlon_fdiv_load" 24
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fdiv" 24
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; Special FP instructions are vector decoded and take all three FP pipes.
(define_insn_reservation "athlon_fpspc_load" 103
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "fpspc"))
  "athlon-vector,athlon-fpsched,athlon-fvector")

;; Conditional FP moves, likewise vector decoded on all three pipes.
(define_insn_reservation "athlon_fcmov_load" 7
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fcmov" 7
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load_k8" 17
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded but uses only one pipe.
;; Vector decoded FP compares: memory forms per cpu, then the register form.
(define_insn_reservation "athlon_fcomi_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "athlon_decode" "vector")
            (eq_attr "type" "fcmp")))
  "athlon-vector,athlon-fpsched,athlon-fadd")

;; Remaining (direct decoded) FP compares.
(define_insn_reservation "athlon_fcom_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "fcmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; Never seen by the scheduler because we still don't do post reg-stack
;; scheduling.
;; Disabled fxch reservation, kept for reference.
;(define_insn_reservation "athlon_fxch" 2
;  (and (eq_attr "cpu" "athlon,k8")
;       (eq_attr "type" "fxch"))
;  "athlon-direct,athlon-fpsched,athlon-fany")

;; Athlon handles MMX operations in the FPU unit with shorter latencies

;; NOTE: several conditions below overlap; the more specific reservation is
;; listed before the generic one (for example athlon_movaps_load before
;; athlon_mmxsseld before athlon_mmxssemov).  Keep that order.

(define_insn_reservation "athlon_movlpd_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_movlpd_load_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_movaps_load_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
(define_insn_reservation "athlon_movaps_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
(define_insn_reservation "athlon_movss_load" 1
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,(athlon-fany*2)")
(define_insn_reservation "athlon_movss_load_k8" 1
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
(define_insn_reservation "athlon_mmxsseld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_mmxsseld_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; 128-bit stores, Athlon flavour (vector decoded).  The cpu test used to
;; read "k8", which shadowed athlon_mmxssest_k8 below and left Athlon with
;; only the generic athlon_mmxssest_short entry for these stores.
(define_insn_reservation "athlon_mmxssest" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
;; 128-bit stores, K8 flavour (double decoded).
(define_insn_reservation "athlon_mmxssest_k8" 3
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
(define_insn_reservation "athlon_mmxssest_short" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; NOTE(review): the names of the next two entries look swapped relative to
;; their cpu tests ("athlon_movaps" matches k8, "athlon_movaps_k8" matches
;; athlon).  The decode paths follow the cpu test, so only the names are
;; misleading; they are left as-is in case something refers to them.
(define_insn_reservation "athlon_movaps" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
(define_insn_reservation "athlon_movaps_k8" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
(define_insn_reservation "athlon_mmxssemov" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "mmxmov,ssemov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
(define_insn_reservation "athlon_mmxmul_load" 4
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "mmxmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_mmxmul" 3
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "mmxmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_mmx_load" 3
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "unit" "mmx")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-faddmul")
(define_insn_reservation "athlon_mmx" 2
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "unit" "mmx"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; SSE operations are handled by the i387 unit as well.  The latency
;; is the same as for i387 operations for scalar operations

(define_insn_reservation "athlon_sselog_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
(define_insn_reservation "athlon_sselog_load_k8" 5
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
(define_insn_reservation "athlon_sselog" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-vector,athlon-fpsched,athlon-fmul*2")
(define_insn_reservation "athlon_sselog_k8" 3
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-double,athlon-fpsched,athlon-fmul")
;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
;; SSE compares.  The mode-restricted (direct decoded) entries come first;
;; the "vector" entries after them pick up every remaining ssecmp insn.
(define_insn_reservation "athlon_ssecmp_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_ssecmp_load_k8" 4
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI,TI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecmp" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "mode" "SF,DF,DI,TI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssecmp"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_ssecmpvector_k8" 3
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "ssecmp"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

;; SSE comi/ucomi: vector decoded, one fadd pipe.
(define_insn_reservation "athlon_ssecomi_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_ssecomi_load_k8" 6
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fadd")
;; Register form.  The type test used to read "ssecmp", which is already
;; fully covered by the ssecmp entries above, so this reservation could
;; never be chosen and register-form ssecomi had no reservation at all.
(define_insn_reservation "athlon_ssecomi" 4
  (and (eq_attr "cpu" "athlon,k8")
       (eq_attr "type" "ssecomi"))
  "athlon-vector,athlon-fpsched,athlon-fadd")

;; SSE additions: scalar modes first, everything else as "vector" forms.
(define_insn_reservation "athlon_sseadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load_k8" 6
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseadd" 4
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseadd")
            (eq_attr "mode" "SF,DF,DI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_load_k8" 7
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sseadd"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
(define_insn_reservation "athlon_sseaddvector_k8" 5
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "sseadd"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.  Assume that the mode is always set to the
;; destination one and athlon_decode is set to the K8 versions.

;; cvtss2sd
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (eq_attr "mode" "DF"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")
;; cvtps2pd.  Modeled the same way as the other double decoded FP conversions.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath)
;; cvtsi2sd has throughput 1 and is executed in store unit with latency of 6
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; cvtsi2ss mem, reg is doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fpload,(athlon-fstore*2)")
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fploadk8,athlon-fstore")
;; cvtsi2ss reg, reg is doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,athlon")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
;; NOTE(review): this entry matches athlon_decode "vector" but reserves the
;; double decoder path -- confirm which one is intended.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
;; ??? Why is it faster than cvtsd2ss?
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector*2")
;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,athlon-fstore")


;; Scalar SSE multiplies with a memory operand.
(define_insn_reservation "athlon_ssemul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_ssemul_load_k8" 6
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_ssemul" 4
812 (and (eq_attr "cpu" "athlon,k8") 813 (and (eq_attr "type" "ssemul") 814 (eq_attr "mode" "SF,DF"))) 815 "athlon-direct,athlon-fpsched,athlon-fmul") 816(define_insn_reservation "athlon_ssemulvector_load" 5 817 (and (eq_attr "cpu" "athlon") 818 (and (eq_attr "type" "ssemul") 819 (eq_attr "memory" "load"))) 820 "athlon-vector,athlon-fpload2,(athlon-fmul*2)") 821(define_insn_reservation "athlon_ssemulvector_load_k8" 7 822 (and (eq_attr "cpu" "k8") 823 (and (eq_attr "type" "ssemul") 824 (eq_attr "memory" "load"))) 825 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") 826(define_insn_reservation "athlon_ssemulvector" 5 827 (and (eq_attr "cpu" "athlon") 828 (eq_attr "type" "ssemul")) 829 "athlon-vector,athlon-fpsched,(athlon-fmul*2)") 830(define_insn_reservation "athlon_ssemulvector_k8" 5 831 (and (eq_attr "cpu" "k8") 832 (eq_attr "type" "ssemul")) 833 "athlon-double,athlon-fpsched,(athlon-fmul*2)") 834;; divsd timings. divss is faster 835(define_insn_reservation "athlon_ssediv_load" 20 836 (and (eq_attr "cpu" "athlon") 837 (and (eq_attr "type" "ssediv") 838 (and (eq_attr "mode" "SF,DF") 839 (eq_attr "memory" "load")))) 840 "athlon-direct,athlon-fpload,athlon-fmul*17") 841(define_insn_reservation "athlon_ssediv_load_k8" 22 842 (and (eq_attr "cpu" "k8") 843 (and (eq_attr "type" "ssediv") 844 (and (eq_attr "mode" "SF,DF") 845 (eq_attr "memory" "load")))) 846 "athlon-direct,athlon-fploadk8,athlon-fmul*17") 847(define_insn_reservation "athlon_ssediv" 20 848 (and (eq_attr "cpu" "athlon,k8") 849 (and (eq_attr "type" "ssediv") 850 (eq_attr "mode" "SF,DF"))) 851 "athlon-direct,athlon-fpsched,athlon-fmul*17") 852(define_insn_reservation "athlon_ssedivvector_load" 39 853 (and (eq_attr "cpu" "athlon") 854 (and (eq_attr "type" "ssediv") 855 (eq_attr "memory" "load"))) 856 "athlon-vector,athlon-fpload2,athlon-fmul*34") 857(define_insn_reservation "athlon_ssedivvector_load_k8" 35 858 (and (eq_attr "cpu" "k8") 859 (and (eq_attr "type" "ssediv") 860 (eq_attr "memory" "load"))) 861 
"athlon-double,athlon-fpload2k8,athlon-fmul*34") 862(define_insn_reservation "athlon_ssedivvector" 39 863 (and (eq_attr "cpu" "athlon") 864 (eq_attr "type" "ssediv")) 865 "athlon-vector,athlon-fmul*34") 866(define_insn_reservation "athlon_ssedivvector_k8" 39 867 (and (eq_attr "cpu" "k8") 868 (eq_attr "type" "ssediv")) 869 "athlon-double,athlon-fmul*34") 870