1;; AMD Athlon Scheduling
2;;
3;; The Athlon does contain three pipelined FP units, three integer units and
4;; three address generation units. 
5;;
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of the K6 might be
;; an issue here as well, but it is not noted in the documentation.
9;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to the 72 entry instruction
;; control unit, which passes them to the specialized integer (18 entry)
;; and fp (36 entry) schedulers.
16;;
17;; The load/store queue unit is not attached to the schedulers but
18;; communicates with all the execution units separately instead.
19
;; Decode class of an insn for the Athlon/K8 reservations below.
;; Every arm of the cond selects "vector"; anything else is "direct".
;; "double" is a legal value but is never produced by this default.
(define_attr "athlon_decode" "direct,vector,double"
  (cond
    [
     ;; Insn types that are always vector decoded.
     (eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
       (const_string "vector")
     ;; A push whose operand 1 is a memory operand.
     (and (eq_attr "type" "push")
          (match_operand 1 "memory_operand" ""))
       (const_string "vector")
     ;; fmov insns in XF mode that load or store.
     (and (eq_attr "type" "fmov")
          (and (eq_attr "memory" "load,store")
               (eq_attr "mode" "XF")))
       (const_string "vector")
    ]
    (const_string "direct")))
31
;; Decode class used by the AMDFAM10 reservations.  The default given
;; here is unconditionally "direct".
(define_attr "amdfam10_decode"
  "direct,vector,double"
  (const_string "direct"))
34;;
35;;           decode0 decode1 decode2
36;;                 \    |   /
37;;    instruction control unit (72 entry scheduler)
38;;                |                        |
39;;      integer scheduler (18)         stack map
40;;     /  |    |    |    |   \        stack rename
41;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
42;;    |  agu0  |   agu1      agu2    register file
43;;    |      \ |    |       /         |     |     |
44;;     \      /\    |     /         fadd  fmul  fstore
45;;       \  /    \  |   /           fadd  fmul  fstore
46;;       imul  load/store (2x)      fadd  fmul  fstore
47
;; The four automata used by this description, followed by the decoder
;; units (three direct decoders and one vector decoder), all of which
;; live in the "athlon" automaton.
(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
(define_cpu_unit "athlon-decode0,athlon-decode1,athlon-decode2,athlon-decodev"
  "athlon")
;; Model the fact that a double decoded instruction may take 2 cycles
;; to decode when decoder2 is used in one cycle and decoder0 in the next
;; (this is needed to allow a throughput of 1.5 double decoded
;; instructions per cycle).
57;;
58;; In order to avoid dependence between reservation of decoder
59;; and other units, we model decoder as two stage fully pipelined unit
60;; and only double decoded instruction may occupy unit in the first cycle.
61;; With this scheme however two double instructions can be issued cycle0.
62;;
63;; Avoid this by using presence set requiring decoder0 to be allocated
64;; too. Vector decoded instructions then can't be issued when
65;; modeled as consuming decoder0+decoder1+decoder2.
66;; We solve that by specialized vector decoder unit and exclusion set.
;; athlon-decode2 may be reserved only when athlon-decode0 is reserved too.
(presence_set "athlon-decode2"
  "athlon-decode0")
;; The vector decoder excludes every direct decoder, and vice versa.
(exclusion_set "athlon-decodev"
  "athlon-decode0,athlon-decode1,athlon-decode2")
;; Decode-stage reservations.  Each starts with an idle ("nothing")
;; cycle except the first alternative of athlon-double.
(define_reservation "athlon-vector"
  "nothing,athlon-decodev")
(define_reservation "athlon-direct0"
  "nothing,athlon-decode0")
(define_reservation "athlon-direct"
  "nothing,
				     (athlon-decode0 | athlon-decode1
				     | athlon-decode2)")
;; Double instructions behave like two direct instructions.
(define_reservation "athlon-double"
  "((athlon-decode2, athlon-decode0)
				     | (nothing,(athlon-decode0 + athlon-decode1))
				     | (nothing,(athlon-decode1 + athlon-decode2)))")
78
79;; Agu and ieu unit results in extremely large automatons and
80;; in our approximation they are hardly filled in.  Only ieu
81;; unit can, as issue rate is 3 and agu unit is always used
82;; first in the insn reservations.  Skip the models.
83
;; Detailed IEU model, disabled (see the comment above about automaton size).
;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
;; In its place "athlon-ieu" reserves nothing; only the single unit
;; athlon-ieu0 is declared, in the "athlon" automaton.
(define_reservation "athlon-ieu"
  "nothing")
(define_cpu_unit "athlon-ieu0"
  "athlon")
;; Detailed AGU model, disabled likewise.
;(define_cpu_unit "athlon-agu0" "athlon_agu")
;(define_cpu_unit "athlon-agu1" "athlon_agu")
;(define_cpu_unit "athlon-agu2" "athlon_agu")
;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
;; "athlon-agu" reserves nothing.
(define_reservation "athlon-agu"
  "nothing")
95
;; The multiplier unit has an automaton of its own.
(define_cpu_unit "athlon-mult"
  "athlon_mult")

;; Two load/store units; the store reservations below use the same units.
(define_cpu_unit "athlon-load0,athlon-load1"
  "athlon_load")
;; A load: the agu step, then either load unit, then one idle cycle.
(define_reservation "athlon-load"
  "athlon-agu,
				   (athlon-load0 | athlon-load1),nothing")
;; 128bit SSE instructions issue two loads at once
(define_reservation "athlon-load2"
  "athlon-agu,
				   (athlon-load0 + athlon-load1),nothing")

;; A store occupies either load/store unit for one cycle.
(define_reservation "athlon-store"
  "(athlon-load0 | athlon-load1)")
;; 128bit SSE instructions issue two stores at once
(define_reservation "athlon-store2"
  "(athlon-load0 + athlon-load1)")
109
110
;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
;; Compensate for the difference for Athlon because it results in
;; significantly smaller automata.
(define_reservation "athlon-fpsched"
  "nothing,nothing,nothing")
;; The floating point loads.  The "k8" variants are currently defined
;; identically to the plain ones.
(define_reservation "athlon-fpload"
  "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2"
  "(athlon-fpsched + athlon-load2)")
(define_reservation "athlon-fploadk8"
  "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2k8"
  "(athlon-fpsched + athlon-load2)")
121
122
;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd,athlon-fmul,athlon-fstore"
  "athlon_fp")
;; Any one of the three FP pipes.
(define_reservation "athlon-fany"
  "(athlon-fstore | athlon-fmul | athlon-fadd)")
;; Either the add pipe or the multiply pipe.
(define_reservation "athlon-faddmul"
  "(athlon-fadd | athlon-fmul)")

;; Vector operations usually consume many of the pipes; model them as
;; taking all three at once.
(define_reservation "athlon-fvector"
  "(athlon-fadd + athlon-fmul + athlon-fstore)")
132
133
;; Jump instructions are executed in the branch unit completely
;; transparently to us, hence the latency of 0.
(define_insn_reservation "athlon_branch" 0
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "ibr"))
  "athlon-direct,athlon-ieu")
;; Calls are vector decoded on athlon/k8/generic64 ...
(define_insn_reservation "athlon_call" 0
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "call,callv"))
  "athlon-vector,athlon-ieu")
;; ... and double decoded on amdfam10.
(define_insn_reservation "athlon_call_amdfam10" 0
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "call,callv"))
  "athlon-double,athlon-ieu")
147
;; Latency of push operation is 3 cycles, but the ESP value is available
;; earlier, so a latency of 2 is modelled.
(define_insn_reservation "athlon_push" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "push"))
  "athlon-direct,athlon-agu,athlon-store")
;; Pop on the original Athlon: vector decoded, latency 4.
;; This entry is restricted to "athlon" so that it does not overlap the
;; k8/generic64 entry athlon_pop_k8; with the former cpu set
;; "athlon,k8,generic64" both entries matched the same k8/generic64 insns
;; and the K8-specific timing could never be chosen.
(define_insn_reservation "athlon_pop" 4
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "pop"))
  "athlon-vector,athlon-load,athlon-ieu")
;; Pop on k8/generic64: double decoded, latency 3.
(define_insn_reservation "athlon_pop_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "pop"))
  "athlon-double,(athlon-ieu+athlon-load)")
;; Pop on amdfam10: direct decoded, latency 3.
(define_insn_reservation "athlon_pop_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "pop"))
  "athlon-direct,(athlon-ieu+athlon-load)")
;; Leave: vector decoded on athlon, double decoded on the newer cpus;
;; latency 3 in both cases.
(define_insn_reservation "athlon_leave" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "leave"))
  "athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "leave"))
  "athlon-double,(athlon-ieu+athlon-load)")
174
;; Lea executes in AGU unit with 2 cycles latency.
(define_insn_reservation "athlon_lea" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")
;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
(define_insn_reservation "athlon_lea_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")
185
;; Mul executes in special multiplier unit attached to IEU0.
;; These entries cover multiplies without a memory operand.
(define_insn_reservation "athlon_imul" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
;; DImode multiply on the newer cpus: latency 4.
(define_insn_reservation "athlon_imul_k8_DI" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
;; Remaining modes on the newer cpus: latency 3.
(define_insn_reservation "athlon_imul_k8" 3
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
;; HImode multiply on amdfam10: vector decoded, latency 4.
;; NOTE(review): every insn matching this test also matches
;; athlon_imul_k8 above (which lists amdfam10 and has no mode test), so
;; this entry looks unreachable -- confirm the intended ordering.
(define_insn_reservation "athlon_imul_amdfam10_HI" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "HI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
;; Multiplies with a memory source ("load" or "both").  All of them are
;; vector decoded and go through athlon-load first.
(define_insn_reservation "athlon_imul_mem" 8
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
;; DImode on the newer cpus: latency 7.
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
;; Remaining modes on the newer cpus: latency 6.
(define_insn_reservation "athlon_imul_mem_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
226
;; Idiv cannot execute in parallel with other instructions.  Dealing with it
;; as with a short latency vector instruction is a good approximation that
;; keeps the scheduler from trying too hard to hide its latency by overlap
;; with other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code.
;; The same heuristics are used for amdfam10 as for K8.

;; Register form: holds athlon-ieu0 for 6 cycles and also claims all
;; three FP pipes after the FP scheduling delay.
(define_insn_reservation "athlon_idiv" 6
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
;; Memory form: the same, preceded by a load.
(define_insn_reservation "athlon_idiv_mem" 9
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
;; The parallelism of string instructions is not documented.  Model them
;; the same way as idiv to create smaller automata.  This probably does
;; not matter much.  The same heuristics are used for amdfam10 as for K8.
(define_insn_reservation "athlon_str" 6
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "athlon-vector,athlon-load,athlon-ieu0*6")
253
;; Integer insns without a memory operand.  The athlon/k8/generic64
;; entries test "athlon_decode"; the amdfam10 ones test "amdfam10_decode".

;; Direct decoded: latency 1.
(define_insn_reservation "athlon_idirect" 1
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_idirect_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
;; Vector decoded: latency 2.
(define_insn_reservation "athlon_ivector" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")
278
;; Integer insns that load from memory.

;; A plain load move: latency 3, no ieu step.
(define_insn_reservation "athlon_idirect_loadmov" 3
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-load")

;; Direct decoded load-and-operate: latency 4.
(define_insn_reservation "athlon_idirect_load" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_idirect_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
;; Vector decoded load-and-operate: latency 6.
(define_insn_reservation "athlon_ivector_load" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
309
;; A plain store move: latency 1.
(define_insn_reservation "athlon_idirect_movstore" 1
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "athlon-direct,athlon-agu,athlon-store")

;; Integer insns that both load and store (memory "both").
;; Direct decoded: latency 4.
(define_insn_reservation "athlon_idirect_both" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
			  athlon-ieu,athlon-store,
			  athlon-store")
(define_insn_reservation "athlon_idirect_both_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
			  athlon-ieu,athlon-store,
			  athlon-store")

;; Vector decoded: latency 6.
(define_insn_reservation "athlon_ivector_both" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
			  athlon-ieu,
			  athlon-ieu,
			  athlon-store")
(define_insn_reservation "athlon_ivector_both_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
			  athlon-ieu,
			  athlon-ieu,
			  athlon-store")
351
;; Integer insns that only store to memory.
;; Direct decoded: latency 1.
(define_insn_reservation "athlon_idirect_store" 1
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
			  athlon-store")
(define_insn_reservation "athlon_idirect_store_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
			  athlon-store")

;; Vector decoded: latency 2.
(define_insn_reservation "athlon_ivector_store" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
			  athlon-store")
(define_insn_reservation "athlon_ivector_store_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
			  athlon-store")
381
;; Athlon floating point unit

;; XFmode loads are vector decoded and hold all three FP pipes for
;; nine cycles.
(define_insn_reservation "athlon_fldxf" 12
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
;; Assume superforwarding to take place so effective latency of fany op is 0.
(define_insn_reservation "athlon_fld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
;; On the newer cpus the load is given latency 2 and uses the fstore pipe.
(define_insn_reservation "athlon_fld_k8" 2
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; XFmode stores: vector decoded, both store units plus all FP pipes.
(define_insn_reservation "athlon_fstxf" 10
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
;; Other fmov stores: direct decoded, fstore pipe plus one store unit.
(define_insn_reservation "athlon_fst" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; fistp/fisttp use the same reservation as a store, latency 4.
(define_insn_reservation "athlon_fist" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fistp,fisttp"))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; Any remaining fmov (not matched by the load/store entries).
(define_insn_reservation "athlon_fmov" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fmov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; fop insns use the fadd pipe.  The load forms come first; the plain
;; entry catches the rest.
(define_insn_reservation "athlon_fadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fop"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; fmul insns use the fmul pipe, laid out the same way.
(define_insn_reservation "athlon_fmul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; fsgn insns: fmul pipe, latency 2.
(define_insn_reservation "athlon_fsgn" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fsgn"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; fdiv insns use the fmul pipe; only the latency differs between the
;; athlon and the k8/generic64/amdfam10 entries.
(define_insn_reservation "athlon_fdiv_load" 24
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fdiv" 24
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; fpspc insns: vector decoded, all FP pipes, very long latency.
(define_insn_reservation "athlon_fpspc_load" 103
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fpspc"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
;; fcmov: vector decoded and occupying all FP pipes.  Latency 7 on
;; athlon; 17 (load) and 15 (no load) on k8/generic64/amdfam10.
(define_insn_reservation "athlon_fcmov_load" 7
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fcmov" 7
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load_k8" 17
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded but uses only one pipe.  These entries pick
;; fcmp insns whose athlon_decode attribute is "vector".
(define_insn_reservation "athlon_fcomi_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "athlon_decode" "vector")
            (eq_attr "type" "fcmp")))
  "athlon-vector,athlon-fpsched,athlon-fadd")
;; The remaining fcmp insns are direct decoded.
(define_insn_reservation "athlon_fcom_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fcmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
546;; Never seen by the scheduler because we still don't do post reg-stack
547;; scheduling.
548;(define_insn_reservation "athlon_fxch" 2
549;			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
550;			      (eq_attr "type" "fxch"))
551;			 "athlon-direct,athlon-fpsched,athlon-fany")
552
;; Athlon handles MMX operations in the FPU unit with shorter latencies.

;; ssemov whose operand 1 is a DFmode memory operand.
(define_insn_reservation "athlon_movlpd_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_movlpd_load_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_movsd_load_generic64" 2
  (and (eq_attr "cpu" "generic64")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")
;; ssemov loads in modes V4SF, V2DF and TI; these use the two-unit load.
(define_insn_reservation "athlon_movaps_load_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
(define_insn_reservation "athlon_movaps_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
;; ssemov loads in modes SF and DI.
(define_insn_reservation "athlon_movss_load" 1
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,(athlon-fany*2)")
(define_insn_reservation "athlon_movss_load_k8" 1
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
;; Any other mmxmov/ssemov load.
(define_insn_reservation "athlon_mmxsseld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_mmxsseld_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
;; loads generated are direct path, have a latency of 2 and do not use any
;; FP execution units.  There are no separate entries for movlpx/movhpx
;; loads, which are direct path, latency of 4 and use the FADD/FMUL FP
;; execution units, as they will not be generated.
(define_insn_reservation "athlon_sseld_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8")
;; On AMDFAM10 the MMX data loads generated are direct path, have a
;; latency of 4 and can use any FP execution unit.
(define_insn_reservation "athlon_mmxld_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "mmxmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8, athlon-fany")
;; 128-bit (V4SF/V2DF/TI) mmxmov/ssemov stores on the original Athlon:
;; vector decoded, latency 3, using the fstore pipe together with both
;; store units for two cycles.
;; The cpu test must be "athlon": with "k8,generic64" this entry had
;; exactly the same condition as athlon_mmxssest_k8, which made that
;; entry unreachable and left Athlon without a 128-bit store entry.
(define_insn_reservation "athlon_mmxssest" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
;; 128-bit (V4SF/V2DF/TI) mmxmov/ssemov stores on k8/generic64:
;; double decoded, latency 3.
(define_insn_reservation "athlon_mmxssest_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
;; Every other mmxmov/ssemov store: direct decoded, latency 2.
(define_insn_reservation "athlon_mmxssest_short" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; On AMDFAM10 all double, single and integer packed SSEx data stores
;; generated are double path, have a latency of 2 and use the FSTORE FP
;; execution unit.  There are no separate entries for movupx/movdqu,
;; which are vector path, latency of 3 and use the FSTORE*2 FP execution
;; unit, as they will not be generated.
(define_insn_reservation "athlon_ssest_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
;; data stores generated are direct path, have a latency of 2 and use
;; the FSTORE FP execution unit.
(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; ssemov in V4SF/V2DF/TI modes not matched by the load/store entries.
;; k8/generic64: double decoded; the two add/mul halves may issue in the
;; same cycle or in consecutive cycles.
(define_insn_reservation "athlon_movaps_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
;; athlon: vector decoded.
(define_insn_reservation "athlon_movaps" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
;; Any remaining mmxmov/ssemov: direct decoded, one add/mul pipe.
(define_insn_reservation "athlon_mmxssemov" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "mmxmov,ssemov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; mmxmul insns use the fmul pipe: latency 4 with a load, 3 without.
(define_insn_reservation "athlon_mmxmul_load" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "mmxmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_mmxmul" 3
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "mmxmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; Everything else in the mmx unit uses either the add or the mul pipe:
;; latency 3 with a load, 2 without.
(define_insn_reservation "athlon_mmx_load" 3
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "unit" "mmx")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-faddmul")
(define_insn_reservation "athlon_mmx" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "unit" "mmx"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; SSE operations are handled by the i387 unit as well.  For scalar
;; operations the latency is the same as for the i387 operations.

;; SSE logical operations (types sselog and sselog1): load forms for
;; Athlon, K8/generic64 and AMDFAM10, then the register forms.
(define_insn_reservation "athlon_sselog_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")

(define_insn_reservation "athlon_sselog_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")

(define_insn_reservation "athlon_sselog_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")

(define_insn_reservation "athlon_sselog" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-vector,athlon-fpsched,athlon-fmul*2")

(define_insn_reservation "athlon_sselog_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-double,athlon-fpsched,athlon-fmul")

(define_insn_reservation "athlon_sselog_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
719
;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.

;; SSE compares.  The mode-restricted reservations come first; the
;; "vector" reservations below them have no mode test and therefore
;; describe whatever ssecmp insns are left.
(define_insn_reservation "athlon_ssecmp_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_ssecmp_load_k8" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI,TI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecmp" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "mode" "SF,DF,DI,TI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")

(define_insn_reservation "athlon_ssecmpvector_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecmpvector" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssecmp"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssecmp"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssecmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; SSE compares of type "ssecomi": load forms for Athlon, K8/generic64
;; and AMDFAM10, then the register forms.
(define_insn_reservation "athlon_ssecomi_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

;; This reservation used to test for type "ssecmp".  That condition
;; overlapped with the athlon_ssecmp* reservations and never described an
;; ssecomi insn, leaving register-only ssecomi insns without a reservation
;; on Athlon/K8/generic64.  It now tests "ssecomi" like its siblings.
(define_insn_reservation "athlon_ssecomi" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "ssecomi"))
  "athlon-vector,athlon-fpsched,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssecomi"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; SSE additions.  The SF/DF/DI mode reservations come first; the
;; "vector" reservations below them have no mode test and therefore
;; describe whatever sseadd insns are left.
(define_insn_reservation "athlon_sseadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_sseadd_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_sseadd" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "sseadd")
            (eq_attr "mode" "SF,DF,DI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")

(define_insn_reservation "athlon_sseaddvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_load_k8" 7
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_sseaddvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sseadd"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "sseadd"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sseadd"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
833
;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.  Assume that the mode is always set to the
;; destination one and athlon_decode is set to the K8 versions.

;; cvtss2sd
;; Selected by DF mode plus the decode attribute: athlon_decode for
;; Athlon/K8/generic64, amdfam10_decode for AMDFAM10.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (eq_attr "mode" "DF"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (eq_attr "mode" "DF"))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; cvtps2pd.  Modelled the same way as the other double decoded FP
;; conversions.  On AMDFAM10 the reservations require direct decode.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")

(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")
;; cvtsi2sd mem,reg is directpath  (cvtsi2sd reg,reg is doublepath)
;; cvtsi2sd has throughput 1 and is executed in store unit with latency of 6
;; NOTE(review): the cpu list here is "athlon,k8" while the sibling
;; reservations also list generic64 -- confirm whether that is intended.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2ss mem, reg is doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fpload,(athlon-fstore*2)")

(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*2)")

;; NOTE(review): same condition, latency and units as
;; athlon_sseicvt_cvtsi2sd_load_amdfam10 above.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2ss reg, reg is doublepath
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")

;; NOTE(review): same condition, latency and units as
;; athlon_sseicvt_cvtsi2sd_amdfam10 above.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*3)")

(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,(athlon-fvector*3)")

(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")

;; NOTE(review): the condition requires athlon_decode "vector" but the
;; reservation starts with athlon-double -- confirm which one is meant.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")

(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
;; ??? Why is it faster than cvtsd2ss?
;; NOTE(review): the reservation right below is the register form
;; (memory "none") with latency 8; the line about mem,reg may belong to
;; athlon_ssecvt_cvtpd2ps_load_k8 instead.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector*2")

(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fploadk8,athlon-fvector")

(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")

;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector")

(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")

;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtpd2dq reg,reg (no memory operand) is doublepath, throughput 1,
;; latency 7 on amdfam10
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
1073
1074
;; SSE multiplies.  The SF/DF mode reservations come first; the "vector"
;; reservations below them have no mode test and therefore describe
;; whatever ssemul insns are left.
(define_insn_reservation "athlon_ssemul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul")

(define_insn_reservation "athlon_ssemul_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul")

(define_insn_reservation "athlon_ssemul" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul")

(define_insn_reservation "athlon_ssemulvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_load_k8" 7
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")

(define_insn_reservation "athlon_ssemulvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssemul"))
  "athlon-vector,athlon-fpsched,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssemul"))
  "athlon-double,athlon-fpsched,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssemul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; divsd timings.  divss is faster.
;; The SF/DF mode reservations hold athlon-fmul for 17 cycles; the
;; Athlon and K8 "vector" reservations hold it for 34.
(define_insn_reservation "athlon_ssediv_load" 20
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul*17")

(define_insn_reservation "athlon_ssediv_load_k8" 22
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")

(define_insn_reservation "athlon_ssediv" 20
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul*17")

(define_insn_reservation "athlon_ssedivvector_load" 39
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,athlon-fmul*34")

;; NOTE(review): latency 35 here is lower than the 39 of the register
;; form athlon_ssedivvector_k8 -- confirm against the K8 documentation.
(define_insn_reservation "athlon_ssedivvector_load_k8" 35
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")

(define_insn_reservation "athlon_ssedivvector" 39
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssediv"))
  "athlon-vector,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_k8" 39
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssediv"))
  "athlon-double,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssediv"))
  "athlon-direct,athlon-fmul*17")
;; AMDFAM10 insns of type "sseins" in TI mode: reserve athlon-vector,
;; then the FP scheduler and athlon-faddmul; latency 5.
(define_insn_reservation "athlon_sseins_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "athlon-vector,athlon-fpsched,athlon-faddmul")
1169